/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to jump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

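/*
 * Illustrative example (added comment, not from the original source):
 * a call such as
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SW, src);
 * takes the MO_SW case above and sign-extends the low 16 bits of @src
 * into a 64-bit @dst, while
 *     tcg_out_movext(s, TCG_TYPE_I32, dst, TCG_TYPE_I32, MO_UL, src);
 * degenerates to a plain register move.  The register operands are only
 * placeholders for whatever the caller allocated.
 */
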
/* Minor variations on a theme, using a structure.  */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool. */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code. */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host. */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host. */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host. */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

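/*
 * Illustrative note (added comment, not from the original source): given
 * the two sets of macro definitions above, a single line such as
 *     C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h expands first to the enumerator c_o1_i2_r_r_ri
 * in TCGConstraintSetIndex, and then to the entry
 *     { 1, 2, { "r", "r", "ri" } }
 * in constraint_sets[]: one output and two inputs, with the constraint
 * letters interpreted by the backend.  "r" and "ri" are only examples of
 * constraint strings a backend might define.
 */
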
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic: Use @dynamic_constraint to select a constraint set
 *     based on any of @type, @flags, or host isa.
 *   Otherwise: The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDeposit {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned ofs, unsigned len);
    void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    tcg_target_long a2, unsigned ofs, unsigned len);
    void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
                    TCGReg a2, unsigned ofs, unsigned len);
} TCGOutOpDeposit;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

typedef struct TCGOutOpExtract2 {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned shr);
} TCGOutOpExtract2;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

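/*
 * Illustrative sketch (added comment, not from the original source): a
 * backend included below typically provides one such descriptor per
 * opcode, of roughly the shape
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 *
 * where tgen_add/tgen_addi stand for backend emitters and the constraint
 * set names which operand forms the register allocator may hand to them.
 * The emitter names and constraint set here are hypothetical; each
 * tcg-target.c.inc defines its own.
 */
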
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

#if TCG_TARGET_REG_BITS == 64
/*
 * We require these functions for slow-path function calls.
 * Adapt them generically for opcode output.
 */

static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_exts_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_exts_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_exts_i32_i64,
};

static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extu_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_extu_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extu_i32_i64,
};

static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extrl_i64_i32(s, a0, a1);
}

static const TCGOutOpUnary outop_extrl_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
};
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

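/*
 * Illustrative note (added comment, not from the original source): an entry
 * such as OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) expands to
 *     [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary: &outop_add.base)
 * so the table below stores &outop_add.base, and compilation fails if
 * outop_add does not have type TCGOutOpBinary.
 */
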
/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
    OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#else
    OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
    OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
    OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
    OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
    OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
#endif
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx.  See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[].  Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

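/*
 * Illustrative note (added comment, not from the original source): each
 * dh_typemask(t, n) above contributes a 3-bit type code at bit position
 * n * 3, with slot 0 describing the return value and slots 1..n the
 * arguments.  init_ffi_layout() below recovers the argument count by
 * shifting out the return slot and locating the highest non-zero 3-bit
 * field; e.g. for info_helper_st64_mmu the last argument (ra) sits in
 * slot 5, so nargs evaluates to 5.
 */
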
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

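/*
 * Illustrative note (added comment, not from the original source): call
 * argument slots are numbered with the host's integer argument registers
 * first and stack words after.  Assuming, purely for illustration, a host
 * with 6 such registers, slots 0-5 are registers while slot 7 maps to the
 * stack at TCG_TARGET_CALL_STACK_OFFSET + 1 * sizeof(tcg_target_long).
 */
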
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

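/*
 * Illustrative example (added comment, not from the original source):
 * for info_helper_ld32_mmu on a 64-bit host whose ABI uses
 * TCG_CALL_ARG_NORMAL throughout, the loop above produces four input
 * locations, one per helper argument (env, addr, oi, ra), occupying
 * argument slots 0-3; all of them are registers when the host provides
 * at least four integer argument registers.  Hosts with other
 * TCG_TARGET_CALL_ARG_* settings lay things out differently.
 */
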
*/ 1219 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 1220 if (tcg_init_ctx.temps[i].mem_base) { 1221 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 1222 tcg_debug_assert(b >= 0 && b < n); 1223 s->temps[i].mem_base = &s->temps[b]; 1224 } 1225 } 1226 1227 /* Claim an entry in tcg_ctxs */ 1228 n = qatomic_fetch_inc(&tcg_cur_ctxs); 1229 g_assert(n < tcg_max_ctxs); 1230 qatomic_set(&tcg_ctxs[n], s); 1231 1232 if (n > 0) { 1233 tcg_region_initial_alloc(s); 1234 } 1235 1236 tcg_ctx = s; 1237 } 1238 #endif /* !CONFIG_USER_ONLY */ 1239 1240 /* pool based memory allocation */ 1241 void *tcg_malloc_internal(TCGContext *s, int size) 1242 { 1243 TCGPool *p; 1244 int pool_size; 1245 1246 if (size > TCG_POOL_CHUNK_SIZE) { 1247 /* big malloc: insert a new pool (XXX: could optimize) */ 1248 p = g_malloc(sizeof(TCGPool) + size); 1249 p->size = size; 1250 p->next = s->pool_first_large; 1251 s->pool_first_large = p; 1252 return p->data; 1253 } else { 1254 p = s->pool_current; 1255 if (!p) { 1256 p = s->pool_first; 1257 if (!p) 1258 goto new_pool; 1259 } else { 1260 if (!p->next) { 1261 new_pool: 1262 pool_size = TCG_POOL_CHUNK_SIZE; 1263 p = g_malloc(sizeof(TCGPool) + pool_size); 1264 p->size = pool_size; 1265 p->next = NULL; 1266 if (s->pool_current) { 1267 s->pool_current->next = p; 1268 } else { 1269 s->pool_first = p; 1270 } 1271 } else { 1272 p = p->next; 1273 } 1274 } 1275 } 1276 s->pool_current = p; 1277 s->pool_cur = p->data + size; 1278 s->pool_end = p->data + p->size; 1279 return p->data; 1280 } 1281 1282 void tcg_pool_reset(TCGContext *s) 1283 { 1284 TCGPool *p, *t; 1285 for (p = s->pool_first_large; p; p = t) { 1286 t = p->next; 1287 g_free(p); 1288 } 1289 s->pool_first_large = NULL; 1290 s->pool_cur = s->pool_end = NULL; 1291 s->pool_current = NULL; 1292 } 1293 1294 /* 1295 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions, 1296 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N. 1297 * We only use these for layout in tcg_out_ld_helper_ret and 1298 * tcg_out_st_helper_args, and share them between several of 1299 * the helpers, with the end result that it's easier to build manually. 
1300 */ 1301 1302 #if TCG_TARGET_REG_BITS == 32 1303 # define dh_typecode_ttl dh_typecode_i32 1304 #else 1305 # define dh_typecode_ttl dh_typecode_i64 1306 #endif 1307 1308 static TCGHelperInfo info_helper_ld32_mmu = { 1309 .flags = TCG_CALL_NO_WG, 1310 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */ 1311 | dh_typemask(env, 1) 1312 | dh_typemask(i64, 2) /* uint64_t addr */ 1313 | dh_typemask(i32, 3) /* unsigned oi */ 1314 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1315 }; 1316 1317 static TCGHelperInfo info_helper_ld64_mmu = { 1318 .flags = TCG_CALL_NO_WG, 1319 .typemask = dh_typemask(i64, 0) /* return uint64_t */ 1320 | dh_typemask(env, 1) 1321 | dh_typemask(i64, 2) /* uint64_t addr */ 1322 | dh_typemask(i32, 3) /* unsigned oi */ 1323 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1324 }; 1325 1326 static TCGHelperInfo info_helper_ld128_mmu = { 1327 .flags = TCG_CALL_NO_WG, 1328 .typemask = dh_typemask(i128, 0) /* return Int128 */ 1329 | dh_typemask(env, 1) 1330 | dh_typemask(i64, 2) /* uint64_t addr */ 1331 | dh_typemask(i32, 3) /* unsigned oi */ 1332 | dh_typemask(ptr, 4) /* uintptr_t ra */ 1333 }; 1334 1335 static TCGHelperInfo info_helper_st32_mmu = { 1336 .flags = TCG_CALL_NO_WG, 1337 .typemask = dh_typemask(void, 0) 1338 | dh_typemask(env, 1) 1339 | dh_typemask(i64, 2) /* uint64_t addr */ 1340 | dh_typemask(i32, 3) /* uint32_t data */ 1341 | dh_typemask(i32, 4) /* unsigned oi */ 1342 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1343 }; 1344 1345 static TCGHelperInfo info_helper_st64_mmu = { 1346 .flags = TCG_CALL_NO_WG, 1347 .typemask = dh_typemask(void, 0) 1348 | dh_typemask(env, 1) 1349 | dh_typemask(i64, 2) /* uint64_t addr */ 1350 | dh_typemask(i64, 3) /* uint64_t data */ 1351 | dh_typemask(i32, 4) /* unsigned oi */ 1352 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1353 }; 1354 1355 static TCGHelperInfo info_helper_st128_mmu = { 1356 .flags = TCG_CALL_NO_WG, 1357 .typemask = dh_typemask(void, 0) 1358 | dh_typemask(env, 1) 1359 | dh_typemask(i64, 2) /* uint64_t addr */ 1360 | dh_typemask(i128, 3) /* Int128 data */ 1361 | dh_typemask(i32, 4) /* unsigned oi */ 1362 | dh_typemask(ptr, 5) /* uintptr_t ra */ 1363 }; 1364 1365 #ifdef CONFIG_TCG_INTERPRETER 1366 static ffi_type *typecode_to_ffi(int argmask) 1367 { 1368 /* 1369 * libffi does not support __int128_t, so we have forced Int128 1370 * to use the structure definition instead of the builtin type. 1371 */ 1372 static ffi_type *ffi_type_i128_elements[3] = { 1373 &ffi_type_uint64, 1374 &ffi_type_uint64, 1375 NULL 1376 }; 1377 static ffi_type ffi_type_i128 = { 1378 .size = 16, 1379 .alignment = __alignof__(Int128), 1380 .type = FFI_TYPE_STRUCT, 1381 .elements = ffi_type_i128_elements, 1382 }; 1383 1384 switch (argmask) { 1385 case dh_typecode_void: 1386 return &ffi_type_void; 1387 case dh_typecode_i32: 1388 return &ffi_type_uint32; 1389 case dh_typecode_s32: 1390 return &ffi_type_sint32; 1391 case dh_typecode_i64: 1392 return &ffi_type_uint64; 1393 case dh_typecode_s64: 1394 return &ffi_type_sint64; 1395 case dh_typecode_ptr: 1396 return &ffi_type_pointer; 1397 case dh_typecode_i128: 1398 return &ffi_type_i128; 1399 } 1400 g_assert_not_reached(); 1401 } 1402 1403 static ffi_cif *init_ffi_layout(TCGHelperInfo *info) 1404 { 1405 unsigned typemask = info->typemask; 1406 struct { 1407 ffi_cif cif; 1408 ffi_type *args[]; 1409 } *ca; 1410 ffi_status status; 1411 int nargs; 1412 1413 /* Ignoring the return type, find the last non-zero field. 
*/ 1414 nargs = 32 - clz32(typemask >> 3); 1415 nargs = DIV_ROUND_UP(nargs, 3); 1416 assert(nargs <= MAX_CALL_IARGS); 1417 1418 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); 1419 ca->cif.rtype = typecode_to_ffi(typemask & 7); 1420 ca->cif.nargs = nargs; 1421 1422 if (nargs != 0) { 1423 ca->cif.arg_types = ca->args; 1424 for (int j = 0; j < nargs; ++j) { 1425 int typecode = extract32(typemask, (j + 1) * 3, 3); 1426 ca->args[j] = typecode_to_ffi(typecode); 1427 } 1428 } 1429 1430 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, 1431 ca->cif.rtype, ca->cif.arg_types); 1432 assert(status == FFI_OK); 1433 1434 return &ca->cif; 1435 } 1436 1437 #define HELPER_INFO_INIT(I) (&(I)->cif) 1438 #define HELPER_INFO_INIT_VAL(I) init_ffi_layout(I) 1439 #else 1440 #define HELPER_INFO_INIT(I) (&(I)->init) 1441 #define HELPER_INFO_INIT_VAL(I) 1 1442 #endif /* CONFIG_TCG_INTERPRETER */ 1443 1444 static inline bool arg_slot_reg_p(unsigned arg_slot) 1445 { 1446 /* 1447 * Split the sizeof away from the comparison to avoid Werror from 1448 * "unsigned < 0 is always false", when iarg_regs is empty. 1449 */ 1450 unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs); 1451 return arg_slot < nreg; 1452 } 1453 1454 static inline int arg_slot_stk_ofs(unsigned arg_slot) 1455 { 1456 unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1457 unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 1458 1459 tcg_debug_assert(stk_slot < max); 1460 return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long); 1461 } 1462 1463 typedef struct TCGCumulativeArgs { 1464 int arg_idx; /* tcg_gen_callN args[] */ 1465 int info_in_idx; /* TCGHelperInfo in[] */ 1466 int arg_slot; /* regs+stack slot */ 1467 int ref_slot; /* stack slots for references */ 1468 } TCGCumulativeArgs; 1469 1470 static void layout_arg_even(TCGCumulativeArgs *cum) 1471 { 1472 cum->arg_slot += cum->arg_slot & 1; 1473 } 1474 1475 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, 1476 TCGCallArgumentKind kind) 1477 { 1478 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1479 1480 *loc = (TCGCallArgumentLoc){ 1481 .kind = kind, 1482 .arg_idx = cum->arg_idx, 1483 .arg_slot = cum->arg_slot, 1484 }; 1485 cum->info_in_idx++; 1486 cum->arg_slot++; 1487 } 1488 1489 static void layout_arg_normal_n(TCGCumulativeArgs *cum, 1490 TCGHelperInfo *info, int n) 1491 { 1492 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1493 1494 for (int i = 0; i < n; ++i) { 1495 /* Layout all using the same arg_idx, adjusting the subindex. */ 1496 loc[i] = (TCGCallArgumentLoc){ 1497 .kind = TCG_CALL_ARG_NORMAL, 1498 .arg_idx = cum->arg_idx, 1499 .tmp_subindex = i, 1500 .arg_slot = cum->arg_slot + i, 1501 }; 1502 } 1503 cum->info_in_idx += n; 1504 cum->arg_slot += n; 1505 } 1506 1507 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info) 1508 { 1509 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 1510 int n = 128 / TCG_TARGET_REG_BITS; 1511 1512 /* The first subindex carries the pointer. */ 1513 layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF); 1514 1515 /* 1516 * The callee is allowed to clobber memory associated with 1517 * structure pass by-reference. Therefore we must make copies. 1518 * Allocate space from "ref_slot", which will be adjusted to 1519 * follow the parameters on the stack. 1520 */ 1521 loc[0].ref_slot = cum->ref_slot; 1522 1523 /* 1524 * Subsequent words also go into the reference slot, but 1525 * do not accumulate into the regular arguments. 
1526 */ 1527 for (int i = 1; i < n; ++i) { 1528 loc[i] = (TCGCallArgumentLoc){ 1529 .kind = TCG_CALL_ARG_BY_REF_N, 1530 .arg_idx = cum->arg_idx, 1531 .tmp_subindex = i, 1532 .ref_slot = cum->ref_slot + i, 1533 }; 1534 } 1535 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */ 1536 cum->ref_slot += n; 1537 } 1538 1539 static void init_call_layout(TCGHelperInfo *info) 1540 { 1541 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 1542 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 1543 unsigned typemask = info->typemask; 1544 unsigned typecode; 1545 TCGCumulativeArgs cum = { }; 1546 1547 /* 1548 * Parse and place any function return value. 1549 */ 1550 typecode = typemask & 7; 1551 switch (typecode) { 1552 case dh_typecode_void: 1553 info->nr_out = 0; 1554 break; 1555 case dh_typecode_i32: 1556 case dh_typecode_s32: 1557 case dh_typecode_ptr: 1558 info->nr_out = 1; 1559 info->out_kind = TCG_CALL_RET_NORMAL; 1560 break; 1561 case dh_typecode_i64: 1562 case dh_typecode_s64: 1563 info->nr_out = 64 / TCG_TARGET_REG_BITS; 1564 info->out_kind = TCG_CALL_RET_NORMAL; 1565 /* Query the last register now to trigger any assert early. */ 1566 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1567 break; 1568 case dh_typecode_i128: 1569 info->nr_out = 128 / TCG_TARGET_REG_BITS; 1570 info->out_kind = TCG_TARGET_CALL_RET_I128; 1571 switch (TCG_TARGET_CALL_RET_I128) { 1572 case TCG_CALL_RET_NORMAL: 1573 /* Query the last register now to trigger any assert early. */ 1574 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 1575 break; 1576 case TCG_CALL_RET_BY_VEC: 1577 /* Query the single register now to trigger any assert early. */ 1578 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 1579 break; 1580 case TCG_CALL_RET_BY_REF: 1581 /* 1582 * Allocate the first argument to the output. 1583 * We don't need to store this anywhere, just make it 1584 * unavailable for use in the input loop below. 1585 */ 1586 cum.arg_slot = 1; 1587 break; 1588 default: 1589 qemu_build_not_reached(); 1590 } 1591 break; 1592 default: 1593 g_assert_not_reached(); 1594 } 1595 1596 /* 1597 * Parse and place function arguments. 
1598 */ 1599 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 1600 TCGCallArgumentKind kind; 1601 TCGType type; 1602 1603 typecode = typemask & 7; 1604 switch (typecode) { 1605 case dh_typecode_i32: 1606 case dh_typecode_s32: 1607 type = TCG_TYPE_I32; 1608 break; 1609 case dh_typecode_i64: 1610 case dh_typecode_s64: 1611 type = TCG_TYPE_I64; 1612 break; 1613 case dh_typecode_ptr: 1614 type = TCG_TYPE_PTR; 1615 break; 1616 case dh_typecode_i128: 1617 type = TCG_TYPE_I128; 1618 break; 1619 default: 1620 g_assert_not_reached(); 1621 } 1622 1623 switch (type) { 1624 case TCG_TYPE_I32: 1625 switch (TCG_TARGET_CALL_ARG_I32) { 1626 case TCG_CALL_ARG_EVEN: 1627 layout_arg_even(&cum); 1628 /* fall through */ 1629 case TCG_CALL_ARG_NORMAL: 1630 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1631 break; 1632 case TCG_CALL_ARG_EXTEND: 1633 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 1634 layout_arg_1(&cum, info, kind); 1635 break; 1636 default: 1637 qemu_build_not_reached(); 1638 } 1639 break; 1640 1641 case TCG_TYPE_I64: 1642 switch (TCG_TARGET_CALL_ARG_I64) { 1643 case TCG_CALL_ARG_EVEN: 1644 layout_arg_even(&cum); 1645 /* fall through */ 1646 case TCG_CALL_ARG_NORMAL: 1647 if (TCG_TARGET_REG_BITS == 32) { 1648 layout_arg_normal_n(&cum, info, 2); 1649 } else { 1650 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 1651 } 1652 break; 1653 default: 1654 qemu_build_not_reached(); 1655 } 1656 break; 1657 1658 case TCG_TYPE_I128: 1659 switch (TCG_TARGET_CALL_ARG_I128) { 1660 case TCG_CALL_ARG_EVEN: 1661 layout_arg_even(&cum); 1662 /* fall through */ 1663 case TCG_CALL_ARG_NORMAL: 1664 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 1665 break; 1666 case TCG_CALL_ARG_BY_REF: 1667 layout_arg_by_ref(&cum, info); 1668 break; 1669 default: 1670 qemu_build_not_reached(); 1671 } 1672 break; 1673 1674 default: 1675 g_assert_not_reached(); 1676 } 1677 } 1678 info->nr_in = cum.info_in_idx; 1679 1680 /* Validate that we didn't overrun the input array. */ 1681 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 1682 /* Validate the backend has enough argument space. */ 1683 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 1684 1685 /* 1686 * Relocate the "ref_slot" area to the end of the parameters. 1687 * Minimizing this stack offset helps code size for x86, 1688 * which has a signed 8-bit offset encoding. 
1689 */ 1690 if (cum.ref_slot != 0) { 1691 int ref_base = 0; 1692 1693 if (cum.arg_slot > max_reg_slots) { 1694 int align = __alignof(Int128) / sizeof(tcg_target_long); 1695 1696 ref_base = cum.arg_slot - max_reg_slots; 1697 if (align > 1) { 1698 ref_base = ROUND_UP(ref_base, align); 1699 } 1700 } 1701 assert(ref_base + cum.ref_slot <= max_stk_slots); 1702 ref_base += max_reg_slots; 1703 1704 if (ref_base != 0) { 1705 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 1706 TCGCallArgumentLoc *loc = &info->in[i]; 1707 switch (loc->kind) { 1708 case TCG_CALL_ARG_BY_REF: 1709 case TCG_CALL_ARG_BY_REF_N: 1710 loc->ref_slot += ref_base; 1711 break; 1712 default: 1713 break; 1714 } 1715 } 1716 } 1717 } 1718 } 1719 1720 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 1721 static void process_constraint_sets(void); 1722 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1723 TCGReg reg, const char *name); 1724 1725 static void tcg_context_init(unsigned max_threads) 1726 { 1727 TCGContext *s = &tcg_init_ctx; 1728 int n, i; 1729 TCGTemp *ts; 1730 1731 memset(s, 0, sizeof(*s)); 1732 s->nb_globals = 0; 1733 1734 init_call_layout(&info_helper_ld32_mmu); 1735 init_call_layout(&info_helper_ld64_mmu); 1736 init_call_layout(&info_helper_ld128_mmu); 1737 init_call_layout(&info_helper_st32_mmu); 1738 init_call_layout(&info_helper_st64_mmu); 1739 init_call_layout(&info_helper_st128_mmu); 1740 1741 tcg_target_init(s); 1742 process_constraint_sets(); 1743 1744 /* Reverse the order of the saved registers, assuming they're all at 1745 the start of tcg_target_reg_alloc_order. */ 1746 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1747 int r = tcg_target_reg_alloc_order[n]; 1748 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1749 break; 1750 } 1751 } 1752 for (i = 0; i < n; ++i) { 1753 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1754 } 1755 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1756 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1757 } 1758 1759 tcg_ctx = s; 1760 /* 1761 * In user-mode we simply share the init context among threads, since we 1762 * use a single region. See the documentation tcg_region_init() for the 1763 * reasoning behind this. 1764 * In system-mode we will have at most max_threads TCG threads. 1765 */ 1766 #ifdef CONFIG_USER_ONLY 1767 tcg_ctxs = &tcg_ctx; 1768 tcg_cur_ctxs = 1; 1769 tcg_max_ctxs = 1; 1770 #else 1771 tcg_max_ctxs = max_threads; 1772 tcg_ctxs = g_new0(TCGContext *, max_threads); 1773 #endif 1774 1775 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1776 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1777 tcg_env = temp_tcgv_ptr(ts); 1778 } 1779 1780 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) 1781 { 1782 tcg_context_init(max_threads); 1783 tcg_region_init(tb_size, splitwx, max_threads); 1784 } 1785 1786 /* 1787 * Allocate TBs right before their corresponding translated code, making 1788 * sure that TBs and code are on different cache lines. 
1789 */ 1790 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1791 { 1792 uintptr_t align = qemu_icache_linesize; 1793 TranslationBlock *tb; 1794 void *next; 1795 1796 retry: 1797 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1798 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1799 1800 if (unlikely(next > s->code_gen_highwater)) { 1801 if (tcg_region_alloc(s)) { 1802 return NULL; 1803 } 1804 goto retry; 1805 } 1806 qatomic_set(&s->code_gen_ptr, next); 1807 return tb; 1808 } 1809 1810 void tcg_prologue_init(void) 1811 { 1812 TCGContext *s = tcg_ctx; 1813 size_t prologue_size; 1814 1815 s->code_ptr = s->code_gen_ptr; 1816 s->code_buf = s->code_gen_ptr; 1817 s->data_gen_ptr = NULL; 1818 1819 #ifndef CONFIG_TCG_INTERPRETER 1820 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1821 #endif 1822 1823 s->pool_labels = NULL; 1824 1825 qemu_thread_jit_write(); 1826 /* Generate the prologue. */ 1827 tcg_target_qemu_prologue(s); 1828 1829 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1830 { 1831 int result = tcg_out_pool_finalize(s); 1832 tcg_debug_assert(result == 0); 1833 } 1834 1835 prologue_size = tcg_current_code_size(s); 1836 perf_report_prologue(s->code_gen_ptr, prologue_size); 1837 1838 #ifndef CONFIG_TCG_INTERPRETER 1839 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1840 (uintptr_t)s->code_buf, prologue_size); 1841 #endif 1842 1843 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1844 FILE *logfile = qemu_log_trylock(); 1845 if (logfile) { 1846 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1847 if (s->data_gen_ptr) { 1848 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1849 size_t data_size = prologue_size - code_size; 1850 size_t i; 1851 1852 disas(logfile, s->code_gen_ptr, code_size); 1853 1854 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1855 if (sizeof(tcg_target_ulong) == 8) { 1856 fprintf(logfile, 1857 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1858 (uintptr_t)s->data_gen_ptr + i, 1859 *(uint64_t *)(s->data_gen_ptr + i)); 1860 } else { 1861 fprintf(logfile, 1862 "0x%08" PRIxPTR ": .long 0x%08x\n", 1863 (uintptr_t)s->data_gen_ptr + i, 1864 *(uint32_t *)(s->data_gen_ptr + i)); 1865 } 1866 } 1867 } else { 1868 disas(logfile, s->code_gen_ptr, prologue_size); 1869 } 1870 fprintf(logfile, "\n"); 1871 qemu_log_unlock(logfile); 1872 } 1873 } 1874 1875 #ifndef CONFIG_TCG_INTERPRETER 1876 /* 1877 * Assert that goto_ptr is implemented completely, setting an epilogue. 1878 * For tci, we use NULL as the signal to return from the interpreter, 1879 * so skip this check. 1880 */ 1881 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1882 #endif 1883 1884 tcg_region_prologue_set(s); 1885 } 1886 1887 void tcg_func_start(TCGContext *s) 1888 { 1889 tcg_pool_reset(s); 1890 s->nb_temps = s->nb_globals; 1891 1892 /* No temps have been previously allocated for size or locality. */ 1893 tcg_temp_ebb_reset_freed(s); 1894 1895 /* No constant temps have been previously allocated. 
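       The const_table hashes still reference TCGTemps from the previous
       translation, which were discarded when nb_temps was reset above,
       so flush them here.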
*/ 1896 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1897 if (s->const_table[i]) { 1898 g_hash_table_remove_all(s->const_table[i]); 1899 } 1900 } 1901 1902 s->nb_ops = 0; 1903 s->nb_labels = 0; 1904 s->current_frame_offset = s->frame_start; 1905 1906 #ifdef CONFIG_DEBUG_TCG 1907 s->goto_tb_issue_mask = 0; 1908 #endif 1909 1910 QTAILQ_INIT(&s->ops); 1911 QTAILQ_INIT(&s->free_ops); 1912 s->emit_before_op = NULL; 1913 QSIMPLEQ_INIT(&s->labels); 1914 1915 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 1916 tcg_debug_assert(s->insn_start_words > 0); 1917 } 1918 1919 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1920 { 1921 int n = s->nb_temps++; 1922 1923 if (n >= TCG_MAX_TEMPS) { 1924 tcg_raise_tb_overflow(s); 1925 } 1926 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1927 } 1928 1929 static TCGTemp *tcg_global_alloc(TCGContext *s) 1930 { 1931 TCGTemp *ts; 1932 1933 tcg_debug_assert(s->nb_globals == s->nb_temps); 1934 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1935 s->nb_globals++; 1936 ts = tcg_temp_alloc(s); 1937 ts->kind = TEMP_GLOBAL; 1938 1939 return ts; 1940 } 1941 1942 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1943 TCGReg reg, const char *name) 1944 { 1945 TCGTemp *ts; 1946 1947 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); 1948 1949 ts = tcg_global_alloc(s); 1950 ts->base_type = type; 1951 ts->type = type; 1952 ts->kind = TEMP_FIXED; 1953 ts->reg = reg; 1954 ts->name = name; 1955 tcg_regset_set_reg(s->reserved_regs, reg); 1956 1957 return ts; 1958 } 1959 1960 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1961 { 1962 s->frame_start = start; 1963 s->frame_end = start + size; 1964 s->frame_temp 1965 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1966 } 1967 1968 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset, 1969 const char *name, TCGType type) 1970 { 1971 TCGContext *s = tcg_ctx; 1972 TCGTemp *base_ts = tcgv_ptr_temp(base); 1973 TCGTemp *ts = tcg_global_alloc(s); 1974 int indirect_reg = 0; 1975 1976 switch (base_ts->kind) { 1977 case TEMP_FIXED: 1978 break; 1979 case TEMP_GLOBAL: 1980 /* We do not support double-indirect registers. */ 1981 tcg_debug_assert(!base_ts->indirect_reg); 1982 base_ts->indirect_base = 1; 1983 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1984 ? 
2 : 1); 1985 indirect_reg = 1; 1986 break; 1987 default: 1988 g_assert_not_reached(); 1989 } 1990 1991 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1992 TCGTemp *ts2 = tcg_global_alloc(s); 1993 char buf[64]; 1994 1995 ts->base_type = TCG_TYPE_I64; 1996 ts->type = TCG_TYPE_I32; 1997 ts->indirect_reg = indirect_reg; 1998 ts->mem_allocated = 1; 1999 ts->mem_base = base_ts; 2000 ts->mem_offset = offset; 2001 pstrcpy(buf, sizeof(buf), name); 2002 pstrcat(buf, sizeof(buf), "_0"); 2003 ts->name = strdup(buf); 2004 2005 tcg_debug_assert(ts2 == ts + 1); 2006 ts2->base_type = TCG_TYPE_I64; 2007 ts2->type = TCG_TYPE_I32; 2008 ts2->indirect_reg = indirect_reg; 2009 ts2->mem_allocated = 1; 2010 ts2->mem_base = base_ts; 2011 ts2->mem_offset = offset + 4; 2012 ts2->temp_subindex = 1; 2013 pstrcpy(buf, sizeof(buf), name); 2014 pstrcat(buf, sizeof(buf), "_1"); 2015 ts2->name = strdup(buf); 2016 } else { 2017 ts->base_type = type; 2018 ts->type = type; 2019 ts->indirect_reg = indirect_reg; 2020 ts->mem_allocated = 1; 2021 ts->mem_base = base_ts; 2022 ts->mem_offset = offset; 2023 ts->name = name; 2024 } 2025 return ts; 2026 } 2027 2028 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name) 2029 { 2030 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32); 2031 return temp_tcgv_i32(ts); 2032 } 2033 2034 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name) 2035 { 2036 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64); 2037 return temp_tcgv_i64(ts); 2038 } 2039 2040 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name) 2041 { 2042 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR); 2043 return temp_tcgv_ptr(ts); 2044 } 2045 2046 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind) 2047 { 2048 TCGContext *s = tcg_ctx; 2049 TCGTemp *ts; 2050 int n; 2051 2052 if (kind == TEMP_EBB) { 2053 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS); 2054 2055 if (idx < TCG_MAX_TEMPS) { 2056 /* There is already an available temp with the right type. 
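       Reuse it: free_temps[type] is a bitmap over s->temps, indexed by
       temp_idx(), of EBB temporaries released by tcg_temp_free_internal().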
*/ 2057 clear_bit(idx, s->free_temps[type].l); 2058 2059 ts = &s->temps[idx]; 2060 ts->temp_allocated = 1; 2061 tcg_debug_assert(ts->base_type == type); 2062 tcg_debug_assert(ts->kind == kind); 2063 return ts; 2064 } 2065 } else { 2066 tcg_debug_assert(kind == TEMP_TB); 2067 } 2068 2069 switch (type) { 2070 case TCG_TYPE_I32: 2071 case TCG_TYPE_V64: 2072 case TCG_TYPE_V128: 2073 case TCG_TYPE_V256: 2074 n = 1; 2075 break; 2076 case TCG_TYPE_I64: 2077 n = 64 / TCG_TARGET_REG_BITS; 2078 break; 2079 case TCG_TYPE_I128: 2080 n = 128 / TCG_TARGET_REG_BITS; 2081 break; 2082 default: 2083 g_assert_not_reached(); 2084 } 2085 2086 ts = tcg_temp_alloc(s); 2087 ts->base_type = type; 2088 ts->temp_allocated = 1; 2089 ts->kind = kind; 2090 2091 if (n == 1) { 2092 ts->type = type; 2093 } else { 2094 ts->type = TCG_TYPE_REG; 2095 2096 for (int i = 1; i < n; ++i) { 2097 TCGTemp *ts2 = tcg_temp_alloc(s); 2098 2099 tcg_debug_assert(ts2 == ts + i); 2100 ts2->base_type = type; 2101 ts2->type = TCG_TYPE_REG; 2102 ts2->temp_allocated = 1; 2103 ts2->temp_subindex = i; 2104 ts2->kind = kind; 2105 } 2106 } 2107 return ts; 2108 } 2109 2110 TCGv_i32 tcg_temp_new_i32(void) 2111 { 2112 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB)); 2113 } 2114 2115 TCGv_i32 tcg_temp_ebb_new_i32(void) 2116 { 2117 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB)); 2118 } 2119 2120 TCGv_i64 tcg_temp_new_i64(void) 2121 { 2122 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB)); 2123 } 2124 2125 TCGv_i64 tcg_temp_ebb_new_i64(void) 2126 { 2127 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB)); 2128 } 2129 2130 TCGv_ptr tcg_temp_new_ptr(void) 2131 { 2132 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB)); 2133 } 2134 2135 TCGv_ptr tcg_temp_ebb_new_ptr(void) 2136 { 2137 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB)); 2138 } 2139 2140 TCGv_i128 tcg_temp_new_i128(void) 2141 { 2142 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB)); 2143 } 2144 2145 TCGv_i128 tcg_temp_ebb_new_i128(void) 2146 { 2147 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB)); 2148 } 2149 2150 TCGv_vec tcg_temp_new_vec(TCGType type) 2151 { 2152 TCGTemp *t; 2153 2154 #ifdef CONFIG_DEBUG_TCG 2155 switch (type) { 2156 case TCG_TYPE_V64: 2157 assert(TCG_TARGET_HAS_v64); 2158 break; 2159 case TCG_TYPE_V128: 2160 assert(TCG_TARGET_HAS_v128); 2161 break; 2162 case TCG_TYPE_V256: 2163 assert(TCG_TARGET_HAS_v256); 2164 break; 2165 default: 2166 g_assert_not_reached(); 2167 } 2168 #endif 2169 2170 t = tcg_temp_new_internal(type, TEMP_EBB); 2171 return temp_tcgv_vec(t); 2172 } 2173 2174 /* Create a new temp of the same type as an existing temp. */ 2175 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 2176 { 2177 TCGTemp *t = tcgv_vec_temp(match); 2178 2179 tcg_debug_assert(t->temp_allocated != 0); 2180 2181 t = tcg_temp_new_internal(t->base_type, TEMP_EBB); 2182 return temp_tcgv_vec(t); 2183 } 2184 2185 void tcg_temp_free_internal(TCGTemp *ts) 2186 { 2187 TCGContext *s = tcg_ctx; 2188 2189 switch (ts->kind) { 2190 case TEMP_CONST: 2191 case TEMP_TB: 2192 /* Silently ignore free. */ 2193 break; 2194 case TEMP_EBB: 2195 tcg_debug_assert(ts->temp_allocated != 0); 2196 ts->temp_allocated = 0; 2197 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l); 2198 break; 2199 default: 2200 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. 
*/ 2201 g_assert_not_reached(); 2202 } 2203 } 2204 2205 void tcg_temp_free_i32(TCGv_i32 arg) 2206 { 2207 tcg_temp_free_internal(tcgv_i32_temp(arg)); 2208 } 2209 2210 void tcg_temp_free_i64(TCGv_i64 arg) 2211 { 2212 tcg_temp_free_internal(tcgv_i64_temp(arg)); 2213 } 2214 2215 void tcg_temp_free_i128(TCGv_i128 arg) 2216 { 2217 tcg_temp_free_internal(tcgv_i128_temp(arg)); 2218 } 2219 2220 void tcg_temp_free_ptr(TCGv_ptr arg) 2221 { 2222 tcg_temp_free_internal(tcgv_ptr_temp(arg)); 2223 } 2224 2225 void tcg_temp_free_vec(TCGv_vec arg) 2226 { 2227 tcg_temp_free_internal(tcgv_vec_temp(arg)); 2228 } 2229 2230 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 2231 { 2232 TCGContext *s = tcg_ctx; 2233 GHashTable *h = s->const_table[type]; 2234 TCGTemp *ts; 2235 2236 if (h == NULL) { 2237 h = g_hash_table_new(g_int64_hash, g_int64_equal); 2238 s->const_table[type] = h; 2239 } 2240 2241 ts = g_hash_table_lookup(h, &val); 2242 if (ts == NULL) { 2243 int64_t *val_ptr; 2244 2245 ts = tcg_temp_alloc(s); 2246 2247 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 2248 TCGTemp *ts2 = tcg_temp_alloc(s); 2249 2250 tcg_debug_assert(ts2 == ts + 1); 2251 2252 ts->base_type = TCG_TYPE_I64; 2253 ts->type = TCG_TYPE_I32; 2254 ts->kind = TEMP_CONST; 2255 ts->temp_allocated = 1; 2256 2257 ts2->base_type = TCG_TYPE_I64; 2258 ts2->type = TCG_TYPE_I32; 2259 ts2->kind = TEMP_CONST; 2260 ts2->temp_allocated = 1; 2261 ts2->temp_subindex = 1; 2262 2263 /* 2264 * Retain the full value of the 64-bit constant in the low 2265 * part, so that the hash table works. Actual uses will 2266 * truncate the value to the low part. 2267 */ 2268 ts[HOST_BIG_ENDIAN].val = val; 2269 ts[!HOST_BIG_ENDIAN].val = val >> 32; 2270 val_ptr = &ts[HOST_BIG_ENDIAN].val; 2271 } else { 2272 ts->base_type = type; 2273 ts->type = type; 2274 ts->kind = TEMP_CONST; 2275 ts->temp_allocated = 1; 2276 ts->val = val; 2277 val_ptr = &ts->val; 2278 } 2279 g_hash_table_insert(h, val_ptr, ts); 2280 } 2281 2282 return ts; 2283 } 2284 2285 TCGv_i32 tcg_constant_i32(int32_t val) 2286 { 2287 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val)); 2288 } 2289 2290 TCGv_i64 tcg_constant_i64(int64_t val) 2291 { 2292 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); 2293 } 2294 2295 TCGv_ptr tcg_constant_ptr_int(intptr_t val) 2296 { 2297 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); 2298 } 2299 2300 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 2301 { 2302 val = dup_const(vece, val); 2303 return temp_tcgv_vec(tcg_constant_internal(type, val)); 2304 } 2305 2306 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 2307 { 2308 TCGTemp *t = tcgv_vec_temp(match); 2309 2310 tcg_debug_assert(t->temp_allocated != 0); 2311 return tcg_constant_vec(t->base_type, vece, val); 2312 } 2313 2314 #ifdef CONFIG_DEBUG_TCG 2315 size_t temp_idx(TCGTemp *ts) 2316 { 2317 ptrdiff_t n = ts - tcg_ctx->temps; 2318 assert(n >= 0 && n < tcg_ctx->nb_temps); 2319 return n; 2320 } 2321 2322 TCGTemp *tcgv_i32_temp(TCGv_i32 v) 2323 { 2324 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps); 2325 2326 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps); 2327 assert(o % sizeof(TCGTemp) == 0); 2328 2329 return (void *)tcg_ctx + (uintptr_t)v; 2330 } 2331 #endif /* CONFIG_DEBUG_TCG */ 2332 2333 /* 2334 * Return true if OP may appear in the opcode stream with TYPE. 2335 * Test the runtime variable that controls each opcode. 
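 *
 * For example (illustrative only), an expander can test for backend
 * support before emitting an opcode directly:
 *
 *     if (tcg_op_supported(INDEX_op_neg_vec, TCG_TYPE_V128, 0)) {
 *         ... emit INDEX_op_neg_vec ...
 *     } else {
 *         ... use a generic fallback expansion ...
 *     }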
2336 */ 2337 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) 2338 { 2339 bool has_type; 2340 2341 switch (type) { 2342 case TCG_TYPE_I32: 2343 has_type = true; 2344 break; 2345 case TCG_TYPE_I64: 2346 has_type = TCG_TARGET_REG_BITS == 64; 2347 break; 2348 case TCG_TYPE_V64: 2349 has_type = TCG_TARGET_HAS_v64; 2350 break; 2351 case TCG_TYPE_V128: 2352 has_type = TCG_TARGET_HAS_v128; 2353 break; 2354 case TCG_TYPE_V256: 2355 has_type = TCG_TARGET_HAS_v256; 2356 break; 2357 default: 2358 has_type = false; 2359 break; 2360 } 2361 2362 switch (op) { 2363 case INDEX_op_discard: 2364 case INDEX_op_set_label: 2365 case INDEX_op_call: 2366 case INDEX_op_br: 2367 case INDEX_op_mb: 2368 case INDEX_op_insn_start: 2369 case INDEX_op_exit_tb: 2370 case INDEX_op_goto_tb: 2371 case INDEX_op_goto_ptr: 2372 case INDEX_op_qemu_ld_i32: 2373 case INDEX_op_qemu_st_i32: 2374 case INDEX_op_qemu_ld_i64: 2375 case INDEX_op_qemu_st_i64: 2376 return true; 2377 2378 case INDEX_op_qemu_st8_i32: 2379 return TCG_TARGET_HAS_qemu_st8_i32; 2380 2381 case INDEX_op_qemu_ld_i128: 2382 case INDEX_op_qemu_st_i128: 2383 return TCG_TARGET_HAS_qemu_ldst_i128; 2384 2385 case INDEX_op_add: 2386 case INDEX_op_and: 2387 case INDEX_op_brcond: 2388 case INDEX_op_deposit: 2389 case INDEX_op_extract: 2390 case INDEX_op_mov: 2391 case INDEX_op_movcond: 2392 case INDEX_op_negsetcond: 2393 case INDEX_op_or: 2394 case INDEX_op_setcond: 2395 case INDEX_op_sextract: 2396 case INDEX_op_xor: 2397 return has_type; 2398 2399 case INDEX_op_ld8u_i32: 2400 case INDEX_op_ld8s_i32: 2401 case INDEX_op_ld16u_i32: 2402 case INDEX_op_ld16s_i32: 2403 case INDEX_op_ld_i32: 2404 case INDEX_op_st8_i32: 2405 case INDEX_op_st16_i32: 2406 case INDEX_op_st_i32: 2407 return true; 2408 2409 case INDEX_op_add2_i32: 2410 return TCG_TARGET_HAS_add2_i32; 2411 case INDEX_op_sub2_i32: 2412 return TCG_TARGET_HAS_sub2_i32; 2413 2414 case INDEX_op_brcond2_i32: 2415 case INDEX_op_setcond2_i32: 2416 return TCG_TARGET_REG_BITS == 32; 2417 2418 case INDEX_op_ld8u_i64: 2419 case INDEX_op_ld8s_i64: 2420 case INDEX_op_ld16u_i64: 2421 case INDEX_op_ld16s_i64: 2422 case INDEX_op_ld32u_i64: 2423 case INDEX_op_ld32s_i64: 2424 case INDEX_op_ld_i64: 2425 case INDEX_op_st8_i64: 2426 case INDEX_op_st16_i64: 2427 case INDEX_op_st32_i64: 2428 case INDEX_op_st_i64: 2429 case INDEX_op_ext_i32_i64: 2430 case INDEX_op_extu_i32_i64: 2431 case INDEX_op_extrl_i64_i32: 2432 case INDEX_op_extrh_i64_i32: 2433 return TCG_TARGET_REG_BITS == 64; 2434 2435 case INDEX_op_add2_i64: 2436 return TCG_TARGET_HAS_add2_i64; 2437 case INDEX_op_sub2_i64: 2438 return TCG_TARGET_HAS_sub2_i64; 2439 2440 case INDEX_op_mov_vec: 2441 case INDEX_op_dup_vec: 2442 case INDEX_op_dupm_vec: 2443 case INDEX_op_ld_vec: 2444 case INDEX_op_st_vec: 2445 case INDEX_op_add_vec: 2446 case INDEX_op_sub_vec: 2447 case INDEX_op_and_vec: 2448 case INDEX_op_or_vec: 2449 case INDEX_op_xor_vec: 2450 case INDEX_op_cmp_vec: 2451 return has_type; 2452 case INDEX_op_dup2_vec: 2453 return has_type && TCG_TARGET_REG_BITS == 32; 2454 case INDEX_op_not_vec: 2455 return has_type && TCG_TARGET_HAS_not_vec; 2456 case INDEX_op_neg_vec: 2457 return has_type && TCG_TARGET_HAS_neg_vec; 2458 case INDEX_op_abs_vec: 2459 return has_type && TCG_TARGET_HAS_abs_vec; 2460 case INDEX_op_andc_vec: 2461 return has_type && TCG_TARGET_HAS_andc_vec; 2462 case INDEX_op_orc_vec: 2463 return has_type && TCG_TARGET_HAS_orc_vec; 2464 case INDEX_op_nand_vec: 2465 return has_type && TCG_TARGET_HAS_nand_vec; 2466 case INDEX_op_nor_vec: 2467 return has_type && 
TCG_TARGET_HAS_nor_vec; 2468 case INDEX_op_eqv_vec: 2469 return has_type && TCG_TARGET_HAS_eqv_vec; 2470 case INDEX_op_mul_vec: 2471 return has_type && TCG_TARGET_HAS_mul_vec; 2472 case INDEX_op_shli_vec: 2473 case INDEX_op_shri_vec: 2474 case INDEX_op_sari_vec: 2475 return has_type && TCG_TARGET_HAS_shi_vec; 2476 case INDEX_op_shls_vec: 2477 case INDEX_op_shrs_vec: 2478 case INDEX_op_sars_vec: 2479 return has_type && TCG_TARGET_HAS_shs_vec; 2480 case INDEX_op_shlv_vec: 2481 case INDEX_op_shrv_vec: 2482 case INDEX_op_sarv_vec: 2483 return has_type && TCG_TARGET_HAS_shv_vec; 2484 case INDEX_op_rotli_vec: 2485 return has_type && TCG_TARGET_HAS_roti_vec; 2486 case INDEX_op_rotls_vec: 2487 return has_type && TCG_TARGET_HAS_rots_vec; 2488 case INDEX_op_rotlv_vec: 2489 case INDEX_op_rotrv_vec: 2490 return has_type && TCG_TARGET_HAS_rotv_vec; 2491 case INDEX_op_ssadd_vec: 2492 case INDEX_op_usadd_vec: 2493 case INDEX_op_sssub_vec: 2494 case INDEX_op_ussub_vec: 2495 return has_type && TCG_TARGET_HAS_sat_vec; 2496 case INDEX_op_smin_vec: 2497 case INDEX_op_umin_vec: 2498 case INDEX_op_smax_vec: 2499 case INDEX_op_umax_vec: 2500 return has_type && TCG_TARGET_HAS_minmax_vec; 2501 case INDEX_op_bitsel_vec: 2502 return has_type && TCG_TARGET_HAS_bitsel_vec; 2503 case INDEX_op_cmpsel_vec: 2504 return has_type && TCG_TARGET_HAS_cmpsel_vec; 2505 2506 default: 2507 if (op < INDEX_op_last_generic) { 2508 const TCGOutOp *outop; 2509 TCGConstraintSetIndex con_set; 2510 2511 if (!has_type) { 2512 return false; 2513 } 2514 2515 outop = all_outop[op]; 2516 tcg_debug_assert(outop != NULL); 2517 2518 con_set = outop->static_constraint; 2519 if (con_set == C_Dynamic) { 2520 con_set = outop->dynamic_constraint(type, flags); 2521 } 2522 if (con_set >= 0) { 2523 return true; 2524 } 2525 tcg_debug_assert(con_set == C_NotImplemented); 2526 return false; 2527 } 2528 tcg_debug_assert(op < NB_OPS); 2529 return true; 2530 2531 case INDEX_op_last_generic: 2532 g_assert_not_reached(); 2533 } 2534 } 2535 2536 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) 2537 { 2538 unsigned width; 2539 2540 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); 2541 width = (type == TCG_TYPE_I32 ? 
32 : 64); 2542 2543 tcg_debug_assert(ofs < width); 2544 tcg_debug_assert(len > 0); 2545 tcg_debug_assert(len <= width - ofs); 2546 2547 return TCG_TARGET_deposit_valid(type, ofs, len); 2548 } 2549 2550 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2551 2552 static void tcg_gen_callN(void *func, TCGHelperInfo *info, 2553 TCGTemp *ret, TCGTemp **args) 2554 { 2555 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2556 int n_extend = 0; 2557 TCGOp *op; 2558 int i, n, pi = 0, total_args; 2559 2560 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) { 2561 init_call_layout(info); 2562 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info)); 2563 } 2564 2565 total_args = info->nr_out + info->nr_in + 2; 2566 op = tcg_op_alloc(INDEX_op_call, total_args); 2567 2568 #ifdef CONFIG_PLUGIN 2569 /* Flag helpers that may affect guest state */ 2570 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2571 tcg_ctx->plugin_insn->calls_helpers = true; 2572 } 2573 #endif 2574 2575 TCGOP_CALLO(op) = n = info->nr_out; 2576 switch (n) { 2577 case 0: 2578 tcg_debug_assert(ret == NULL); 2579 break; 2580 case 1: 2581 tcg_debug_assert(ret != NULL); 2582 op->args[pi++] = temp_arg(ret); 2583 break; 2584 case 2: 2585 case 4: 2586 tcg_debug_assert(ret != NULL); 2587 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2588 tcg_debug_assert(ret->temp_subindex == 0); 2589 for (i = 0; i < n; ++i) { 2590 op->args[pi++] = temp_arg(ret + i); 2591 } 2592 break; 2593 default: 2594 g_assert_not_reached(); 2595 } 2596 2597 TCGOP_CALLI(op) = n = info->nr_in; 2598 for (i = 0; i < n; i++) { 2599 const TCGCallArgumentLoc *loc = &info->in[i]; 2600 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2601 2602 switch (loc->kind) { 2603 case TCG_CALL_ARG_NORMAL: 2604 case TCG_CALL_ARG_BY_REF: 2605 case TCG_CALL_ARG_BY_REF_N: 2606 op->args[pi++] = temp_arg(ts); 2607 break; 2608 2609 case TCG_CALL_ARG_EXTEND_U: 2610 case TCG_CALL_ARG_EXTEND_S: 2611 { 2612 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2613 TCGv_i32 orig = temp_tcgv_i32(ts); 2614 2615 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2616 tcg_gen_ext_i32_i64(temp, orig); 2617 } else { 2618 tcg_gen_extu_i32_i64(temp, orig); 2619 } 2620 op->args[pi++] = tcgv_i64_arg(temp); 2621 extend_free[n_extend++] = temp; 2622 } 2623 break; 2624 2625 default: 2626 g_assert_not_reached(); 2627 } 2628 } 2629 op->args[pi++] = (uintptr_t)func; 2630 op->args[pi++] = (uintptr_t)info; 2631 tcg_debug_assert(pi == total_args); 2632 2633 if (tcg_ctx->emit_before_op) { 2634 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 2635 } else { 2636 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2637 } 2638 2639 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2640 for (i = 0; i < n_extend; ++i) { 2641 tcg_temp_free_i64(extend_free[i]); 2642 } 2643 } 2644 2645 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret) 2646 { 2647 tcg_gen_callN(func, info, ret, NULL); 2648 } 2649 2650 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1) 2651 { 2652 tcg_gen_callN(func, info, ret, &t1); 2653 } 2654 2655 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret, 2656 TCGTemp *t1, TCGTemp *t2) 2657 { 2658 TCGTemp *args[2] = { t1, t2 }; 2659 tcg_gen_callN(func, info, ret, args); 2660 } 2661 2662 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret, 2663 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3) 2664 { 2665 TCGTemp *args[3] = { t1, t2, t3 }; 2666 tcg_gen_callN(func, info, ret, args); 2667 } 2668 2669 void tcg_gen_call4(void 
*func, TCGHelperInfo *info, TCGTemp *ret, 2670 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4) 2671 { 2672 TCGTemp *args[4] = { t1, t2, t3, t4 }; 2673 tcg_gen_callN(func, info, ret, args); 2674 } 2675 2676 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2677 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5) 2678 { 2679 TCGTemp *args[5] = { t1, t2, t3, t4, t5 }; 2680 tcg_gen_callN(func, info, ret, args); 2681 } 2682 2683 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret, 2684 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, 2685 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6) 2686 { 2687 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 }; 2688 tcg_gen_callN(func, info, ret, args); 2689 } 2690 2691 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, 2692 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, 2693 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7) 2694 { 2695 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 }; 2696 tcg_gen_callN(func, info, ret, args); 2697 } 2698 2699 static void tcg_reg_alloc_start(TCGContext *s) 2700 { 2701 int i, n; 2702 2703 for (i = 0, n = s->nb_temps; i < n; i++) { 2704 TCGTemp *ts = &s->temps[i]; 2705 TCGTempVal val = TEMP_VAL_MEM; 2706 2707 switch (ts->kind) { 2708 case TEMP_CONST: 2709 val = TEMP_VAL_CONST; 2710 break; 2711 case TEMP_FIXED: 2712 val = TEMP_VAL_REG; 2713 break; 2714 case TEMP_GLOBAL: 2715 break; 2716 case TEMP_EBB: 2717 val = TEMP_VAL_DEAD; 2718 /* fall through */ 2719 case TEMP_TB: 2720 ts->mem_allocated = 0; 2721 break; 2722 default: 2723 g_assert_not_reached(); 2724 } 2725 ts->val_type = val; 2726 } 2727 2728 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2729 } 2730 2731 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2732 TCGTemp *ts) 2733 { 2734 int idx = temp_idx(ts); 2735 2736 switch (ts->kind) { 2737 case TEMP_FIXED: 2738 case TEMP_GLOBAL: 2739 pstrcpy(buf, buf_size, ts->name); 2740 break; 2741 case TEMP_TB: 2742 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2743 break; 2744 case TEMP_EBB: 2745 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2746 break; 2747 case TEMP_CONST: 2748 switch (ts->type) { 2749 case TCG_TYPE_I32: 2750 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2751 break; 2752 #if TCG_TARGET_REG_BITS > 32 2753 case TCG_TYPE_I64: 2754 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2755 break; 2756 #endif 2757 case TCG_TYPE_V64: 2758 case TCG_TYPE_V128: 2759 case TCG_TYPE_V256: 2760 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2761 64 << (ts->type - TCG_TYPE_V64), ts->val); 2762 break; 2763 default: 2764 g_assert_not_reached(); 2765 } 2766 break; 2767 } 2768 return buf; 2769 } 2770 2771 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2772 int buf_size, TCGArg arg) 2773 { 2774 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2775 } 2776 2777 static const char * const cond_name[] = 2778 { 2779 [TCG_COND_NEVER] = "never", 2780 [TCG_COND_ALWAYS] = "always", 2781 [TCG_COND_EQ] = "eq", 2782 [TCG_COND_NE] = "ne", 2783 [TCG_COND_LT] = "lt", 2784 [TCG_COND_GE] = "ge", 2785 [TCG_COND_LE] = "le", 2786 [TCG_COND_GT] = "gt", 2787 [TCG_COND_LTU] = "ltu", 2788 [TCG_COND_GEU] = "geu", 2789 [TCG_COND_LEU] = "leu", 2790 [TCG_COND_GTU] = "gtu", 2791 [TCG_COND_TSTEQ] = "tsteq", 2792 [TCG_COND_TSTNE] = "tstne", 2793 }; 2794 2795 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2796 { 2797 [MO_UB] = "ub", 2798 [MO_SB] = "sb", 2799 [MO_LEUW] = "leuw", 2800 [MO_LESW] = "lesw", 2801 [MO_LEUL] = "leul", 2802 [MO_LESL] = "lesl", 
2803 [MO_LEUQ] = "leq", 2804 [MO_BEUW] = "beuw", 2805 [MO_BESW] = "besw", 2806 [MO_BEUL] = "beul", 2807 [MO_BESL] = "besl", 2808 [MO_BEUQ] = "beq", 2809 [MO_128 + MO_BE] = "beo", 2810 [MO_128 + MO_LE] = "leo", 2811 }; 2812 2813 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2814 [MO_UNALN >> MO_ASHIFT] = "un+", 2815 [MO_ALIGN >> MO_ASHIFT] = "al+", 2816 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2817 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2818 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2819 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2820 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2821 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2822 }; 2823 2824 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2825 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2826 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2827 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2828 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2829 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2830 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2831 }; 2832 2833 static const char bswap_flag_name[][6] = { 2834 [TCG_BSWAP_IZ] = "iz", 2835 [TCG_BSWAP_OZ] = "oz", 2836 [TCG_BSWAP_OS] = "os", 2837 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2838 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2839 }; 2840 2841 #ifdef CONFIG_PLUGIN 2842 static const char * const plugin_from_name[] = { 2843 "from-tb", 2844 "from-insn", 2845 "after-insn", 2846 "after-tb", 2847 }; 2848 #endif 2849 2850 static inline bool tcg_regset_single(TCGRegSet d) 2851 { 2852 return (d & (d - 1)) == 0; 2853 } 2854 2855 static inline TCGReg tcg_regset_first(TCGRegSet d) 2856 { 2857 if (TCG_TARGET_NB_REGS <= 32) { 2858 return ctz32(d); 2859 } else { 2860 return ctz64(d); 2861 } 2862 } 2863 2864 /* Return only the number of characters output -- no error return. */ 2865 #define ne_fprintf(...) \ 2866 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2867 2868 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2869 { 2870 char buf[128]; 2871 TCGOp *op; 2872 2873 QTAILQ_FOREACH(op, &s->ops, link) { 2874 int i, k, nb_oargs, nb_iargs, nb_cargs; 2875 const TCGOpDef *def; 2876 TCGOpcode c; 2877 int col = 0; 2878 2879 c = op->opc; 2880 def = &tcg_op_defs[c]; 2881 2882 if (c == INDEX_op_insn_start) { 2883 nb_oargs = 0; 2884 col += ne_fprintf(f, "\n ----"); 2885 2886 for (i = 0, k = s->insn_start_words; i < k; ++i) { 2887 col += ne_fprintf(f, " %016" PRIx64, 2888 tcg_get_insn_start_param(op, i)); 2889 } 2890 } else if (c == INDEX_op_call) { 2891 const TCGHelperInfo *info = tcg_call_info(op); 2892 void *func = tcg_call_func(op); 2893 2894 /* variable number of arguments */ 2895 nb_oargs = TCGOP_CALLO(op); 2896 nb_iargs = TCGOP_CALLI(op); 2897 nb_cargs = def->nb_cargs; 2898 2899 col += ne_fprintf(f, " %s ", def->name); 2900 2901 /* 2902 * Print the function name from TCGHelperInfo, if available. 2903 * Note that plugins have a template function for the info, 2904 * but the actual function pointer comes from the plugin. 
2905 */ 2906 if (func == info->func) { 2907 col += ne_fprintf(f, "%s", info->name); 2908 } else { 2909 col += ne_fprintf(f, "plugin(%p)", func); 2910 } 2911 2912 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2913 for (i = 0; i < nb_oargs; i++) { 2914 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2915 op->args[i])); 2916 } 2917 for (i = 0; i < nb_iargs; i++) { 2918 TCGArg arg = op->args[nb_oargs + i]; 2919 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2920 col += ne_fprintf(f, ",%s", t); 2921 } 2922 } else { 2923 if (def->flags & TCG_OPF_INT) { 2924 col += ne_fprintf(f, " %s_i%d ", 2925 def->name, 2926 8 * tcg_type_size(TCGOP_TYPE(op))); 2927 } else if (def->flags & TCG_OPF_VECTOR) { 2928 col += ne_fprintf(f, "%s v%d,e%d,", 2929 def->name, 2930 8 * tcg_type_size(TCGOP_TYPE(op)), 2931 8 << TCGOP_VECE(op)); 2932 } else { 2933 col += ne_fprintf(f, " %s ", def->name); 2934 } 2935 2936 nb_oargs = def->nb_oargs; 2937 nb_iargs = def->nb_iargs; 2938 nb_cargs = def->nb_cargs; 2939 2940 k = 0; 2941 for (i = 0; i < nb_oargs; i++) { 2942 const char *sep = k ? "," : ""; 2943 col += ne_fprintf(f, "%s%s", sep, 2944 tcg_get_arg_str(s, buf, sizeof(buf), 2945 op->args[k++])); 2946 } 2947 for (i = 0; i < nb_iargs; i++) { 2948 const char *sep = k ? "," : ""; 2949 col += ne_fprintf(f, "%s%s", sep, 2950 tcg_get_arg_str(s, buf, sizeof(buf), 2951 op->args[k++])); 2952 } 2953 switch (c) { 2954 case INDEX_op_brcond: 2955 case INDEX_op_setcond: 2956 case INDEX_op_negsetcond: 2957 case INDEX_op_movcond: 2958 case INDEX_op_brcond2_i32: 2959 case INDEX_op_setcond2_i32: 2960 case INDEX_op_cmp_vec: 2961 case INDEX_op_cmpsel_vec: 2962 if (op->args[k] < ARRAY_SIZE(cond_name) 2963 && cond_name[op->args[k]]) { 2964 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2965 } else { 2966 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2967 } 2968 i = 1; 2969 break; 2970 case INDEX_op_qemu_ld_i32: 2971 case INDEX_op_qemu_st_i32: 2972 case INDEX_op_qemu_st8_i32: 2973 case INDEX_op_qemu_ld_i64: 2974 case INDEX_op_qemu_st_i64: 2975 case INDEX_op_qemu_ld_i128: 2976 case INDEX_op_qemu_st_i128: 2977 { 2978 const char *s_al, *s_op, *s_at; 2979 MemOpIdx oi = op->args[k++]; 2980 MemOp mop = get_memop(oi); 2981 unsigned ix = get_mmuidx(oi); 2982 2983 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT]; 2984 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)]; 2985 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2986 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2987 2988 /* If all fields are accounted for, print symbolically. 
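       e.g. an MO_ALIGN | MO_LEUL access in mmu index 1 prints as
       "al+leul,1"; otherwise fall back to the raw MemOp value in hex.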
*/ 2989 if (!mop && s_al && s_op && s_at) { 2990 col += ne_fprintf(f, ",%s%s%s,%u", 2991 s_at, s_al, s_op, ix); 2992 } else { 2993 mop = get_memop(oi); 2994 col += ne_fprintf(f, ",$0x%x,%u", mop, ix); 2995 } 2996 i = 1; 2997 } 2998 break; 2999 case INDEX_op_bswap16: 3000 case INDEX_op_bswap32: 3001 case INDEX_op_bswap64: 3002 { 3003 TCGArg flags = op->args[k]; 3004 const char *name = NULL; 3005 3006 if (flags < ARRAY_SIZE(bswap_flag_name)) { 3007 name = bswap_flag_name[flags]; 3008 } 3009 if (name) { 3010 col += ne_fprintf(f, ",%s", name); 3011 } else { 3012 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 3013 } 3014 i = k = 1; 3015 } 3016 break; 3017 #ifdef CONFIG_PLUGIN 3018 case INDEX_op_plugin_cb: 3019 { 3020 TCGArg from = op->args[k++]; 3021 const char *name = NULL; 3022 3023 if (from < ARRAY_SIZE(plugin_from_name)) { 3024 name = plugin_from_name[from]; 3025 } 3026 if (name) { 3027 col += ne_fprintf(f, "%s", name); 3028 } else { 3029 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from); 3030 } 3031 i = 1; 3032 } 3033 break; 3034 #endif 3035 default: 3036 i = 0; 3037 break; 3038 } 3039 switch (c) { 3040 case INDEX_op_set_label: 3041 case INDEX_op_br: 3042 case INDEX_op_brcond: 3043 case INDEX_op_brcond2_i32: 3044 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 3045 arg_label(op->args[k])->id); 3046 i++, k++; 3047 break; 3048 case INDEX_op_mb: 3049 { 3050 TCGBar membar = op->args[k]; 3051 const char *b_op, *m_op; 3052 3053 switch (membar & TCG_BAR_SC) { 3054 case 0: 3055 b_op = "none"; 3056 break; 3057 case TCG_BAR_LDAQ: 3058 b_op = "acq"; 3059 break; 3060 case TCG_BAR_STRL: 3061 b_op = "rel"; 3062 break; 3063 case TCG_BAR_SC: 3064 b_op = "seq"; 3065 break; 3066 default: 3067 g_assert_not_reached(); 3068 } 3069 3070 switch (membar & TCG_MO_ALL) { 3071 case 0: 3072 m_op = "none"; 3073 break; 3074 case TCG_MO_LD_LD: 3075 m_op = "rr"; 3076 break; 3077 case TCG_MO_LD_ST: 3078 m_op = "rw"; 3079 break; 3080 case TCG_MO_ST_LD: 3081 m_op = "wr"; 3082 break; 3083 case TCG_MO_ST_ST: 3084 m_op = "ww"; 3085 break; 3086 case TCG_MO_LD_LD | TCG_MO_LD_ST: 3087 m_op = "rr+rw"; 3088 break; 3089 case TCG_MO_LD_LD | TCG_MO_ST_LD: 3090 m_op = "rr+wr"; 3091 break; 3092 case TCG_MO_LD_LD | TCG_MO_ST_ST: 3093 m_op = "rr+ww"; 3094 break; 3095 case TCG_MO_LD_ST | TCG_MO_ST_LD: 3096 m_op = "rw+wr"; 3097 break; 3098 case TCG_MO_LD_ST | TCG_MO_ST_ST: 3099 m_op = "rw+ww"; 3100 break; 3101 case TCG_MO_ST_LD | TCG_MO_ST_ST: 3102 m_op = "wr+ww"; 3103 break; 3104 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 3105 m_op = "rr+rw+wr"; 3106 break; 3107 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 3108 m_op = "rr+rw+ww"; 3109 break; 3110 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 3111 m_op = "rr+wr+ww"; 3112 break; 3113 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 3114 m_op = "rw+wr+ww"; 3115 break; 3116 case TCG_MO_ALL: 3117 m_op = "all"; 3118 break; 3119 default: 3120 g_assert_not_reached(); 3121 } 3122 3123 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 3124 i++, k++; 3125 } 3126 break; 3127 default: 3128 break; 3129 } 3130 for (; i < nb_cargs; i++, k++) { 3131 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 3132 op->args[k]); 3133 } 3134 } 3135 3136 if (have_prefs || op->life) { 3137 for (; col < 40; ++col) { 3138 putc(' ', f); 3139 } 3140 } 3141 3142 if (op->life) { 3143 unsigned life = op->life; 3144 3145 if (life & (SYNC_ARG * 3)) { 3146 ne_fprintf(f, " sync:"); 3147 for (i = 0; i < 2; ++i) { 3148 if (life & (SYNC_ARG << i)) { 3149 ne_fprintf(f, " %d", i); 3150 } 3151 } 3152 } 3153 life /= DEAD_ARG; 3154 if (life) { 3155 ne_fprintf(f, " dead:"); 3156 for (i = 0; life; ++i, life >>= 1) { 3157 if (life & 1) { 3158 ne_fprintf(f, " %d", i); 3159 } 3160 } 3161 } 3162 } 3163 3164 if (have_prefs) { 3165 for (i = 0; i < nb_oargs; ++i) { 3166 TCGRegSet set = output_pref(op, i); 3167 3168 if (i == 0) { 3169 ne_fprintf(f, " pref="); 3170 } else { 3171 ne_fprintf(f, ","); 3172 } 3173 if (set == 0) { 3174 ne_fprintf(f, "none"); 3175 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 3176 ne_fprintf(f, "all"); 3177 #ifdef CONFIG_DEBUG_TCG 3178 } else if (tcg_regset_single(set)) { 3179 TCGReg reg = tcg_regset_first(set); 3180 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 3181 #endif 3182 } else if (TCG_TARGET_NB_REGS <= 32) { 3183 ne_fprintf(f, "0x%x", (uint32_t)set); 3184 } else { 3185 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 3186 } 3187 } 3188 } 3189 3190 putc('\n', f); 3191 } 3192 } 3193 3194 /* we give more priority to constraints with less registers */ 3195 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) 3196 { 3197 int n; 3198 3199 arg_ct += k; 3200 n = ctpop64(arg_ct->regs); 3201 3202 /* 3203 * Sort constraints of a single register first, which includes output 3204 * aliases (which must exactly match the input already allocated). 3205 */ 3206 if (n == 1 || arg_ct->oalias) { 3207 return INT_MAX; 3208 } 3209 3210 /* 3211 * Sort register pairs next, first then second immediately after. 3212 * Arbitrarily sort multiple pairs by the index of the first reg; 3213 * there shouldn't be many pairs. 3214 */ 3215 switch (arg_ct->pair) { 3216 case 1: 3217 case 3: 3218 return (k + 1) * 2; 3219 case 2: 3220 return (arg_ct->pair_index + 1) * 2 - 1; 3221 } 3222 3223 /* Finally, sort by decreasing register count. 
*/ 3224 assert(n > 1); 3225 return -n; 3226 } 3227 3228 /* sort from highest priority to lowest */ 3229 static void sort_constraints(TCGArgConstraint *a, int start, int n) 3230 { 3231 int i, j; 3232 3233 for (i = 0; i < n; i++) { 3234 a[start + i].sort_index = start + i; 3235 } 3236 if (n <= 1) { 3237 return; 3238 } 3239 for (i = 0; i < n - 1; i++) { 3240 for (j = i + 1; j < n; j++) { 3241 int p1 = get_constraint_priority(a, a[start + i].sort_index); 3242 int p2 = get_constraint_priority(a, a[start + j].sort_index); 3243 if (p1 < p2) { 3244 int tmp = a[start + i].sort_index; 3245 a[start + i].sort_index = a[start + j].sort_index; 3246 a[start + j].sort_index = tmp; 3247 } 3248 } 3249 } 3250 } 3251 3252 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; 3253 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; 3254 3255 static void process_constraint_sets(void) 3256 { 3257 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { 3258 const TCGConstraintSet *tdefs = &constraint_sets[c]; 3259 TCGArgConstraint *args_ct = all_cts[c]; 3260 int nb_oargs = tdefs->nb_oargs; 3261 int nb_iargs = tdefs->nb_iargs; 3262 int nb_args = nb_oargs + nb_iargs; 3263 bool saw_alias_pair = false; 3264 3265 for (int i = 0; i < nb_args; i++) { 3266 const char *ct_str = tdefs->args_ct_str[i]; 3267 bool input_p = i >= nb_oargs; 3268 int o; 3269 3270 switch (*ct_str) { 3271 case '0' ... '9': 3272 o = *ct_str - '0'; 3273 tcg_debug_assert(input_p); 3274 tcg_debug_assert(o < nb_oargs); 3275 tcg_debug_assert(args_ct[o].regs != 0); 3276 tcg_debug_assert(!args_ct[o].oalias); 3277 args_ct[i] = args_ct[o]; 3278 /* The output sets oalias. */ 3279 args_ct[o].oalias = 1; 3280 args_ct[o].alias_index = i; 3281 /* The input sets ialias. */ 3282 args_ct[i].ialias = 1; 3283 args_ct[i].alias_index = o; 3284 if (args_ct[i].pair) { 3285 saw_alias_pair = true; 3286 } 3287 tcg_debug_assert(ct_str[1] == '\0'); 3288 continue; 3289 3290 case '&': 3291 tcg_debug_assert(!input_p); 3292 args_ct[i].newreg = true; 3293 ct_str++; 3294 break; 3295 3296 case 'p': /* plus */ 3297 /* Allocate to the register after the previous. */ 3298 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3299 o = i - 1; 3300 tcg_debug_assert(!args_ct[o].pair); 3301 tcg_debug_assert(!args_ct[o].ct); 3302 args_ct[i] = (TCGArgConstraint){ 3303 .pair = 2, 3304 .pair_index = o, 3305 .regs = args_ct[o].regs << 1, 3306 .newreg = args_ct[o].newreg, 3307 }; 3308 args_ct[o].pair = 1; 3309 args_ct[o].pair_index = i; 3310 tcg_debug_assert(ct_str[1] == '\0'); 3311 continue; 3312 3313 case 'm': /* minus */ 3314 /* Allocate to the register before the previous. */ 3315 tcg_debug_assert(i > (input_p ? nb_oargs : 0)); 3316 o = i - 1; 3317 tcg_debug_assert(!args_ct[o].pair); 3318 tcg_debug_assert(!args_ct[o].ct); 3319 args_ct[i] = (TCGArgConstraint){ 3320 .pair = 1, 3321 .pair_index = o, 3322 .regs = args_ct[o].regs >> 1, 3323 .newreg = args_ct[o].newreg, 3324 }; 3325 args_ct[o].pair = 2; 3326 args_ct[o].pair_index = i; 3327 tcg_debug_assert(ct_str[1] == '\0'); 3328 continue; 3329 } 3330 3331 do { 3332 switch (*ct_str) { 3333 case 'i': 3334 args_ct[i].ct |= TCG_CT_CONST; 3335 break; 3336 #ifdef TCG_REG_ZERO 3337 case 'z': 3338 args_ct[i].ct |= TCG_CT_REG_ZERO; 3339 break; 3340 #endif 3341 3342 /* Include all of the target-specific constraints. 
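       Each backend lists its one-letter constraints in
       tcg-target-con-str.h as CONST('X', TCG_CT_xxx) and REGS('x', mask)
       entries; the macros below turn those entries into the remaining
       cases of this switch, ORing into args_ct[i].ct and args_ct[i].regs
       respectively.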
*/ 3343 3344 #undef CONST 3345 #define CONST(CASE, MASK) \ 3346 case CASE: args_ct[i].ct |= MASK; break; 3347 #define REGS(CASE, MASK) \ 3348 case CASE: args_ct[i].regs |= MASK; break; 3349 3350 #include "tcg-target-con-str.h" 3351 3352 #undef REGS 3353 #undef CONST 3354 default: 3355 case '0' ... '9': 3356 case '&': 3357 case 'p': 3358 case 'm': 3359 /* Typo in TCGConstraintSet constraint. */ 3360 g_assert_not_reached(); 3361 } 3362 } while (*++ct_str != '\0'); 3363 } 3364 3365 /* 3366 * Fix up output pairs that are aliased with inputs. 3367 * When we created the alias, we copied pair from the output. 3368 * There are three cases: 3369 * (1a) Pairs of inputs alias pairs of outputs. 3370 * (1b) One input aliases the first of a pair of outputs. 3371 * (2) One input aliases the second of a pair of outputs. 3372 * 3373 * Case 1a is handled by making sure that the pair_index'es are 3374 * properly updated so that they appear the same as a pair of inputs. 3375 * 3376 * Case 1b is handled by setting the pair_index of the input to 3377 * itself, simply so it doesn't point to an unrelated argument. 3378 * Since we don't encounter the "second" during the input allocation 3379 * phase, nothing happens with the second half of the input pair. 3380 * 3381 * Case 2 is handled by setting the second input to pair=3, the 3382 * first output to pair=3, and the pair_index'es to match. 3383 */ 3384 if (saw_alias_pair) { 3385 for (int i = nb_oargs; i < nb_args; i++) { 3386 int o, o2, i2; 3387 3388 /* 3389 * Since [0-9pm] must be alone in the constraint string, 3390 * the only way they can both be set is if the pair comes 3391 * from the output alias. 3392 */ 3393 if (!args_ct[i].ialias) { 3394 continue; 3395 } 3396 switch (args_ct[i].pair) { 3397 case 0: 3398 break; 3399 case 1: 3400 o = args_ct[i].alias_index; 3401 o2 = args_ct[o].pair_index; 3402 tcg_debug_assert(args_ct[o].pair == 1); 3403 tcg_debug_assert(args_ct[o2].pair == 2); 3404 if (args_ct[o2].oalias) { 3405 /* Case 1a */ 3406 i2 = args_ct[o2].alias_index; 3407 tcg_debug_assert(args_ct[i2].pair == 2); 3408 args_ct[i2].pair_index = i; 3409 args_ct[i].pair_index = i2; 3410 } else { 3411 /* Case 1b */ 3412 args_ct[i].pair_index = i; 3413 } 3414 break; 3415 case 2: 3416 o = args_ct[i].alias_index; 3417 o2 = args_ct[o].pair_index; 3418 tcg_debug_assert(args_ct[o].pair == 2); 3419 tcg_debug_assert(args_ct[o2].pair == 1); 3420 if (args_ct[o2].oalias) { 3421 /* Case 1a */ 3422 i2 = args_ct[o2].alias_index; 3423 tcg_debug_assert(args_ct[i2].pair == 1); 3424 args_ct[i2].pair_index = i; 3425 args_ct[i].pair_index = i2; 3426 } else { 3427 /* Case 2 */ 3428 args_ct[i].pair = 3; 3429 args_ct[o2].pair = 3; 3430 args_ct[i].pair_index = o2; 3431 args_ct[o2].pair_index = i; 3432 } 3433 break; 3434 default: 3435 g_assert_not_reached(); 3436 } 3437 } 3438 } 3439 3440 /* sort the constraints (XXX: this is just an heuristic) */ 3441 sort_constraints(args_ct, 0, nb_oargs); 3442 sort_constraints(args_ct, nb_oargs, nb_iargs); 3443 } 3444 } 3445 3446 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) 3447 { 3448 TCGOpcode opc = op->opc; 3449 TCGType type = TCGOP_TYPE(op); 3450 unsigned flags = TCGOP_FLAGS(op); 3451 const TCGOpDef *def = &tcg_op_defs[opc]; 3452 const TCGOutOp *outop = all_outop[opc]; 3453 TCGConstraintSetIndex con_set; 3454 3455 if (def->flags & TCG_OPF_NOT_PRESENT) { 3456 return empty_cts; 3457 } 3458 3459 if (outop) { 3460 con_set = outop->static_constraint; 3461 if (con_set == C_Dynamic) { 3462 con_set = outop->dynamic_constraint(type, flags); 
3463 } 3464 } else { 3465 con_set = tcg_target_op_def(opc, type, flags); 3466 } 3467 tcg_debug_assert(con_set >= 0); 3468 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 3469 3470 /* The constraint arguments must match TCGOpcode arguments. */ 3471 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); 3472 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); 3473 3474 return all_cts[con_set]; 3475 } 3476 3477 static void remove_label_use(TCGOp *op, int idx) 3478 { 3479 TCGLabel *label = arg_label(op->args[idx]); 3480 TCGLabelUse *use; 3481 3482 QSIMPLEQ_FOREACH(use, &label->branches, next) { 3483 if (use->op == op) { 3484 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 3485 return; 3486 } 3487 } 3488 g_assert_not_reached(); 3489 } 3490 3491 void tcg_op_remove(TCGContext *s, TCGOp *op) 3492 { 3493 switch (op->opc) { 3494 case INDEX_op_br: 3495 remove_label_use(op, 0); 3496 break; 3497 case INDEX_op_brcond: 3498 remove_label_use(op, 3); 3499 break; 3500 case INDEX_op_brcond2_i32: 3501 remove_label_use(op, 5); 3502 break; 3503 default: 3504 break; 3505 } 3506 3507 QTAILQ_REMOVE(&s->ops, op, link); 3508 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 3509 s->nb_ops--; 3510 } 3511 3512 void tcg_remove_ops_after(TCGOp *op) 3513 { 3514 TCGContext *s = tcg_ctx; 3515 3516 while (true) { 3517 TCGOp *last = tcg_last_op(); 3518 if (last == op) { 3519 return; 3520 } 3521 tcg_op_remove(s, last); 3522 } 3523 } 3524 3525 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3526 { 3527 TCGContext *s = tcg_ctx; 3528 TCGOp *op = NULL; 3529 3530 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3531 QTAILQ_FOREACH(op, &s->free_ops, link) { 3532 if (nargs <= op->nargs) { 3533 QTAILQ_REMOVE(&s->free_ops, op, link); 3534 nargs = op->nargs; 3535 goto found; 3536 } 3537 } 3538 } 3539 3540 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3541 nargs = MAX(4, nargs); 3542 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3543 3544 found: 3545 memset(op, 0, offsetof(TCGOp, link)); 3546 op->opc = opc; 3547 op->nargs = nargs; 3548 3549 /* Check for bitfield overflow. 
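       op->nargs is a narrow bitfield, so verify that the value assigned
       above was not truncated.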
*/ 3550 tcg_debug_assert(op->nargs == nargs); 3551 3552 s->nb_ops++; 3553 return op; 3554 } 3555 3556 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3557 { 3558 TCGOp *op = tcg_op_alloc(opc, nargs); 3559 3560 if (tcg_ctx->emit_before_op) { 3561 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link); 3562 } else { 3563 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3564 } 3565 return op; 3566 } 3567 3568 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3569 TCGOpcode opc, TCGType type, unsigned nargs) 3570 { 3571 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3572 3573 TCGOP_TYPE(new_op) = type; 3574 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3575 return new_op; 3576 } 3577 3578 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3579 TCGOpcode opc, TCGType type, unsigned nargs) 3580 { 3581 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3582 3583 TCGOP_TYPE(new_op) = type; 3584 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3585 return new_op; 3586 } 3587 3588 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3589 { 3590 TCGLabelUse *u; 3591 3592 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3593 TCGOp *op = u->op; 3594 switch (op->opc) { 3595 case INDEX_op_br: 3596 op->args[0] = label_arg(to); 3597 break; 3598 case INDEX_op_brcond: 3599 op->args[3] = label_arg(to); 3600 break; 3601 case INDEX_op_brcond2_i32: 3602 op->args[5] = label_arg(to); 3603 break; 3604 default: 3605 g_assert_not_reached(); 3606 } 3607 } 3608 3609 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3610 } 3611 3612 /* Reachable analysis : remove unreachable code. */ 3613 static void __attribute__((noinline)) 3614 reachable_code_pass(TCGContext *s) 3615 { 3616 TCGOp *op, *op_next, *op_prev; 3617 bool dead = false; 3618 3619 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3620 bool remove = dead; 3621 TCGLabel *label; 3622 3623 switch (op->opc) { 3624 case INDEX_op_set_label: 3625 label = arg_label(op->args[0]); 3626 3627 /* 3628 * Note that the first op in the TB is always a load, 3629 * so there is always something before a label. 3630 */ 3631 op_prev = QTAILQ_PREV(op, link); 3632 3633 /* 3634 * If we find two sequential labels, move all branches to 3635 * reference the second label and remove the first label. 3636 * Do this before branch to next optimization, so that the 3637 * middle label is out of the way. 3638 */ 3639 if (op_prev->opc == INDEX_op_set_label) { 3640 move_label_uses(label, arg_label(op_prev->args[0])); 3641 tcg_op_remove(s, op_prev); 3642 op_prev = QTAILQ_PREV(op, link); 3643 } 3644 3645 /* 3646 * Optimization can fold conditional branches to unconditional. 3647 * If we find a label which is preceded by an unconditional 3648 * branch to next, remove the branch. We couldn't do this when 3649 * processing the branch because any dead code between the branch 3650 * and label had not yet been removed. 3651 */ 3652 if (op_prev->opc == INDEX_op_br && 3653 label == arg_label(op_prev->args[0])) { 3654 tcg_op_remove(s, op_prev); 3655 /* Fall through means insns become live again. */ 3656 dead = false; 3657 } 3658 3659 if (QSIMPLEQ_EMPTY(&label->branches)) { 3660 /* 3661 * While there is an occasional backward branch, virtually 3662 * all branches generated by the translators are forward. 3663 * Which means that generally we will have already removed 3664 * all references to the label that will be, and there is 3665 * little to be gained by iterating. 3666 */ 3667 remove = true; 3668 } else { 3669 /* Once we see a label, insns become live again. 
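       The label is still targeted by at least one branch, so execution
       can reach this point even though the code just before it was dead.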
*/ 3670 dead = false; 3671 remove = false; 3672 } 3673 break; 3674 3675 case INDEX_op_br: 3676 case INDEX_op_exit_tb: 3677 case INDEX_op_goto_ptr: 3678 /* Unconditional branches; everything following is dead. */ 3679 dead = true; 3680 break; 3681 3682 case INDEX_op_call: 3683 /* Notice noreturn helper calls, raising exceptions. */ 3684 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3685 dead = true; 3686 } 3687 break; 3688 3689 case INDEX_op_insn_start: 3690 /* Never remove -- we need to keep these for unwind. */ 3691 remove = false; 3692 break; 3693 3694 default: 3695 break; 3696 } 3697 3698 if (remove) { 3699 tcg_op_remove(s, op); 3700 } 3701 } 3702 } 3703 3704 #define TS_DEAD 1 3705 #define TS_MEM 2 3706 3707 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3708 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3709 3710 /* For liveness_pass_1, the register preferences for a given temp. */ 3711 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3712 { 3713 return ts->state_ptr; 3714 } 3715 3716 /* For liveness_pass_1, reset the preferences for a given temp to the 3717 * maximal regset for its type. 3718 */ 3719 static inline void la_reset_pref(TCGTemp *ts) 3720 { 3721 *la_temp_pref(ts) 3722 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3723 } 3724 3725 /* liveness analysis: end of function: all temps are dead, and globals 3726 should be in memory. */ 3727 static void la_func_end(TCGContext *s, int ng, int nt) 3728 { 3729 int i; 3730 3731 for (i = 0; i < ng; ++i) { 3732 s->temps[i].state = TS_DEAD | TS_MEM; 3733 la_reset_pref(&s->temps[i]); 3734 } 3735 for (i = ng; i < nt; ++i) { 3736 s->temps[i].state = TS_DEAD; 3737 la_reset_pref(&s->temps[i]); 3738 } 3739 } 3740 3741 /* liveness analysis: end of basic block: all temps are dead, globals 3742 and local temps should be in memory. */ 3743 static void la_bb_end(TCGContext *s, int ng, int nt) 3744 { 3745 int i; 3746 3747 for (i = 0; i < nt; ++i) { 3748 TCGTemp *ts = &s->temps[i]; 3749 int state; 3750 3751 switch (ts->kind) { 3752 case TEMP_FIXED: 3753 case TEMP_GLOBAL: 3754 case TEMP_TB: 3755 state = TS_DEAD | TS_MEM; 3756 break; 3757 case TEMP_EBB: 3758 case TEMP_CONST: 3759 state = TS_DEAD; 3760 break; 3761 default: 3762 g_assert_not_reached(); 3763 } 3764 ts->state = state; 3765 la_reset_pref(ts); 3766 } 3767 } 3768 3769 /* liveness analysis: sync globals back to memory. */ 3770 static void la_global_sync(TCGContext *s, int ng) 3771 { 3772 int i; 3773 3774 for (i = 0; i < ng; ++i) { 3775 int state = s->temps[i].state; 3776 s->temps[i].state = state | TS_MEM; 3777 if (state == TS_DEAD) { 3778 /* If the global was previously dead, reset prefs. */ 3779 la_reset_pref(&s->temps[i]); 3780 } 3781 } 3782 } 3783 3784 /* 3785 * liveness analysis: conditional branch: all temps are dead unless 3786 * explicitly live-across-conditional-branch, globals and local temps 3787 * should be synced. 3788 */ 3789 static void la_bb_sync(TCGContext *s, int ng, int nt) 3790 { 3791 la_global_sync(s, ng); 3792 3793 for (int i = ng; i < nt; ++i) { 3794 TCGTemp *ts = &s->temps[i]; 3795 int state; 3796 3797 switch (ts->kind) { 3798 case TEMP_TB: 3799 state = ts->state; 3800 ts->state = state | TS_MEM; 3801 if (state != TS_DEAD) { 3802 continue; 3803 } 3804 break; 3805 case TEMP_EBB: 3806 case TEMP_CONST: 3807 continue; 3808 default: 3809 g_assert_not_reached(); 3810 } 3811 la_reset_pref(&s->temps[i]); 3812 } 3813 } 3814 3815 /* liveness analysis: sync globals back to memory and kill. 
*/ 3816 static void la_global_kill(TCGContext *s, int ng) 3817 { 3818 int i; 3819 3820 for (i = 0; i < ng; i++) { 3821 s->temps[i].state = TS_DEAD | TS_MEM; 3822 la_reset_pref(&s->temps[i]); 3823 } 3824 } 3825 3826 /* liveness analysis: note live globals crossing calls. */ 3827 static void la_cross_call(TCGContext *s, int nt) 3828 { 3829 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3830 int i; 3831 3832 for (i = 0; i < nt; i++) { 3833 TCGTemp *ts = &s->temps[i]; 3834 if (!(ts->state & TS_DEAD)) { 3835 TCGRegSet *pset = la_temp_pref(ts); 3836 TCGRegSet set = *pset; 3837 3838 set &= mask; 3839 /* If the combination is not possible, restart. */ 3840 if (set == 0) { 3841 set = tcg_target_available_regs[ts->type] & mask; 3842 } 3843 *pset = set; 3844 } 3845 } 3846 } 3847 3848 /* 3849 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3850 * to TEMP_EBB, if possible. 3851 */ 3852 static void __attribute__((noinline)) 3853 liveness_pass_0(TCGContext *s) 3854 { 3855 void * const multiple_ebb = (void *)(uintptr_t)-1; 3856 int nb_temps = s->nb_temps; 3857 TCGOp *op, *ebb; 3858 3859 for (int i = s->nb_globals; i < nb_temps; ++i) { 3860 s->temps[i].state_ptr = NULL; 3861 } 3862 3863 /* 3864 * Represent each EBB by the op at which it begins. In the case of 3865 * the first EBB, this is the first op, otherwise it is a label. 3866 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3867 * within a single EBB, else MULTIPLE_EBB. 3868 */ 3869 ebb = QTAILQ_FIRST(&s->ops); 3870 QTAILQ_FOREACH(op, &s->ops, link) { 3871 const TCGOpDef *def; 3872 int nb_oargs, nb_iargs; 3873 3874 switch (op->opc) { 3875 case INDEX_op_set_label: 3876 ebb = op; 3877 continue; 3878 case INDEX_op_discard: 3879 continue; 3880 case INDEX_op_call: 3881 nb_oargs = TCGOP_CALLO(op); 3882 nb_iargs = TCGOP_CALLI(op); 3883 break; 3884 default: 3885 def = &tcg_op_defs[op->opc]; 3886 nb_oargs = def->nb_oargs; 3887 nb_iargs = def->nb_iargs; 3888 break; 3889 } 3890 3891 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3892 TCGTemp *ts = arg_temp(op->args[i]); 3893 3894 if (ts->kind != TEMP_TB) { 3895 continue; 3896 } 3897 if (ts->state_ptr == NULL) { 3898 ts->state_ptr = ebb; 3899 } else if (ts->state_ptr != ebb) { 3900 ts->state_ptr = multiple_ebb; 3901 } 3902 } 3903 } 3904 3905 /* 3906 * For TEMP_TB that turned out not to be used beyond one EBB, 3907 * reduce the liveness to TEMP_EBB. 3908 */ 3909 for (int i = s->nb_globals; i < nb_temps; ++i) { 3910 TCGTemp *ts = &s->temps[i]; 3911 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3912 ts->kind = TEMP_EBB; 3913 } 3914 } 3915 } 3916 3917 static void assert_carry_dead(TCGContext *s) 3918 { 3919 /* 3920 * Carry operations can be separated by a few insns like mov, 3921 * load or store, but they should always be "close", and 3922 * carry-out operations should always be paired with carry-in. 3923 * At various boundaries, carry must have been consumed. 3924 */ 3925 tcg_debug_assert(!s->carry_live); 3926 } 3927 3928 /* Liveness analysis: update the opc_arg_life array to tell if a 3929 given input argument is dead. Instructions updating dead 3930 temporaries are removed. */ 3931 static void __attribute__((noinline)) 3932 liveness_pass_1(TCGContext *s) 3933 { 3934 int nb_globals = s->nb_globals; 3935 int nb_temps = s->nb_temps; 3936 TCGOp *op, *op_prev; 3937 TCGRegSet *prefs; 3938 3939 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3940 for (int i = 0; i < nb_temps; ++i) { 3941 s->temps[i].state_ptr = prefs + i; 3942 } 3943 3944 /* ???
Should be redundant with the exit_tb that ends the TB. */ 3945 la_func_end(s, nb_globals, nb_temps); 3946 3947 s->carry_live = false; 3948 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3949 int nb_iargs, nb_oargs; 3950 TCGOpcode opc_new, opc_new2; 3951 TCGLifeData arg_life = 0; 3952 TCGTemp *ts; 3953 TCGOpcode opc = op->opc; 3954 const TCGOpDef *def; 3955 const TCGArgConstraint *args_ct; 3956 3957 switch (opc) { 3958 case INDEX_op_call: 3959 assert_carry_dead(s); 3960 { 3961 const TCGHelperInfo *info = tcg_call_info(op); 3962 int call_flags = tcg_call_flags(op); 3963 3964 nb_oargs = TCGOP_CALLO(op); 3965 nb_iargs = TCGOP_CALLI(op); 3966 3967 /* pure functions can be removed if their result is unused */ 3968 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3969 for (int i = 0; i < nb_oargs; i++) { 3970 ts = arg_temp(op->args[i]); 3971 if (ts->state != TS_DEAD) { 3972 goto do_not_remove_call; 3973 } 3974 } 3975 goto do_remove; 3976 } 3977 do_not_remove_call: 3978 3979 /* Output args are dead. */ 3980 for (int i = 0; i < nb_oargs; i++) { 3981 ts = arg_temp(op->args[i]); 3982 if (ts->state & TS_DEAD) { 3983 arg_life |= DEAD_ARG << i; 3984 } 3985 if (ts->state & TS_MEM) { 3986 arg_life |= SYNC_ARG << i; 3987 } 3988 ts->state = TS_DEAD; 3989 la_reset_pref(ts); 3990 } 3991 3992 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3993 memset(op->output_pref, 0, sizeof(op->output_pref)); 3994 3995 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3996 TCG_CALL_NO_READ_GLOBALS))) { 3997 la_global_kill(s, nb_globals); 3998 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3999 la_global_sync(s, nb_globals); 4000 } 4001 4002 /* Record arguments that die in this helper. */ 4003 for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4004 ts = arg_temp(op->args[i]); 4005 if (ts->state & TS_DEAD) { 4006 arg_life |= DEAD_ARG << i; 4007 } 4008 } 4009 4010 /* For all live registers, remove call-clobbered prefs. */ 4011 la_cross_call(s, nb_temps); 4012 4013 /* 4014 * Input arguments are live for preceding opcodes. 4015 * 4016 * For those arguments that die, and will be allocated in 4017 * registers, clear the register set for that arg, to be 4018 * filled in below. For args that will be on the stack, 4019 * reset to any available reg. Process arguments in reverse 4020 * order so that if a temp is used more than once, the stack 4021 * reset to max happens before the register reset to 0. 4022 */ 4023 for (int i = nb_iargs - 1; i >= 0; i--) { 4024 const TCGCallArgumentLoc *loc = &info->in[i]; 4025 ts = arg_temp(op->args[nb_oargs + i]); 4026 4027 if (ts->state & TS_DEAD) { 4028 switch (loc->kind) { 4029 case TCG_CALL_ARG_NORMAL: 4030 case TCG_CALL_ARG_EXTEND_U: 4031 case TCG_CALL_ARG_EXTEND_S: 4032 if (arg_slot_reg_p(loc->arg_slot)) { 4033 *la_temp_pref(ts) = 0; 4034 break; 4035 } 4036 /* fall through */ 4037 default: 4038 *la_temp_pref(ts) = 4039 tcg_target_available_regs[ts->type]; 4040 break; 4041 } 4042 ts->state &= ~TS_DEAD; 4043 } 4044 } 4045 4046 /* 4047 * For each input argument, add its input register to prefs. 4048 * If a temp is used once, this produces a single set bit; 4049 * if a temp is used multiple times, this produces a set. 
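 * For example, a temp passed in both the first and third register argument slots ends up with both of those argument registers in its preference set.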
4050 */ 4051 for (int i = 0; i < nb_iargs; i++) { 4052 const TCGCallArgumentLoc *loc = &info->in[i]; 4053 ts = arg_temp(op->args[nb_oargs + i]); 4054 4055 switch (loc->kind) { 4056 case TCG_CALL_ARG_NORMAL: 4057 case TCG_CALL_ARG_EXTEND_U: 4058 case TCG_CALL_ARG_EXTEND_S: 4059 if (arg_slot_reg_p(loc->arg_slot)) { 4060 tcg_regset_set_reg(*la_temp_pref(ts), 4061 tcg_target_call_iarg_regs[loc->arg_slot]); 4062 } 4063 break; 4064 default: 4065 break; 4066 } 4067 } 4068 } 4069 break; 4070 case INDEX_op_insn_start: 4071 assert_carry_dead(s); 4072 break; 4073 case INDEX_op_discard: 4074 /* mark the temporary as dead */ 4075 ts = arg_temp(op->args[0]); 4076 ts->state = TS_DEAD; 4077 la_reset_pref(ts); 4078 break; 4079 4080 case INDEX_op_add2_i32: 4081 case INDEX_op_add2_i64: 4082 opc_new = INDEX_op_add; 4083 goto do_addsub2; 4084 case INDEX_op_sub2_i32: 4085 case INDEX_op_sub2_i64: 4086 opc_new = INDEX_op_sub; 4087 do_addsub2: 4088 assert_carry_dead(s); 4089 /* Test if the high part of the operation is dead, but not 4090 the low part. The result can be optimized to a simple 4091 add or sub. This happens often for x86_64 guest when the 4092 cpu mode is set to 32 bit. */ 4093 if (arg_temp(op->args[1])->state == TS_DEAD) { 4094 if (arg_temp(op->args[0])->state == TS_DEAD) { 4095 goto do_remove; 4096 } 4097 /* Replace the opcode and adjust the args in place, 4098 leaving 3 unused args at the end. */ 4099 op->opc = opc = opc_new; 4100 op->args[1] = op->args[2]; 4101 op->args[2] = op->args[4]; 4102 /* Fall through and mark the single-word operation live. */ 4103 } 4104 goto do_not_remove; 4105 4106 case INDEX_op_muls2: 4107 opc_new = INDEX_op_mul; 4108 opc_new2 = INDEX_op_mulsh; 4109 goto do_mul2; 4110 case INDEX_op_mulu2: 4111 opc_new = INDEX_op_mul; 4112 opc_new2 = INDEX_op_muluh; 4113 do_mul2: 4114 assert_carry_dead(s); 4115 if (arg_temp(op->args[1])->state == TS_DEAD) { 4116 if (arg_temp(op->args[0])->state == TS_DEAD) { 4117 /* Both parts of the operation are dead. */ 4118 goto do_remove; 4119 } 4120 /* The high part of the operation is dead; generate the low. */ 4121 op->opc = opc = opc_new; 4122 op->args[1] = op->args[2]; 4123 op->args[2] = op->args[3]; 4124 } else if (arg_temp(op->args[0])->state == TS_DEAD && 4125 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { 4126 /* The low part of the operation is dead; generate the high. */ 4127 op->opc = opc = opc_new2; 4128 op->args[0] = op->args[1]; 4129 op->args[1] = op->args[2]; 4130 op->args[2] = op->args[3]; 4131 } else { 4132 goto do_not_remove; 4133 } 4134 /* Mark the single-word operation live. */ 4135 goto do_not_remove; 4136 4137 case INDEX_op_addco: 4138 if (s->carry_live) { 4139 goto do_not_remove; 4140 } 4141 op->opc = opc = INDEX_op_add; 4142 goto do_default; 4143 4144 case INDEX_op_addcio: 4145 if (s->carry_live) { 4146 goto do_not_remove; 4147 } 4148 op->opc = opc = INDEX_op_addci; 4149 goto do_default; 4150 4151 case INDEX_op_subbo: 4152 if (s->carry_live) { 4153 goto do_not_remove; 4154 } 4155 /* Lower to sub, but this may also require canonicalization. 
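With a constant second operand we negate the constant and emit an add instead, e.g. a subtract of 5 becomes an add of -5.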
*/ 4156 op->opc = opc = INDEX_op_sub; 4157 ts = arg_temp(op->args[2]); 4158 if (ts->kind == TEMP_CONST) { 4159 ts = tcg_constant_internal(ts->type, -ts->val); 4160 if (ts->state_ptr == NULL) { 4161 tcg_debug_assert(temp_idx(ts) == nb_temps); 4162 nb_temps++; 4163 ts->state_ptr = tcg_malloc(sizeof(TCGRegSet)); 4164 ts->state = TS_DEAD; 4165 la_reset_pref(ts); 4166 } 4167 op->args[2] = temp_arg(ts); 4168 op->opc = opc = INDEX_op_add; 4169 } 4170 goto do_default; 4171 4172 case INDEX_op_subbio: 4173 if (s->carry_live) { 4174 goto do_not_remove; 4175 } 4176 op->opc = opc = INDEX_op_subbi; 4177 goto do_default; 4178 4179 case INDEX_op_addc1o: 4180 if (s->carry_live) { 4181 goto do_not_remove; 4182 } 4183 /* Lower to add, add +1. */ 4184 op_prev = tcg_op_insert_before(s, op, INDEX_op_add, 4185 TCGOP_TYPE(op), 3); 4186 op_prev->args[0] = op->args[0]; 4187 op_prev->args[1] = op->args[1]; 4188 op_prev->args[2] = op->args[2]; 4189 op->opc = opc = INDEX_op_add; 4190 op->args[1] = op->args[0]; 4191 ts = arg_temp(op->args[0]); 4192 ts = tcg_constant_internal(ts->type, 1); 4193 op->args[2] = temp_arg(ts); 4194 goto do_default; 4195 4196 case INDEX_op_subb1o: 4197 if (s->carry_live) { 4198 goto do_not_remove; 4199 } 4200 /* Lower to sub, add -1. */ 4201 op_prev = tcg_op_insert_before(s, op, INDEX_op_sub, 4202 TCGOP_TYPE(op), 3); 4203 op_prev->args[0] = op->args[0]; 4204 op_prev->args[1] = op->args[1]; 4205 op_prev->args[2] = op->args[2]; 4206 op->opc = opc = INDEX_op_add; 4207 op->args[1] = op->args[0]; 4208 ts = arg_temp(op->args[0]); 4209 ts = tcg_constant_internal(ts->type, -1); 4210 op->args[2] = temp_arg(ts); 4211 goto do_default; 4212 4213 default: 4214 do_default: 4215 /* 4216 * Test if the operation can be removed because all 4217 * its outputs are dead. We assume that nb_oargs == 0 4218 * implies side effects. 4219 */ 4220 def = &tcg_op_defs[opc]; 4221 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { 4222 for (int i = def->nb_oargs - 1; i >= 0; i--) { 4223 if (arg_temp(op->args[i])->state != TS_DEAD) { 4224 goto do_not_remove; 4225 } 4226 } 4227 goto do_remove; 4228 } 4229 goto do_not_remove; 4230 4231 do_remove: 4232 tcg_op_remove(s, op); 4233 break; 4234 4235 do_not_remove: 4236 def = &tcg_op_defs[opc]; 4237 nb_iargs = def->nb_iargs; 4238 nb_oargs = def->nb_oargs; 4239 4240 for (int i = 0; i < nb_oargs; i++) { 4241 ts = arg_temp(op->args[i]); 4242 4243 /* Remember the preference of the uses that followed. */ 4244 if (i < ARRAY_SIZE(op->output_pref)) { 4245 op->output_pref[i] = *la_temp_pref(ts); 4246 } 4247 4248 /* Output args are dead. */ 4249 if (ts->state & TS_DEAD) { 4250 arg_life |= DEAD_ARG << i; 4251 } 4252 if (ts->state & TS_MEM) { 4253 arg_life |= SYNC_ARG << i; 4254 } 4255 ts->state = TS_DEAD; 4256 la_reset_pref(ts); 4257 } 4258 4259 /* If end of basic block, update. */ 4260 if (def->flags & TCG_OPF_BB_EXIT) { 4261 assert_carry_dead(s); 4262 la_func_end(s, nb_globals, nb_temps); 4263 } else if (def->flags & TCG_OPF_COND_BRANCH) { 4264 assert_carry_dead(s); 4265 la_bb_sync(s, nb_globals, nb_temps); 4266 } else if (def->flags & TCG_OPF_BB_END) { 4267 assert_carry_dead(s); 4268 la_bb_end(s, nb_globals, nb_temps); 4269 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4270 assert_carry_dead(s); 4271 la_global_sync(s, nb_globals); 4272 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4273 la_cross_call(s, nb_temps); 4274 } 4275 } 4276 4277 /* Record arguments that die in this opcode. 
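An input whose value has no further use gets its DEAD_ARG bit set, letting the register allocator release (and possibly reuse) its register right away.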
*/ 4278 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4279 ts = arg_temp(op->args[i]); 4280 if (ts->state & TS_DEAD) { 4281 arg_life |= DEAD_ARG << i; 4282 } 4283 } 4284 if (def->flags & TCG_OPF_CARRY_OUT) { 4285 s->carry_live = false; 4286 } 4287 4288 /* Input arguments are live for preceding opcodes. */ 4289 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4290 ts = arg_temp(op->args[i]); 4291 if (ts->state & TS_DEAD) { 4292 /* For operands that were dead, initially allow 4293 all regs for the type. */ 4294 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 4295 ts->state &= ~TS_DEAD; 4296 } 4297 } 4298 if (def->flags & TCG_OPF_CARRY_IN) { 4299 s->carry_live = true; 4300 } 4301 4302 /* Incorporate constraints for this operand. */ 4303 switch (opc) { 4304 case INDEX_op_mov: 4305 /* Note that these are TCG_OPF_NOT_PRESENT and do not 4306 have proper constraints. That said, special case 4307 moves to propagate preferences backward. */ 4308 if (IS_DEAD_ARG(1)) { 4309 *la_temp_pref(arg_temp(op->args[0])) 4310 = *la_temp_pref(arg_temp(op->args[1])); 4311 } 4312 break; 4313 4314 default: 4315 args_ct = opcode_args_ct(op); 4316 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4317 const TCGArgConstraint *ct = &args_ct[i]; 4318 TCGRegSet set, *pset; 4319 4320 ts = arg_temp(op->args[i]); 4321 pset = la_temp_pref(ts); 4322 set = *pset; 4323 4324 set &= ct->regs; 4325 if (ct->ialias) { 4326 set &= output_pref(op, ct->alias_index); 4327 } 4328 /* If the combination is not possible, restart. */ 4329 if (set == 0) { 4330 set = ct->regs; 4331 } 4332 *pset = set; 4333 } 4334 break; 4335 } 4336 break; 4337 } 4338 op->life = arg_life; 4339 } 4340 assert_carry_dead(s); 4341 } 4342 4343 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 4344 static bool __attribute__((noinline)) 4345 liveness_pass_2(TCGContext *s) 4346 { 4347 int nb_globals = s->nb_globals; 4348 int nb_temps, i; 4349 bool changes = false; 4350 TCGOp *op, *op_next; 4351 4352 /* Create a temporary for each indirect global. */ 4353 for (i = 0; i < nb_globals; ++i) { 4354 TCGTemp *its = &s->temps[i]; 4355 if (its->indirect_reg) { 4356 TCGTemp *dts = tcg_temp_alloc(s); 4357 dts->type = its->type; 4358 dts->base_type = its->base_type; 4359 dts->temp_subindex = its->temp_subindex; 4360 dts->kind = TEMP_EBB; 4361 its->state_ptr = dts; 4362 } else { 4363 its->state_ptr = NULL; 4364 } 4365 /* All globals begin dead. */ 4366 its->state = TS_DEAD; 4367 } 4368 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 4369 TCGTemp *its = &s->temps[i]; 4370 its->state_ptr = NULL; 4371 its->state = TS_DEAD; 4372 } 4373 4374 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 4375 TCGOpcode opc = op->opc; 4376 const TCGOpDef *def = &tcg_op_defs[opc]; 4377 TCGLifeData arg_life = op->life; 4378 int nb_iargs, nb_oargs, call_flags; 4379 TCGTemp *arg_ts, *dir_ts; 4380 4381 if (opc == INDEX_op_call) { 4382 nb_oargs = TCGOP_CALLO(op); 4383 nb_iargs = TCGOP_CALLI(op); 4384 call_flags = tcg_call_flags(op); 4385 } else { 4386 nb_iargs = def->nb_iargs; 4387 nb_oargs = def->nb_oargs; 4388 4389 /* Set flags similar to how calls require. 
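Mapping opcode flags onto call flags lets the global sync/save checks below handle plain opcodes and helper calls uniformly.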
*/ 4390 if (def->flags & TCG_OPF_COND_BRANCH) { 4391 /* Like reading globals: sync_globals */ 4392 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4393 } else if (def->flags & TCG_OPF_BB_END) { 4394 /* Like writing globals: save_globals */ 4395 call_flags = 0; 4396 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4397 /* Like reading globals: sync_globals */ 4398 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 4399 } else { 4400 /* No effect on globals. */ 4401 call_flags = (TCG_CALL_NO_READ_GLOBALS | 4402 TCG_CALL_NO_WRITE_GLOBALS); 4403 } 4404 } 4405 4406 /* Make sure that input arguments are available. */ 4407 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4408 arg_ts = arg_temp(op->args[i]); 4409 dir_ts = arg_ts->state_ptr; 4410 if (dir_ts && arg_ts->state == TS_DEAD) { 4411 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 4412 ? INDEX_op_ld_i32 4413 : INDEX_op_ld_i64); 4414 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 4415 arg_ts->type, 3); 4416 4417 lop->args[0] = temp_arg(dir_ts); 4418 lop->args[1] = temp_arg(arg_ts->mem_base); 4419 lop->args[2] = arg_ts->mem_offset; 4420 4421 /* Loaded, but synced with memory. */ 4422 arg_ts->state = TS_MEM; 4423 } 4424 } 4425 4426 /* Perform input replacement, and mark inputs that became dead. 4427 No action is required except keeping temp_state up to date 4428 so that we reload when needed. */ 4429 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4430 arg_ts = arg_temp(op->args[i]); 4431 dir_ts = arg_ts->state_ptr; 4432 if (dir_ts) { 4433 op->args[i] = temp_arg(dir_ts); 4434 changes = true; 4435 if (IS_DEAD_ARG(i)) { 4436 arg_ts->state = TS_DEAD; 4437 } 4438 } 4439 } 4440 4441 /* Liveness analysis should ensure that the following are 4442 all correct, for call sites and basic block end points. */ 4443 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 4444 /* Nothing to do */ 4445 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 4446 for (i = 0; i < nb_globals; ++i) { 4447 /* Liveness should see that globals are synced back, 4448 that is, either TS_DEAD or TS_MEM. */ 4449 arg_ts = &s->temps[i]; 4450 tcg_debug_assert(arg_ts->state_ptr == 0 4451 || arg_ts->state != 0); 4452 } 4453 } else { 4454 for (i = 0; i < nb_globals; ++i) { 4455 /* Liveness should see that globals are saved back, 4456 that is, TS_DEAD, waiting to be reloaded. */ 4457 arg_ts = &s->temps[i]; 4458 tcg_debug_assert(arg_ts->state_ptr == 0 4459 || arg_ts->state == TS_DEAD); 4460 } 4461 } 4462 4463 /* Outputs become available. */ 4464 if (opc == INDEX_op_mov) { 4465 arg_ts = arg_temp(op->args[0]); 4466 dir_ts = arg_ts->state_ptr; 4467 if (dir_ts) { 4468 op->args[0] = temp_arg(dir_ts); 4469 changes = true; 4470 4471 /* The output is now live and modified. */ 4472 arg_ts->state = 0; 4473 4474 if (NEED_SYNC_ARG(0)) { 4475 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4476 ? 
INDEX_op_st_i32 4477 : INDEX_op_st_i64); 4478 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4479 arg_ts->type, 3); 4480 TCGTemp *out_ts = dir_ts; 4481 4482 if (IS_DEAD_ARG(0)) { 4483 out_ts = arg_temp(op->args[1]); 4484 arg_ts->state = TS_DEAD; 4485 tcg_op_remove(s, op); 4486 } else { 4487 arg_ts->state = TS_MEM; 4488 } 4489 4490 sop->args[0] = temp_arg(out_ts); 4491 sop->args[1] = temp_arg(arg_ts->mem_base); 4492 sop->args[2] = arg_ts->mem_offset; 4493 } else { 4494 tcg_debug_assert(!IS_DEAD_ARG(0)); 4495 } 4496 } 4497 } else { 4498 for (i = 0; i < nb_oargs; i++) { 4499 arg_ts = arg_temp(op->args[i]); 4500 dir_ts = arg_ts->state_ptr; 4501 if (!dir_ts) { 4502 continue; 4503 } 4504 op->args[i] = temp_arg(dir_ts); 4505 changes = true; 4506 4507 /* The output is now live and modified. */ 4508 arg_ts->state = 0; 4509 4510 /* Sync outputs upon their last write. */ 4511 if (NEED_SYNC_ARG(i)) { 4512 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 4513 ? INDEX_op_st_i32 4514 : INDEX_op_st_i64); 4515 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 4516 arg_ts->type, 3); 4517 4518 sop->args[0] = temp_arg(dir_ts); 4519 sop->args[1] = temp_arg(arg_ts->mem_base); 4520 sop->args[2] = arg_ts->mem_offset; 4521 4522 arg_ts->state = TS_MEM; 4523 } 4524 /* Drop outputs that are dead. */ 4525 if (IS_DEAD_ARG(i)) { 4526 arg_ts->state = TS_DEAD; 4527 } 4528 } 4529 } 4530 } 4531 4532 return changes; 4533 } 4534 4535 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 4536 { 4537 intptr_t off; 4538 int size, align; 4539 4540 /* When allocating an object, look at the full type. */ 4541 size = tcg_type_size(ts->base_type); 4542 switch (ts->base_type) { 4543 case TCG_TYPE_I32: 4544 align = 4; 4545 break; 4546 case TCG_TYPE_I64: 4547 case TCG_TYPE_V64: 4548 align = 8; 4549 break; 4550 case TCG_TYPE_I128: 4551 case TCG_TYPE_V128: 4552 case TCG_TYPE_V256: 4553 /* 4554 * Note that we do not require aligned storage for V256, 4555 * and that we provide alignment for I128 to match V128, 4556 * even if that's above what the host ABI requires. 4557 */ 4558 align = 16; 4559 break; 4560 default: 4561 g_assert_not_reached(); 4562 } 4563 4564 /* 4565 * Assume the stack is sufficiently aligned. 4566 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 4567 * and do not require 16 byte vector alignment. This seems slightly 4568 * easier than fully parameterizing the above switch statement. 4569 */ 4570 align = MIN(TCG_TARGET_STACK_ALIGN, align); 4571 off = ROUND_UP(s->current_frame_offset, align); 4572 4573 /* If we've exhausted the stack frame, restart with a smaller TB. */ 4574 if (off + size > s->frame_end) { 4575 tcg_raise_tb_overflow(s); 4576 } 4577 s->current_frame_offset = off + size; 4578 #if defined(__sparc__) 4579 off += TCG_TARGET_STACK_BIAS; 4580 #endif 4581 4582 /* If the object was subdivided, assign memory to all the parts. */ 4583 if (ts->base_type != ts->type) { 4584 int part_size = tcg_type_size(ts->type); 4585 int part_count = size / part_size; 4586 4587 /* 4588 * Each part is allocated sequentially in tcg_temp_new_internal. 4589 * Jump back to the first part by subtracting the current index. 4590 */ 4591 ts -= ts->temp_subindex; 4592 for (int i = 0; i < part_count; ++i) { 4593 ts[i].mem_offset = off + i * part_size; 4594 ts[i].mem_base = s->frame_temp; 4595 ts[i].mem_allocated = 1; 4596 } 4597 } else { 4598 ts->mem_offset = off; 4599 ts->mem_base = s->frame_temp; 4600 ts->mem_allocated = 1; 4601 } 4602 } 4603 4604 /* Assign @reg to @ts, and update reg_to_temp[]. 
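Any register previously assigned to @ts is unlinked first, so the reverse mapping stays consistent.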
*/ 4605 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4606 { 4607 if (ts->val_type == TEMP_VAL_REG) { 4608 TCGReg old = ts->reg; 4609 tcg_debug_assert(s->reg_to_temp[old] == ts); 4610 if (old == reg) { 4611 return; 4612 } 4613 s->reg_to_temp[old] = NULL; 4614 } 4615 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4616 s->reg_to_temp[reg] = ts; 4617 ts->val_type = TEMP_VAL_REG; 4618 ts->reg = reg; 4619 } 4620 4621 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4622 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4623 { 4624 tcg_debug_assert(type != TEMP_VAL_REG); 4625 if (ts->val_type == TEMP_VAL_REG) { 4626 TCGReg reg = ts->reg; 4627 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4628 s->reg_to_temp[reg] = NULL; 4629 } 4630 ts->val_type = type; 4631 } 4632 4633 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4634 4635 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4636 mark it free; otherwise mark it dead. */ 4637 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4638 { 4639 TCGTempVal new_type; 4640 4641 switch (ts->kind) { 4642 case TEMP_FIXED: 4643 return; 4644 case TEMP_GLOBAL: 4645 case TEMP_TB: 4646 new_type = TEMP_VAL_MEM; 4647 break; 4648 case TEMP_EBB: 4649 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4650 break; 4651 case TEMP_CONST: 4652 new_type = TEMP_VAL_CONST; 4653 break; 4654 default: 4655 g_assert_not_reached(); 4656 } 4657 set_temp_val_nonreg(s, ts, new_type); 4658 } 4659 4660 /* Mark a temporary as dead. */ 4661 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4662 { 4663 temp_free_or_dead(s, ts, 1); 4664 } 4665 4666 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4667 registers needs to be allocated to store a constant. If 'free_or_dead' 4668 is non-zero, subsequently release the temporary; if it is positive, the 4669 temp is dead; if it is negative, the temp is free. */ 4670 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4671 TCGRegSet preferred_regs, int free_or_dead) 4672 { 4673 if (!temp_readonly(ts) && !ts->mem_coherent) { 4674 if (!ts->mem_allocated) { 4675 temp_allocate_frame(s, ts); 4676 } 4677 switch (ts->val_type) { 4678 case TEMP_VAL_CONST: 4679 /* If we're going to free the temp immediately, then we won't 4680 require it later in a register, so attempt to store the 4681 constant to memory directly. */ 4682 if (free_or_dead 4683 && tcg_out_sti(s, ts->type, ts->val, 4684 ts->mem_base->reg, ts->mem_offset)) { 4685 break; 4686 } 4687 temp_load(s, ts, tcg_target_available_regs[ts->type], 4688 allocated_regs, preferred_regs); 4689 /* fallthrough */ 4690 4691 case TEMP_VAL_REG: 4692 tcg_out_st(s, ts->type, ts->reg, 4693 ts->mem_base->reg, ts->mem_offset); 4694 break; 4695 4696 case TEMP_VAL_MEM: 4697 break; 4698 4699 case TEMP_VAL_DEAD: 4700 default: 4701 g_assert_not_reached(); 4702 } 4703 ts->mem_coherent = 1; 4704 } 4705 if (free_or_dead) { 4706 temp_free_or_dead(s, ts, free_or_dead); 4707 } 4708 } 4709 4710 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4711 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4712 { 4713 TCGTemp *ts = s->reg_to_temp[reg]; 4714 if (ts != NULL) { 4715 temp_sync(s, ts, allocated_regs, 0, -1); 4716 } 4717 } 4718 4719 /** 4720 * tcg_reg_alloc: 4721 * @required_regs: Set of registers in which we must allocate. 
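 *     At least one such register must remain once @allocated_regs is excluded.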
4722 * @allocated_regs: Set of registers which must be avoided. 4723 * @preferred_regs: Set of registers we should prefer. 4724 * @rev: True if we search the registers in "indirect" order. 4725 * 4726 * The allocated register must be in @required_regs & ~@allocated_regs, 4727 * but if we can put it in @preferred_regs we may save a move later. 4728 */ 4729 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4730 TCGRegSet allocated_regs, 4731 TCGRegSet preferred_regs, bool rev) 4732 { 4733 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4734 TCGRegSet reg_ct[2]; 4735 const int *order; 4736 4737 reg_ct[1] = required_regs & ~allocated_regs; 4738 tcg_debug_assert(reg_ct[1] != 0); 4739 reg_ct[0] = reg_ct[1] & preferred_regs; 4740 4741 /* Skip the preferred_regs option if it cannot be satisfied, 4742 or if the preference made no difference. */ 4743 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4744 4745 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4746 4747 /* Try free registers, preferences first. */ 4748 for (j = f; j < 2; j++) { 4749 TCGRegSet set = reg_ct[j]; 4750 4751 if (tcg_regset_single(set)) { 4752 /* One register in the set. */ 4753 TCGReg reg = tcg_regset_first(set); 4754 if (s->reg_to_temp[reg] == NULL) { 4755 return reg; 4756 } 4757 } else { 4758 for (i = 0; i < n; i++) { 4759 TCGReg reg = order[i]; 4760 if (s->reg_to_temp[reg] == NULL && 4761 tcg_regset_test_reg(set, reg)) { 4762 return reg; 4763 } 4764 } 4765 } 4766 } 4767 4768 /* We must spill something. */ 4769 for (j = f; j < 2; j++) { 4770 TCGRegSet set = reg_ct[j]; 4771 4772 if (tcg_regset_single(set)) { 4773 /* One register in the set. */ 4774 TCGReg reg = tcg_regset_first(set); 4775 tcg_reg_free(s, reg, allocated_regs); 4776 return reg; 4777 } else { 4778 for (i = 0; i < n; i++) { 4779 TCGReg reg = order[i]; 4780 if (tcg_regset_test_reg(set, reg)) { 4781 tcg_reg_free(s, reg, allocated_regs); 4782 return reg; 4783 } 4784 } 4785 } 4786 } 4787 4788 g_assert_not_reached(); 4789 } 4790 4791 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4792 TCGRegSet allocated_regs, 4793 TCGRegSet preferred_regs, bool rev) 4794 { 4795 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4796 TCGRegSet reg_ct[2]; 4797 const int *order; 4798 4799 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 4800 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4801 tcg_debug_assert(reg_ct[1] != 0); 4802 reg_ct[0] = reg_ct[1] & preferred_regs; 4803 4804 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4805 4806 /* 4807 * Skip the preferred_regs option if it cannot be satisfied, 4808 * or if the preference made no difference. 4809 */ 4810 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4811 4812 /* 4813 * Minimize the number of flushes by looking for 2 free registers first, 4814 * then a single flush, then two flushes. 4815 */ 4816 for (fmin = 2; fmin >= 0; fmin--) { 4817 for (j = k; j < 2; j++) { 4818 TCGRegSet set = reg_ct[j]; 4819 4820 for (i = 0; i < n; i++) { 4821 TCGReg reg = order[i]; 4822 4823 if (tcg_regset_test_reg(set, reg)) { 4824 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4825 if (f >= fmin) { 4826 tcg_reg_free(s, reg, allocated_regs); 4827 tcg_reg_free(s, reg + 1, allocated_regs); 4828 return reg; 4829 } 4830 } 4831 } 4832 } 4833 } 4834 g_assert_not_reached(); 4835 } 4836 4837 /* Make sure the temporary is in a register. 
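Constants are materialized with movi/dupi and spilled values are reloaded from their memory slot.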
If needed, allocate the register 4838 from DESIRED while avoiding ALLOCATED. */ 4839 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4840 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4841 { 4842 TCGReg reg; 4843 4844 switch (ts->val_type) { 4845 case TEMP_VAL_REG: 4846 return; 4847 case TEMP_VAL_CONST: 4848 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4849 preferred_regs, ts->indirect_base); 4850 if (ts->type <= TCG_TYPE_I64) { 4851 tcg_out_movi(s, ts->type, reg, ts->val); 4852 } else { 4853 uint64_t val = ts->val; 4854 MemOp vece = MO_64; 4855 4856 /* 4857 * Find the minimal vector element that matches the constant. 4858 * The targets will, in general, have to do this search anyway, 4859 * so do this generically. 4860 */ 4861 if (val == dup_const(MO_8, val)) { 4862 vece = MO_8; 4863 } else if (val == dup_const(MO_16, val)) { 4864 vece = MO_16; 4865 } else if (val == dup_const(MO_32, val)) { 4866 vece = MO_32; 4867 } 4868 4869 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4870 } 4871 ts->mem_coherent = 0; 4872 break; 4873 case TEMP_VAL_MEM: 4874 if (!ts->mem_allocated) { 4875 temp_allocate_frame(s, ts); 4876 } 4877 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4878 preferred_regs, ts->indirect_base); 4879 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4880 ts->mem_coherent = 1; 4881 break; 4882 case TEMP_VAL_DEAD: 4883 default: 4884 g_assert_not_reached(); 4885 } 4886 set_temp_val_reg(s, ts, reg); 4887 } 4888 4889 /* Save a temporary to memory. 'allocated_regs' is used in case a 4890 temporary register needs to be allocated to store a constant. */ 4891 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4892 { 4893 /* The liveness analysis already ensures that globals are back 4894 in memory. Keep a tcg_debug_assert for safety. */ 4895 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4896 } 4897 4898 /* save globals to their canonical location and assume they can be 4899 modified by the following code. 'allocated_regs' is used in case a 4900 temporary register needs to be allocated to store a constant. */ 4901 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4902 { 4903 int i, n; 4904 4905 for (i = 0, n = s->nb_globals; i < n; i++) { 4906 temp_save(s, &s->temps[i], allocated_regs); 4907 } 4908 } 4909 4910 /* sync globals to their canonical location and assume they can be 4911 read by the following code. 'allocated_regs' is used in case a 4912 temporary register needs to be allocated to store a constant. */ 4913 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4914 { 4915 int i, n; 4916 4917 for (i = 0, n = s->nb_globals; i < n; i++) { 4918 TCGTemp *ts = &s->temps[i]; 4919 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4920 || ts->kind == TEMP_FIXED 4921 || ts->mem_coherent); 4922 } 4923 } 4924 4925 /* at the end of a basic block, we assume all temporaries are dead and 4926 all globals are stored at their canonical location. */ 4927 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4928 { 4929 assert_carry_dead(s); 4930 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4931 TCGTemp *ts = &s->temps[i]; 4932 4933 switch (ts->kind) { 4934 case TEMP_TB: 4935 temp_save(s, ts, allocated_regs); 4936 break; 4937 case TEMP_EBB: 4938 /* The liveness analysis already ensures that temps are dead. 4939 Keep a tcg_debug_assert for safety.
*/ 4940 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4941 break; 4942 case TEMP_CONST: 4943 /* Similarly, we should have freed any allocated register. */ 4944 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4945 break; 4946 default: 4947 g_assert_not_reached(); 4948 } 4949 } 4950 4951 save_globals(s, allocated_regs); 4952 } 4953 4954 /* 4955 * At a conditional branch, we assume all temporaries are dead unless 4956 * explicitly live-across-conditional-branch; all globals and local 4957 * temps are synced to their location. 4958 */ 4959 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4960 { 4961 assert_carry_dead(s); 4962 sync_globals(s, allocated_regs); 4963 4964 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4965 TCGTemp *ts = &s->temps[i]; 4966 /* 4967 * The liveness analysis already ensures that temps are dead. 4968 * Keep tcg_debug_asserts for safety. 4969 */ 4970 switch (ts->kind) { 4971 case TEMP_TB: 4972 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4973 break; 4974 case TEMP_EBB: 4975 case TEMP_CONST: 4976 break; 4977 default: 4978 g_assert_not_reached(); 4979 } 4980 } 4981 } 4982 4983 /* 4984 * Specialized code generation for INDEX_op_mov_* with a constant. 4985 */ 4986 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4987 tcg_target_ulong val, TCGLifeData arg_life, 4988 TCGRegSet preferred_regs) 4989 { 4990 /* ENV should not be modified. */ 4991 tcg_debug_assert(!temp_readonly(ots)); 4992 4993 /* The movi is not explicitly generated here. */ 4994 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4995 ots->val = val; 4996 ots->mem_coherent = 0; 4997 if (NEED_SYNC_ARG(0)) { 4998 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4999 } else if (IS_DEAD_ARG(0)) { 5000 temp_dead(s, ots); 5001 } 5002 } 5003 5004 /* 5005 * Specialized code generation for INDEX_op_mov_*. 5006 */ 5007 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 5008 { 5009 const TCGLifeData arg_life = op->life; 5010 TCGRegSet allocated_regs, preferred_regs; 5011 TCGTemp *ts, *ots; 5012 TCGType otype, itype; 5013 TCGReg oreg, ireg; 5014 5015 allocated_regs = s->reserved_regs; 5016 preferred_regs = output_pref(op, 0); 5017 ots = arg_temp(op->args[0]); 5018 ts = arg_temp(op->args[1]); 5019 5020 /* ENV should not be modified. */ 5021 tcg_debug_assert(!temp_readonly(ots)); 5022 5023 /* Note that otype != itype for no-op truncation. */ 5024 otype = ots->type; 5025 itype = ts->type; 5026 5027 if (ts->val_type == TEMP_VAL_CONST) { 5028 /* propagate constant or generate sti */ 5029 tcg_target_ulong val = ts->val; 5030 if (IS_DEAD_ARG(1)) { 5031 temp_dead(s, ts); 5032 } 5033 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 5034 return; 5035 } 5036 5037 /* If the source value is in memory we're going to be forced 5038 to have it in a register in order to perform the copy. Copy 5039 the SOURCE value into its own register first, that way we 5040 don't have to reload SOURCE the next time it is used. */ 5041 if (ts->val_type == TEMP_VAL_MEM) { 5042 temp_load(s, ts, tcg_target_available_regs[itype], 5043 allocated_regs, preferred_regs); 5044 } 5045 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 5046 ireg = ts->reg; 5047 5048 if (IS_DEAD_ARG(0)) { 5049 /* mov to a non-saved dead register makes no sense (even with 5050 liveness analysis disabled). 
*/ 5051 tcg_debug_assert(NEED_SYNC_ARG(0)); 5052 if (!ots->mem_allocated) { 5053 temp_allocate_frame(s, ots); 5054 } 5055 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 5056 if (IS_DEAD_ARG(1)) { 5057 temp_dead(s, ts); 5058 } 5059 temp_dead(s, ots); 5060 return; 5061 } 5062 5063 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 5064 /* 5065 * The mov can be suppressed. Kill input first, so that it 5066 * is unlinked from reg_to_temp, then set the output to the 5067 * reg that we saved from the input. 5068 */ 5069 temp_dead(s, ts); 5070 oreg = ireg; 5071 } else { 5072 if (ots->val_type == TEMP_VAL_REG) { 5073 oreg = ots->reg; 5074 } else { 5075 /* Make sure to not spill the input register during allocation. */ 5076 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 5077 allocated_regs | ((TCGRegSet)1 << ireg), 5078 preferred_regs, ots->indirect_base); 5079 } 5080 if (!tcg_out_mov(s, otype, oreg, ireg)) { 5081 /* 5082 * Cross register class move not supported. 5083 * Store the source register into the destination slot 5084 * and leave the destination temp as TEMP_VAL_MEM. 5085 */ 5086 assert(!temp_readonly(ots)); 5087 if (!ts->mem_allocated) { 5088 temp_allocate_frame(s, ots); 5089 } 5090 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 5091 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 5092 ots->mem_coherent = 1; 5093 return; 5094 } 5095 } 5096 set_temp_val_reg(s, ots, oreg); 5097 ots->mem_coherent = 0; 5098 5099 if (NEED_SYNC_ARG(0)) { 5100 temp_sync(s, ots, allocated_regs, 0, 0); 5101 } 5102 } 5103 5104 /* 5105 * Specialized code generation for INDEX_op_dup_vec. 5106 */ 5107 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 5108 { 5109 const TCGLifeData arg_life = op->life; 5110 TCGRegSet dup_out_regs, dup_in_regs; 5111 const TCGArgConstraint *dup_args_ct; 5112 TCGTemp *its, *ots; 5113 TCGType itype, vtype; 5114 unsigned vece; 5115 int lowpart_ofs; 5116 bool ok; 5117 5118 ots = arg_temp(op->args[0]); 5119 its = arg_temp(op->args[1]); 5120 5121 /* ENV should not be modified. */ 5122 tcg_debug_assert(!temp_readonly(ots)); 5123 5124 itype = its->type; 5125 vece = TCGOP_VECE(op); 5126 vtype = TCGOP_TYPE(op); 5127 5128 if (its->val_type == TEMP_VAL_CONST) { 5129 /* Propagate constant via movi -> dupi. */ 5130 tcg_target_ulong val = its->val; 5131 if (IS_DEAD_ARG(1)) { 5132 temp_dead(s, its); 5133 } 5134 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 5135 return; 5136 } 5137 5138 dup_args_ct = opcode_args_ct(op); 5139 dup_out_regs = dup_args_ct[0].regs; 5140 dup_in_regs = dup_args_ct[1].regs; 5141 5142 /* Allocate the output register now. */ 5143 if (ots->val_type != TEMP_VAL_REG) { 5144 TCGRegSet allocated_regs = s->reserved_regs; 5145 TCGReg oreg; 5146 5147 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 5148 /* Make sure to not spill the input register. */ 5149 tcg_regset_set_reg(allocated_regs, its->reg); 5150 } 5151 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5152 output_pref(op, 0), ots->indirect_base); 5153 set_temp_val_reg(s, ots, oreg); 5154 } 5155 5156 switch (its->val_type) { 5157 case TEMP_VAL_REG: 5158 /* 5159 * The dup constriaints must be broad, covering all possible VECE. 5160 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 5161 * to fail, indicating that extra moves are required for that case. 
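 * When it does fail we fall back to an integer-to-vector move, or to loading/duplicating the value from the temp's memory slot.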
5162 */ 5163 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 5164 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 5165 goto done; 5166 } 5167 /* Try again from memory or a vector input register. */ 5168 } 5169 if (!its->mem_coherent) { 5170 /* 5171 * The input register is not synced, and so an extra store 5172 * would be required to use memory. Attempt an integer-vector 5173 * register move first. We do not have a TCGRegSet for this. 5174 */ 5175 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 5176 break; 5177 } 5178 /* Sync the temp back to its slot and load from there. */ 5179 temp_sync(s, its, s->reserved_regs, 0, 0); 5180 } 5181 /* fall through */ 5182 5183 case TEMP_VAL_MEM: 5184 lowpart_ofs = 0; 5185 if (HOST_BIG_ENDIAN) { 5186 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 5187 } 5188 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 5189 its->mem_offset + lowpart_ofs)) { 5190 goto done; 5191 } 5192 /* Load the input into the destination vector register. */ 5193 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 5194 break; 5195 5196 default: 5197 g_assert_not_reached(); 5198 } 5199 5200 /* We now have a vector input register, so dup must succeed. */ 5201 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 5202 tcg_debug_assert(ok); 5203 5204 done: 5205 ots->mem_coherent = 0; 5206 if (IS_DEAD_ARG(1)) { 5207 temp_dead(s, its); 5208 } 5209 if (NEED_SYNC_ARG(0)) { 5210 temp_sync(s, ots, s->reserved_regs, 0, 0); 5211 } 5212 if (IS_DEAD_ARG(0)) { 5213 temp_dead(s, ots); 5214 } 5215 } 5216 5217 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 5218 { 5219 const TCGLifeData arg_life = op->life; 5220 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 5221 TCGRegSet i_allocated_regs; 5222 TCGRegSet o_allocated_regs; 5223 int i, k, nb_iargs, nb_oargs; 5224 TCGReg reg; 5225 TCGArg arg; 5226 const TCGArgConstraint *args_ct; 5227 const TCGArgConstraint *arg_ct; 5228 TCGTemp *ts; 5229 TCGArg new_args[TCG_MAX_OP_ARGS]; 5230 int const_args[TCG_MAX_OP_ARGS]; 5231 TCGCond op_cond; 5232 5233 if (def->flags & TCG_OPF_CARRY_IN) { 5234 tcg_debug_assert(s->carry_live); 5235 } 5236 5237 nb_oargs = def->nb_oargs; 5238 nb_iargs = def->nb_iargs; 5239 5240 /* copy constants */ 5241 memcpy(new_args + nb_oargs + nb_iargs, 5242 op->args + nb_oargs + nb_iargs, 5243 sizeof(TCGArg) * def->nb_cargs); 5244 5245 i_allocated_regs = s->reserved_regs; 5246 o_allocated_regs = s->reserved_regs; 5247 5248 switch (op->opc) { 5249 case INDEX_op_brcond: 5250 op_cond = op->args[2]; 5251 break; 5252 case INDEX_op_setcond: 5253 case INDEX_op_negsetcond: 5254 case INDEX_op_cmp_vec: 5255 op_cond = op->args[3]; 5256 break; 5257 case INDEX_op_brcond2_i32: 5258 op_cond = op->args[4]; 5259 break; 5260 case INDEX_op_movcond: 5261 case INDEX_op_setcond2_i32: 5262 case INDEX_op_cmpsel_vec: 5263 op_cond = op->args[5]; 5264 break; 5265 default: 5266 /* No condition within opcode. 
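Use TCG_COND_ALWAYS as a neutral value for the constant matching below.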
*/ 5267 op_cond = TCG_COND_ALWAYS; 5268 break; 5269 } 5270 5271 args_ct = opcode_args_ct(op); 5272 5273 /* satisfy input constraints */ 5274 for (k = 0; k < nb_iargs; k++) { 5275 TCGRegSet i_preferred_regs, i_required_regs; 5276 bool allocate_new_reg, copyto_new_reg; 5277 TCGTemp *ts2; 5278 int i1, i2; 5279 5280 i = args_ct[nb_oargs + k].sort_index; 5281 arg = op->args[i]; 5282 arg_ct = &args_ct[i]; 5283 ts = arg_temp(arg); 5284 5285 if (ts->val_type == TEMP_VAL_CONST) { 5286 #ifdef TCG_REG_ZERO 5287 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { 5288 /* Hardware zero register: indicate register via non-const. */ 5289 const_args[i] = 0; 5290 new_args[i] = TCG_REG_ZERO; 5291 continue; 5292 } 5293 #endif 5294 5295 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, 5296 op_cond, TCGOP_VECE(op))) { 5297 /* constant is OK for instruction */ 5298 const_args[i] = 1; 5299 new_args[i] = ts->val; 5300 continue; 5301 } 5302 } 5303 5304 reg = ts->reg; 5305 i_preferred_regs = 0; 5306 i_required_regs = arg_ct->regs; 5307 allocate_new_reg = false; 5308 copyto_new_reg = false; 5309 5310 switch (arg_ct->pair) { 5311 case 0: /* not paired */ 5312 if (arg_ct->ialias) { 5313 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5314 5315 /* 5316 * If the input is readonly, then it cannot also be an 5317 * output and aliased to itself. If the input is not 5318 * dead after the instruction, we must allocate a new 5319 * register and move it. 5320 */ 5321 if (temp_readonly(ts) || !IS_DEAD_ARG(i) 5322 || args_ct[arg_ct->alias_index].newreg) { 5323 allocate_new_reg = true; 5324 } else if (ts->val_type == TEMP_VAL_REG) { 5325 /* 5326 * Check if the current register has already been 5327 * allocated for another input. 5328 */ 5329 allocate_new_reg = 5330 tcg_regset_test_reg(i_allocated_regs, reg); 5331 } 5332 } 5333 if (!allocate_new_reg) { 5334 temp_load(s, ts, i_required_regs, i_allocated_regs, 5335 i_preferred_regs); 5336 reg = ts->reg; 5337 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 5338 } 5339 if (allocate_new_reg) { 5340 /* 5341 * Allocate a new register matching the constraint 5342 * and move the temporary register into it. 5343 */ 5344 temp_load(s, ts, tcg_target_available_regs[ts->type], 5345 i_allocated_regs, 0); 5346 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 5347 i_preferred_regs, ts->indirect_base); 5348 copyto_new_reg = true; 5349 } 5350 break; 5351 5352 case 1: 5353 /* First of an input pair; if i1 == i2, the second is an output. */ 5354 i1 = i; 5355 i2 = arg_ct->pair_index; 5356 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 5357 5358 /* 5359 * It is easier to default to allocating a new pair 5360 * and to identify a few cases where it's not required. 5361 */ 5362 if (arg_ct->ialias) { 5363 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5364 if (IS_DEAD_ARG(i1) && 5365 IS_DEAD_ARG(i2) && 5366 !temp_readonly(ts) && 5367 ts->val_type == TEMP_VAL_REG && 5368 ts->reg < TCG_TARGET_NB_REGS - 1 && 5369 tcg_regset_test_reg(i_required_regs, reg) && 5370 !tcg_regset_test_reg(i_allocated_regs, reg) && 5371 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 5372 (ts2 5373 ? ts2->val_type == TEMP_VAL_REG && 5374 ts2->reg == reg + 1 && 5375 !temp_readonly(ts2) 5376 : s->reg_to_temp[reg + 1] == NULL)) { 5377 break; 5378 } 5379 } else { 5380 /* Without aliasing, the pair must also be an input. 
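If both halves already sit in adjacent registers that satisfy the constraint, no new allocation is needed.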
*/ 5381 tcg_debug_assert(ts2); 5382 if (ts->val_type == TEMP_VAL_REG && 5383 ts2->val_type == TEMP_VAL_REG && 5384 ts2->reg == reg + 1 && 5385 tcg_regset_test_reg(i_required_regs, reg)) { 5386 break; 5387 } 5388 } 5389 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 5390 0, ts->indirect_base); 5391 goto do_pair; 5392 5393 case 2: /* pair second */ 5394 reg = new_args[arg_ct->pair_index] + 1; 5395 goto do_pair; 5396 5397 case 3: /* ialias with second output, no first input */ 5398 tcg_debug_assert(arg_ct->ialias); 5399 i_preferred_regs = output_pref(op, arg_ct->alias_index); 5400 5401 if (IS_DEAD_ARG(i) && 5402 !temp_readonly(ts) && 5403 ts->val_type == TEMP_VAL_REG && 5404 reg > 0 && 5405 s->reg_to_temp[reg - 1] == NULL && 5406 tcg_regset_test_reg(i_required_regs, reg) && 5407 !tcg_regset_test_reg(i_allocated_regs, reg) && 5408 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 5409 tcg_regset_set_reg(i_allocated_regs, reg - 1); 5410 break; 5411 } 5412 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 5413 i_allocated_regs, 0, 5414 ts->indirect_base); 5415 tcg_regset_set_reg(i_allocated_regs, reg); 5416 reg += 1; 5417 goto do_pair; 5418 5419 do_pair: 5420 /* 5421 * If an aliased input is not dead after the instruction, 5422 * we must allocate a new register and move it. 5423 */ 5424 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 5425 TCGRegSet t_allocated_regs = i_allocated_regs; 5426 5427 /* 5428 * Because of the alias, and the continued life, make sure 5429 * that the temp is somewhere *other* than the reg pair, 5430 * and we get a copy in reg. 5431 */ 5432 tcg_regset_set_reg(t_allocated_regs, reg); 5433 tcg_regset_set_reg(t_allocated_regs, reg + 1); 5434 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 5435 /* If ts was already in reg, copy it somewhere else. */ 5436 TCGReg nr; 5437 bool ok; 5438 5439 tcg_debug_assert(ts->kind != TEMP_FIXED); 5440 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 5441 t_allocated_regs, 0, ts->indirect_base); 5442 ok = tcg_out_mov(s, ts->type, nr, reg); 5443 tcg_debug_assert(ok); 5444 5445 set_temp_val_reg(s, ts, nr); 5446 } else { 5447 temp_load(s, ts, tcg_target_available_regs[ts->type], 5448 t_allocated_regs, 0); 5449 copyto_new_reg = true; 5450 } 5451 } else { 5452 /* Preferably allocate to reg, otherwise copy. */ 5453 i_required_regs = (TCGRegSet)1 << reg; 5454 temp_load(s, ts, i_required_regs, i_allocated_regs, 5455 i_preferred_regs); 5456 copyto_new_reg = ts->reg != reg; 5457 } 5458 break; 5459 5460 default: 5461 g_assert_not_reached(); 5462 } 5463 5464 if (copyto_new_reg) { 5465 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5466 /* 5467 * Cross register class move not supported. Sync the 5468 * temp back to its slot and load from there. 
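 * Going through memory is the generic fallback when no direct move exists between the two register classes (e.g. integer and vector).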
5469 */ 5470 temp_sync(s, ts, i_allocated_regs, 0, 0); 5471 tcg_out_ld(s, ts->type, reg, 5472 ts->mem_base->reg, ts->mem_offset); 5473 } 5474 } 5475 new_args[i] = reg; 5476 const_args[i] = 0; 5477 tcg_regset_set_reg(i_allocated_regs, reg); 5478 } 5479 5480 /* mark dead temporaries and free the associated registers */ 5481 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 5482 if (IS_DEAD_ARG(i)) { 5483 temp_dead(s, arg_temp(op->args[i])); 5484 } 5485 } 5486 5487 if (def->flags & TCG_OPF_COND_BRANCH) { 5488 tcg_reg_alloc_cbranch(s, i_allocated_regs); 5489 } else if (def->flags & TCG_OPF_BB_END) { 5490 tcg_reg_alloc_bb_end(s, i_allocated_regs); 5491 } else { 5492 if (def->flags & TCG_OPF_CALL_CLOBBER) { 5493 assert_carry_dead(s); 5494 /* XXX: permit generic clobber register list ? */ 5495 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5496 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5497 tcg_reg_free(s, i, i_allocated_regs); 5498 } 5499 } 5500 } 5501 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 5502 /* sync globals if the op has side effects and might trigger 5503 an exception. */ 5504 sync_globals(s, i_allocated_regs); 5505 } 5506 5507 /* satisfy the output constraints */ 5508 for (k = 0; k < nb_oargs; k++) { 5509 i = args_ct[k].sort_index; 5510 arg = op->args[i]; 5511 arg_ct = &args_ct[i]; 5512 ts = arg_temp(arg); 5513 5514 /* ENV should not be modified. */ 5515 tcg_debug_assert(!temp_readonly(ts)); 5516 5517 switch (arg_ct->pair) { 5518 case 0: /* not paired */ 5519 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 5520 reg = new_args[arg_ct->alias_index]; 5521 } else if (arg_ct->newreg) { 5522 reg = tcg_reg_alloc(s, arg_ct->regs, 5523 i_allocated_regs | o_allocated_regs, 5524 output_pref(op, k), ts->indirect_base); 5525 } else { 5526 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 5527 output_pref(op, k), ts->indirect_base); 5528 } 5529 break; 5530 5531 case 1: /* first of pair */ 5532 if (arg_ct->oalias) { 5533 reg = new_args[arg_ct->alias_index]; 5534 } else if (arg_ct->newreg) { 5535 reg = tcg_reg_alloc_pair(s, arg_ct->regs, 5536 i_allocated_regs | o_allocated_regs, 5537 output_pref(op, k), 5538 ts->indirect_base); 5539 } else { 5540 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 5541 output_pref(op, k), 5542 ts->indirect_base); 5543 } 5544 break; 5545 5546 case 2: /* second of pair */ 5547 if (arg_ct->oalias) { 5548 reg = new_args[arg_ct->alias_index]; 5549 } else { 5550 reg = new_args[arg_ct->pair_index] + 1; 5551 } 5552 break; 5553 5554 case 3: /* first of pair, aliasing with a second input */ 5555 tcg_debug_assert(!arg_ct->newreg); 5556 reg = new_args[arg_ct->pair_index] - 1; 5557 break; 5558 5559 default: 5560 g_assert_not_reached(); 5561 } 5562 tcg_regset_set_reg(o_allocated_regs, reg); 5563 set_temp_val_reg(s, ts, reg); 5564 ts->mem_coherent = 0; 5565 new_args[i] = reg; 5566 } 5567 } 5568 5569 /* emit instruction */ 5570 TCGType type = TCGOP_TYPE(op); 5571 switch (op->opc) { 5572 case INDEX_op_add: 5573 case INDEX_op_and: 5574 case INDEX_op_andc: 5575 case INDEX_op_clz: 5576 case INDEX_op_ctz: 5577 case INDEX_op_divs: 5578 case INDEX_op_divu: 5579 case INDEX_op_eqv: 5580 case INDEX_op_mul: 5581 case INDEX_op_mulsh: 5582 case INDEX_op_muluh: 5583 case INDEX_op_nand: 5584 case INDEX_op_nor: 5585 case INDEX_op_or: 5586 case INDEX_op_orc: 5587 case INDEX_op_rems: 5588 case INDEX_op_remu: 5589 case INDEX_op_rotl: 5590 case INDEX_op_rotr: 5591 case INDEX_op_sar: 5592 case INDEX_op_shl: 5593 case INDEX_op_shr: 5594 case INDEX_op_xor: 5595 { 5596 
const TCGOutOpBinary *out = 5597 container_of(all_outop[op->opc], TCGOutOpBinary, base); 5598 5599 /* Constants should never appear in the first source operand. */ 5600 tcg_debug_assert(!const_args[1]); 5601 if (const_args[2]) { 5602 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); 5603 } else { 5604 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5605 } 5606 } 5607 break; 5608 5609 case INDEX_op_sub: 5610 { 5611 const TCGOutOpSubtract *out = 5612 container_of(all_outop[op->opc], TCGOutOpSubtract, base); 5613 5614 /* 5615 * Constants should never appear in the second source operand. 5616 * These are folded to add with negative constant. 5617 */ 5618 tcg_debug_assert(!const_args[2]); 5619 if (const_args[1]) { 5620 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); 5621 } else { 5622 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); 5623 } 5624 } 5625 break; 5626 5627 case INDEX_op_addco: 5628 case INDEX_op_subbo: 5629 case INDEX_op_addci: 5630 case INDEX_op_subbi: 5631 case INDEX_op_addcio: 5632 case INDEX_op_subbio: 5633 case INDEX_op_addc1o: 5634 case INDEX_op_subb1o: 5635 g_assert_not_reached(); 5636 5637 case INDEX_op_bswap64: 5638 case INDEX_op_ext_i32_i64: 5639 case INDEX_op_extu_i32_i64: 5640 case INDEX_op_extrl_i64_i32: 5641 case INDEX_op_extrh_i64_i32: 5642 assert(TCG_TARGET_REG_BITS == 64); 5643 /* fall through */ 5644 case INDEX_op_ctpop: 5645 case INDEX_op_neg: 5646 case INDEX_op_not: 5647 { 5648 const TCGOutOpUnary *out = 5649 container_of(all_outop[op->opc], TCGOutOpUnary, base); 5650 5651 /* Constants should have been folded. */ 5652 tcg_debug_assert(!const_args[1]); 5653 out->out_rr(s, type, new_args[0], new_args[1]); 5654 } 5655 break; 5656 5657 case INDEX_op_bswap16: 5658 case INDEX_op_bswap32: 5659 { 5660 const TCGOutOpBswap *out = 5661 container_of(all_outop[op->opc], TCGOutOpBswap, base); 5662 5663 tcg_debug_assert(!const_args[1]); 5664 out->out_rr(s, type, new_args[0], new_args[1], new_args[2]); 5665 } 5666 break; 5667 5668 case INDEX_op_deposit: 5669 { 5670 const TCGOutOpDeposit *out = &outop_deposit; 5671 5672 if (const_args[2]) { 5673 tcg_debug_assert(!const_args[1]); 5674 out->out_rri(s, type, new_args[0], new_args[1], 5675 new_args[2], new_args[3], new_args[4]); 5676 } else if (const_args[1]) { 5677 tcg_debug_assert(new_args[1] == 0); 5678 tcg_debug_assert(!const_args[2]); 5679 out->out_rzr(s, type, new_args[0], new_args[2], 5680 new_args[3], new_args[4]); 5681 } else { 5682 out->out_rrr(s, type, new_args[0], new_args[1], 5683 new_args[2], new_args[3], new_args[4]); 5684 } 5685 } 5686 break; 5687 5688 case INDEX_op_divs2: 5689 case INDEX_op_divu2: 5690 { 5691 const TCGOutOpDivRem *out = 5692 container_of(all_outop[op->opc], TCGOutOpDivRem, base); 5693 5694 /* Only used by x86 and s390x, which use matching constraints. 
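The asserts below verify that the register allocator honoured those matching constraints.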
*/ 5695 tcg_debug_assert(new_args[0] == new_args[2]); 5696 tcg_debug_assert(new_args[1] == new_args[3]); 5697 tcg_debug_assert(!const_args[4]); 5698 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); 5699 } 5700 break; 5701 5702 case INDEX_op_extract: 5703 case INDEX_op_sextract: 5704 { 5705 const TCGOutOpExtract *out = 5706 container_of(all_outop[op->opc], TCGOutOpExtract, base); 5707 5708 tcg_debug_assert(!const_args[1]); 5709 out->out_rr(s, type, new_args[0], new_args[1], 5710 new_args[2], new_args[3]); 5711 } 5712 break; 5713 5714 case INDEX_op_extract2: 5715 { 5716 const TCGOutOpExtract2 *out = &outop_extract2; 5717 5718 tcg_debug_assert(!const_args[1]); 5719 tcg_debug_assert(!const_args[2]); 5720 out->out_rrr(s, type, new_args[0], new_args[1], 5721 new_args[2], new_args[3]); 5722 } 5723 break; 5724 5725 case INDEX_op_muls2: 5726 case INDEX_op_mulu2: 5727 { 5728 const TCGOutOpMul2 *out = 5729 container_of(all_outop[op->opc], TCGOutOpMul2, base); 5730 5731 tcg_debug_assert(!const_args[2]); 5732 tcg_debug_assert(!const_args[3]); 5733 out->out_rrrr(s, type, new_args[0], new_args[1], 5734 new_args[2], new_args[3]); 5735 } 5736 break; 5737 5738 case INDEX_op_brcond: 5739 { 5740 const TCGOutOpBrcond *out = &outop_brcond; 5741 TCGCond cond = new_args[2]; 5742 TCGLabel *label = arg_label(new_args[3]); 5743 5744 tcg_debug_assert(!const_args[0]); 5745 if (const_args[1]) { 5746 out->out_ri(s, type, cond, new_args[0], new_args[1], label); 5747 } else { 5748 out->out_rr(s, type, cond, new_args[0], new_args[1], label); 5749 } 5750 } 5751 break; 5752 5753 case INDEX_op_movcond: 5754 { 5755 const TCGOutOpMovcond *out = &outop_movcond; 5756 TCGCond cond = new_args[5]; 5757 5758 tcg_debug_assert(!const_args[1]); 5759 out->out(s, type, cond, new_args[0], 5760 new_args[1], new_args[2], const_args[2], 5761 new_args[3], const_args[3], 5762 new_args[4], const_args[4]); 5763 } 5764 break; 5765 5766 case INDEX_op_setcond: 5767 case INDEX_op_negsetcond: 5768 { 5769 const TCGOutOpSetcond *out = 5770 container_of(all_outop[op->opc], TCGOutOpSetcond, base); 5771 TCGCond cond = new_args[3]; 5772 5773 tcg_debug_assert(!const_args[1]); 5774 if (const_args[2]) { 5775 out->out_rri(s, type, cond, 5776 new_args[0], new_args[1], new_args[2]); 5777 } else { 5778 out->out_rrr(s, type, cond, 5779 new_args[0], new_args[1], new_args[2]); 5780 } 5781 } 5782 break; 5783 5784 #if TCG_TARGET_REG_BITS == 32 5785 case INDEX_op_brcond2_i32: 5786 { 5787 const TCGOutOpBrcond2 *out = &outop_brcond2; 5788 TCGCond cond = new_args[4]; 5789 TCGLabel *label = arg_label(new_args[5]); 5790 5791 tcg_debug_assert(!const_args[0]); 5792 tcg_debug_assert(!const_args[1]); 5793 out->out(s, cond, new_args[0], new_args[1], 5794 new_args[2], const_args[2], 5795 new_args[3], const_args[3], label); 5796 } 5797 break; 5798 case INDEX_op_setcond2_i32: 5799 { 5800 const TCGOutOpSetcond2 *out = &outop_setcond2; 5801 TCGCond cond = new_args[5]; 5802 5803 tcg_debug_assert(!const_args[1]); 5804 tcg_debug_assert(!const_args[2]); 5805 out->out(s, cond, new_args[0], new_args[1], new_args[2], 5806 new_args[3], const_args[3], new_args[4], const_args[4]); 5807 } 5808 break; 5809 #else 5810 case INDEX_op_brcond2_i32: 5811 case INDEX_op_setcond2_i32: 5812 g_assert_not_reached(); 5813 #endif 5814 5815 default: 5816 if (def->flags & TCG_OPF_VECTOR) { 5817 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, 5818 TCGOP_VECE(op), new_args, const_args); 5819 } else { 5820 tcg_out_op(s, op->opc, type, new_args, const_args); 5821 } 5822 break; 5823 } 5824 5825 if 
(def->flags & TCG_OPF_CARRY_IN) { 5826 s->carry_live = false; 5827 } 5828 if (def->flags & TCG_OPF_CARRY_OUT) { 5829 s->carry_live = true; 5830 } 5831 5832 /* move the outputs in the correct register if needed */ 5833 for(i = 0; i < nb_oargs; i++) { 5834 ts = arg_temp(op->args[i]); 5835 5836 /* ENV should not be modified. */ 5837 tcg_debug_assert(!temp_readonly(ts)); 5838 5839 if (NEED_SYNC_ARG(i)) { 5840 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 5841 } else if (IS_DEAD_ARG(i)) { 5842 temp_dead(s, ts); 5843 } 5844 } 5845 } 5846 5847 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 5848 { 5849 const TCGLifeData arg_life = op->life; 5850 TCGTemp *ots, *itsl, *itsh; 5851 TCGType vtype = TCGOP_TYPE(op); 5852 5853 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 5854 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 5855 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 5856 5857 ots = arg_temp(op->args[0]); 5858 itsl = arg_temp(op->args[1]); 5859 itsh = arg_temp(op->args[2]); 5860 5861 /* ENV should not be modified. */ 5862 tcg_debug_assert(!temp_readonly(ots)); 5863 5864 /* Allocate the output register now. */ 5865 if (ots->val_type != TEMP_VAL_REG) { 5866 TCGRegSet allocated_regs = s->reserved_regs; 5867 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; 5868 TCGReg oreg; 5869 5870 /* Make sure to not spill the input registers. */ 5871 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5872 tcg_regset_set_reg(allocated_regs, itsl->reg); 5873 } 5874 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5875 tcg_regset_set_reg(allocated_regs, itsh->reg); 5876 } 5877 5878 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5879 output_pref(op, 0), ots->indirect_base); 5880 set_temp_val_reg(s, ots, oreg); 5881 } 5882 5883 /* Promote dup2 of immediates to dupi_vec. */ 5884 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5885 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5886 MemOp vece = MO_64; 5887 5888 if (val == dup_const(MO_8, val)) { 5889 vece = MO_8; 5890 } else if (val == dup_const(MO_16, val)) { 5891 vece = MO_16; 5892 } else if (val == dup_const(MO_32, val)) { 5893 vece = MO_32; 5894 } 5895 5896 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5897 goto done; 5898 } 5899 5900 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5901 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5902 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5903 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5904 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5905 5906 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5907 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5908 5909 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5910 its->mem_base->reg, its->mem_offset)) { 5911 goto done; 5912 } 5913 } 5914 5915 /* Fall back to generic expansion. */ 5916 return false; 5917 5918 done: 5919 ots->mem_coherent = 0; 5920 if (IS_DEAD_ARG(1)) { 5921 temp_dead(s, itsl); 5922 } 5923 if (IS_DEAD_ARG(2)) { 5924 temp_dead(s, itsh); 5925 } 5926 if (NEED_SYNC_ARG(0)) { 5927 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5928 } else if (IS_DEAD_ARG(0)) { 5929 temp_dead(s, ots); 5930 } 5931 return true; 5932 } 5933 5934 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5935 TCGRegSet allocated_regs) 5936 { 5937 if (ts->val_type == TEMP_VAL_REG) { 5938 if (ts->reg != reg) { 5939 tcg_reg_free(s, reg, allocated_regs); 5940 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5941 /* 5942 * Cross register class move not supported. 
Sync the 5943 * temp back to its slot and load from there. 5944 */ 5945 temp_sync(s, ts, allocated_regs, 0, 0); 5946 tcg_out_ld(s, ts->type, reg, 5947 ts->mem_base->reg, ts->mem_offset); 5948 } 5949 } 5950 } else { 5951 TCGRegSet arg_set = 0; 5952 5953 tcg_reg_free(s, reg, allocated_regs); 5954 tcg_regset_set_reg(arg_set, reg); 5955 temp_load(s, ts, arg_set, allocated_regs, 0); 5956 } 5957 } 5958 5959 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5960 TCGRegSet allocated_regs) 5961 { 5962 /* 5963 * When the destination is on the stack, load up the temp and store. 5964 * If there are many call-saved registers, the temp might live to 5965 * see another use; otherwise it'll be discarded. 5966 */ 5967 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5968 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5969 arg_slot_stk_ofs(arg_slot)); 5970 } 5971 5972 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5973 TCGTemp *ts, TCGRegSet *allocated_regs) 5974 { 5975 if (arg_slot_reg_p(l->arg_slot)) { 5976 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5977 load_arg_reg(s, reg, ts, *allocated_regs); 5978 tcg_regset_set_reg(*allocated_regs, reg); 5979 } else { 5980 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5981 } 5982 } 5983 5984 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5985 intptr_t ref_off, TCGRegSet *allocated_regs) 5986 { 5987 TCGReg reg; 5988 5989 if (arg_slot_reg_p(arg_slot)) { 5990 reg = tcg_target_call_iarg_regs[arg_slot]; 5991 tcg_reg_free(s, reg, *allocated_regs); 5992 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5993 tcg_regset_set_reg(*allocated_regs, reg); 5994 } else { 5995 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5996 *allocated_regs, 0, false); 5997 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5998 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5999 arg_slot_stk_ofs(arg_slot)); 6000 } 6001 } 6002 6003 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 6004 { 6005 const int nb_oargs = TCGOP_CALLO(op); 6006 const int nb_iargs = TCGOP_CALLI(op); 6007 const TCGLifeData arg_life = op->life; 6008 const TCGHelperInfo *info = tcg_call_info(op); 6009 TCGRegSet allocated_regs = s->reserved_regs; 6010 int i; 6011 6012 /* 6013 * Move inputs into place in reverse order, 6014 * so that we place stacked arguments first. 6015 */ 6016 for (i = nb_iargs - 1; i >= 0; --i) { 6017 const TCGCallArgumentLoc *loc = &info->in[i]; 6018 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 6019 6020 switch (loc->kind) { 6021 case TCG_CALL_ARG_NORMAL: 6022 case TCG_CALL_ARG_EXTEND_U: 6023 case TCG_CALL_ARG_EXTEND_S: 6024 load_arg_normal(s, loc, ts, &allocated_regs); 6025 break; 6026 case TCG_CALL_ARG_BY_REF: 6027 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 6028 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 6029 arg_slot_stk_ofs(loc->ref_slot), 6030 &allocated_regs); 6031 break; 6032 case TCG_CALL_ARG_BY_REF_N: 6033 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 6034 break; 6035 default: 6036 g_assert_not_reached(); 6037 } 6038 } 6039 6040 /* Mark dead temporaries and free the associated registers. */ 6041 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 6042 if (IS_DEAD_ARG(i)) { 6043 temp_dead(s, arg_temp(op->args[i])); 6044 } 6045 } 6046 6047 /* Clobber call registers. 
*/ 6048 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 6049 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 6050 tcg_reg_free(s, i, allocated_regs); 6051 } 6052 } 6053 6054 /* 6055 * Save globals if they might be written by the helper, 6056 * sync them if they might be read. 6057 */ 6058 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 6059 /* Nothing to do */ 6060 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 6061 sync_globals(s, allocated_regs); 6062 } else { 6063 save_globals(s, allocated_regs); 6064 } 6065 6066 /* 6067 * If the ABI passes a pointer to the returned struct as the first 6068 * argument, load that now. Pass a pointer to the output home slot. 6069 */ 6070 if (info->out_kind == TCG_CALL_RET_BY_REF) { 6071 TCGTemp *ts = arg_temp(op->args[0]); 6072 6073 if (!ts->mem_allocated) { 6074 temp_allocate_frame(s, ts); 6075 } 6076 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 6077 } 6078 6079 tcg_out_call(s, tcg_call_func(op), info); 6080 6081 /* Assign output registers and emit moves if needed. */ 6082 switch (info->out_kind) { 6083 case TCG_CALL_RET_NORMAL: 6084 for (i = 0; i < nb_oargs; i++) { 6085 TCGTemp *ts = arg_temp(op->args[i]); 6086 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 6087 6088 /* ENV should not be modified. */ 6089 tcg_debug_assert(!temp_readonly(ts)); 6090 6091 set_temp_val_reg(s, ts, reg); 6092 ts->mem_coherent = 0; 6093 } 6094 break; 6095 6096 case TCG_CALL_RET_BY_VEC: 6097 { 6098 TCGTemp *ts = arg_temp(op->args[0]); 6099 6100 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 6101 tcg_debug_assert(ts->temp_subindex == 0); 6102 if (!ts->mem_allocated) { 6103 temp_allocate_frame(s, ts); 6104 } 6105 tcg_out_st(s, TCG_TYPE_V128, 6106 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6107 ts->mem_base->reg, ts->mem_offset); 6108 } 6109 /* fall through to mark all parts in memory */ 6110 6111 case TCG_CALL_RET_BY_REF: 6112 /* The callee has performed a write through the reference. */ 6113 for (i = 0; i < nb_oargs; i++) { 6114 TCGTemp *ts = arg_temp(op->args[i]); 6115 ts->val_type = TEMP_VAL_MEM; 6116 } 6117 break; 6118 6119 default: 6120 g_assert_not_reached(); 6121 } 6122 6123 /* Flush or discard output registers as needed. */ 6124 for (i = 0; i < nb_oargs; i++) { 6125 TCGTemp *ts = arg_temp(op->args[i]); 6126 if (NEED_SYNC_ARG(i)) { 6127 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 6128 } else if (IS_DEAD_ARG(i)) { 6129 temp_dead(s, ts); 6130 } 6131 } 6132 } 6133 6134 /** 6135 * atom_and_align_for_opc: 6136 * @s: tcg context 6137 * @opc: memory operation code 6138 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 6139 * @allow_two_ops: true if we are prepared to issue two operations 6140 * 6141 * Return the alignment and atomicity to use for the inline fast path 6142 * for the given memory operation. The alignment may be larger than 6143 * that specified in @opc, and the correct alignment will be diagnosed 6144 * by the slow path helper. 6145 * 6146 * If @allow_two_ops, the host is prepared to test for 2x alignment, 6147 * and issue two loads or stores for subalignment. 6148 */ 6149 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 6150 MemOp host_atom, bool allow_two_ops) 6151 { 6152 MemOp align = memop_alignment_bits(opc); 6153 MemOp size = opc & MO_SIZE; 6154 MemOp half = size ? size - 1 : 0; 6155 MemOp atom = opc & MO_ATOM_MASK; 6156 MemOp atmax; 6157 6158 switch (atom) { 6159 case MO_ATOM_NONE: 6160 /* The operation requires no specific atomicity. 
*/ 6161 atmax = MO_8; 6162 break; 6163 6164 case MO_ATOM_IFALIGN: 6165 atmax = size; 6166 break; 6167 6168 case MO_ATOM_IFALIGN_PAIR: 6169 atmax = half; 6170 break; 6171 6172 case MO_ATOM_WITHIN16: 6173 atmax = size; 6174 if (size == MO_128) { 6175 /* Misalignment implies !within16, and therefore no atomicity. */ 6176 } else if (host_atom != MO_ATOM_WITHIN16) { 6177 /* The host does not implement within16, so require alignment. */ 6178 align = MAX(align, size); 6179 } 6180 break; 6181 6182 case MO_ATOM_WITHIN16_PAIR: 6183 atmax = size; 6184 /* 6185 * Misalignment implies !within16, and therefore half atomicity. 6186 * Any host prepared for two operations can implement this with 6187 * half alignment. 6188 */ 6189 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 6190 align = MAX(align, half); 6191 } 6192 break; 6193 6194 case MO_ATOM_SUBALIGN: 6195 atmax = size; 6196 if (host_atom != MO_ATOM_SUBALIGN) { 6197 /* If unaligned but not odd, there are subobjects up to half. */ 6198 if (allow_two_ops) { 6199 align = MAX(align, half); 6200 } else { 6201 align = MAX(align, size); 6202 } 6203 } 6204 break; 6205 6206 default: 6207 g_assert_not_reached(); 6208 } 6209 6210 return (TCGAtomAlign){ .atom = atmax, .align = align }; 6211 } 6212 6213 /* 6214 * Similarly for qemu_ld/st slow path helpers. 6215 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 6216 * using only the provided backend tcg_out_* functions. 6217 */ 6218 6219 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 6220 { 6221 int ofs = arg_slot_stk_ofs(slot); 6222 6223 /* 6224 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 6225 * require extension to uint64_t, adjust the address for uint32_t. 6226 */ 6227 if (HOST_BIG_ENDIAN && 6228 TCG_TARGET_REG_BITS == 64 && 6229 type == TCG_TYPE_I32) { 6230 ofs += 4; 6231 } 6232 return ofs; 6233 } 6234 6235 static void tcg_out_helper_load_slots(TCGContext *s, 6236 unsigned nmov, TCGMovExtend *mov, 6237 const TCGLdstHelperParam *parm) 6238 { 6239 unsigned i; 6240 TCGReg dst3; 6241 6242 /* 6243 * Start from the end, storing to the stack first. 6244 * This frees those registers, so we need not consider overlap. 6245 */ 6246 for (i = nmov; i-- > 0; ) { 6247 unsigned slot = mov[i].dst; 6248 6249 if (arg_slot_reg_p(slot)) { 6250 goto found_reg; 6251 } 6252 6253 TCGReg src = mov[i].src; 6254 TCGType dst_type = mov[i].dst_type; 6255 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 6256 6257 /* The argument is going onto the stack; extend into scratch. */ 6258 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 6259 tcg_debug_assert(parm->ntmp != 0); 6260 mov[i].dst = src = parm->tmp[0]; 6261 tcg_out_movext1(s, &mov[i]); 6262 } 6263 6264 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 6265 tcg_out_helper_stk_ofs(dst_type, slot)); 6266 } 6267 return; 6268 6269 found_reg: 6270 /* 6271 * The remaining arguments are in registers. 6272 * Convert slot numbers to argument registers. 6273 */ 6274 nmov = i + 1; 6275 for (i = 0; i < nmov; ++i) { 6276 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 6277 } 6278 6279 switch (nmov) { 6280 case 4: 6281 /* The backend must have provided enough temps for the worst case. */ 6282 tcg_debug_assert(parm->ntmp >= 2); 6283 6284 dst3 = mov[3].dst; 6285 for (unsigned j = 0; j < 3; ++j) { 6286 if (dst3 == mov[j].src) { 6287 /* 6288 * Conflict. Copy the source to a temporary, perform the 6289 * remaining moves, then the extension from our scratch 6290 * on the way out. 
6291 */ 6292 TCGReg scratch = parm->tmp[1]; 6293 6294 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 6295 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 6296 tcg_out_movext1_new_src(s, &mov[3], scratch); 6297 return; 6298 } 6299 } 6300 6301 /* No conflicts: perform this move and continue. */ 6302 tcg_out_movext1(s, &mov[3]); 6303 /* fall through */ 6304 6305 case 3: 6306 tcg_out_movext3(s, mov, mov + 1, mov + 2, 6307 parm->ntmp ? parm->tmp[0] : -1); 6308 break; 6309 case 2: 6310 tcg_out_movext2(s, mov, mov + 1, 6311 parm->ntmp ? parm->tmp[0] : -1); 6312 break; 6313 case 1: 6314 tcg_out_movext1(s, mov); 6315 break; 6316 default: 6317 g_assert_not_reached(); 6318 } 6319 } 6320 6321 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 6322 TCGType type, tcg_target_long imm, 6323 const TCGLdstHelperParam *parm) 6324 { 6325 if (arg_slot_reg_p(slot)) { 6326 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 6327 } else { 6328 int ofs = tcg_out_helper_stk_ofs(type, slot); 6329 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 6330 tcg_debug_assert(parm->ntmp != 0); 6331 tcg_out_movi(s, type, parm->tmp[0], imm); 6332 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 6333 } 6334 } 6335 } 6336 6337 static void tcg_out_helper_load_common_args(TCGContext *s, 6338 const TCGLabelQemuLdst *ldst, 6339 const TCGLdstHelperParam *parm, 6340 const TCGHelperInfo *info, 6341 unsigned next_arg) 6342 { 6343 TCGMovExtend ptr_mov = { 6344 .dst_type = TCG_TYPE_PTR, 6345 .src_type = TCG_TYPE_PTR, 6346 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 6347 }; 6348 const TCGCallArgumentLoc *loc = &info->in[0]; 6349 TCGType type; 6350 unsigned slot; 6351 tcg_target_ulong imm; 6352 6353 /* 6354 * Handle env, which is always first. 6355 */ 6356 ptr_mov.dst = loc->arg_slot; 6357 ptr_mov.src = TCG_AREG0; 6358 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6359 6360 /* 6361 * Handle oi. 6362 */ 6363 imm = ldst->oi; 6364 loc = &info->in[next_arg]; 6365 type = TCG_TYPE_I32; 6366 switch (loc->kind) { 6367 case TCG_CALL_ARG_NORMAL: 6368 break; 6369 case TCG_CALL_ARG_EXTEND_U: 6370 case TCG_CALL_ARG_EXTEND_S: 6371 /* No extension required for MemOpIdx. */ 6372 tcg_debug_assert(imm <= INT32_MAX); 6373 type = TCG_TYPE_REG; 6374 break; 6375 default: 6376 g_assert_not_reached(); 6377 } 6378 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 6379 next_arg++; 6380 6381 /* 6382 * Handle ra. 6383 */ 6384 loc = &info->in[next_arg]; 6385 slot = loc->arg_slot; 6386 if (parm->ra_gen) { 6387 int arg_reg = -1; 6388 TCGReg ra_reg; 6389 6390 if (arg_slot_reg_p(slot)) { 6391 arg_reg = tcg_target_call_iarg_regs[slot]; 6392 } 6393 ra_reg = parm->ra_gen(s, ldst, arg_reg); 6394 6395 ptr_mov.dst = slot; 6396 ptr_mov.src = ra_reg; 6397 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 6398 } else { 6399 imm = (uintptr_t)ldst->raddr; 6400 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 6401 } 6402 } 6403 6404 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 6405 const TCGCallArgumentLoc *loc, 6406 TCGType dst_type, TCGType src_type, 6407 TCGReg lo, TCGReg hi) 6408 { 6409 MemOp reg_mo; 6410 6411 if (dst_type <= TCG_TYPE_REG) { 6412 MemOp src_ext; 6413 6414 switch (loc->kind) { 6415 case TCG_CALL_ARG_NORMAL: 6416 src_ext = src_type == TCG_TYPE_I32 ?
MO_32 : MO_64; 6417 break; 6418 case TCG_CALL_ARG_EXTEND_U: 6419 dst_type = TCG_TYPE_REG; 6420 src_ext = MO_UL; 6421 break; 6422 case TCG_CALL_ARG_EXTEND_S: 6423 dst_type = TCG_TYPE_REG; 6424 src_ext = MO_SL; 6425 break; 6426 default: 6427 g_assert_not_reached(); 6428 } 6429 6430 mov[0].dst = loc->arg_slot; 6431 mov[0].dst_type = dst_type; 6432 mov[0].src = lo; 6433 mov[0].src_type = src_type; 6434 mov[0].src_ext = src_ext; 6435 return 1; 6436 } 6437 6438 if (TCG_TARGET_REG_BITS == 32) { 6439 assert(dst_type == TCG_TYPE_I64); 6440 reg_mo = MO_32; 6441 } else { 6442 assert(dst_type == TCG_TYPE_I128); 6443 reg_mo = MO_64; 6444 } 6445 6446 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 6447 mov[0].src = lo; 6448 mov[0].dst_type = TCG_TYPE_REG; 6449 mov[0].src_type = TCG_TYPE_REG; 6450 mov[0].src_ext = reg_mo; 6451 6452 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 6453 mov[1].src = hi; 6454 mov[1].dst_type = TCG_TYPE_REG; 6455 mov[1].src_type = TCG_TYPE_REG; 6456 mov[1].src_ext = reg_mo; 6457 6458 return 2; 6459 } 6460 6461 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6462 const TCGLdstHelperParam *parm) 6463 { 6464 const TCGHelperInfo *info; 6465 const TCGCallArgumentLoc *loc; 6466 TCGMovExtend mov[2]; 6467 unsigned next_arg, nmov; 6468 MemOp mop = get_memop(ldst->oi); 6469 6470 switch (mop & MO_SIZE) { 6471 case MO_8: 6472 case MO_16: 6473 case MO_32: 6474 info = &info_helper_ld32_mmu; 6475 break; 6476 case MO_64: 6477 info = &info_helper_ld64_mmu; 6478 break; 6479 case MO_128: 6480 info = &info_helper_ld128_mmu; 6481 break; 6482 default: 6483 g_assert_not_reached(); 6484 } 6485 6486 /* Defer env argument. */ 6487 next_arg = 1; 6488 6489 loc = &info->in[next_arg]; 6490 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 6491 /* 6492 * 32-bit host with 32-bit guest: zero-extend the guest address 6493 * to 64-bits for the helper by storing the low part, then 6494 * load a zero for the high part. 6495 */ 6496 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6497 TCG_TYPE_I32, TCG_TYPE_I32, 6498 ldst->addr_reg, -1); 6499 tcg_out_helper_load_slots(s, 1, mov, parm); 6500 6501 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 6502 TCG_TYPE_I32, 0, parm); 6503 next_arg += 2; 6504 } else { 6505 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6506 ldst->addr_reg, -1); 6507 tcg_out_helper_load_slots(s, nmov, mov, parm); 6508 next_arg += nmov; 6509 } 6510 6511 switch (info->out_kind) { 6512 case TCG_CALL_RET_NORMAL: 6513 case TCG_CALL_RET_BY_VEC: 6514 break; 6515 case TCG_CALL_RET_BY_REF: 6516 /* 6517 * The return reference is in the first argument slot. 6518 * We need memory in which to return: re-use the top of stack. 
6519 */ 6520 { 6521 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6522 6523 if (arg_slot_reg_p(0)) { 6524 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 6525 TCG_REG_CALL_STACK, ofs_slot0); 6526 } else { 6527 tcg_debug_assert(parm->ntmp != 0); 6528 tcg_out_addi_ptr(s, parm->tmp[0], 6529 TCG_REG_CALL_STACK, ofs_slot0); 6530 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6531 TCG_REG_CALL_STACK, ofs_slot0); 6532 } 6533 } 6534 break; 6535 default: 6536 g_assert_not_reached(); 6537 } 6538 6539 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6540 } 6541 6542 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 6543 bool load_sign, 6544 const TCGLdstHelperParam *parm) 6545 { 6546 MemOp mop = get_memop(ldst->oi); 6547 TCGMovExtend mov[2]; 6548 int ofs_slot0; 6549 6550 switch (ldst->type) { 6551 case TCG_TYPE_I64: 6552 if (TCG_TARGET_REG_BITS == 32) { 6553 break; 6554 } 6555 /* fall through */ 6556 6557 case TCG_TYPE_I32: 6558 mov[0].dst = ldst->datalo_reg; 6559 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 6560 mov[0].dst_type = ldst->type; 6561 mov[0].src_type = TCG_TYPE_REG; 6562 6563 /* 6564 * If load_sign, then we allowed the helper to perform the 6565 * appropriate sign extension to tcg_target_ulong, and all 6566 * we need now is a plain move. 6567 * 6568 * If they do not, then we expect the relevant extension 6569 * instruction to be no more expensive than a move, and 6570 * we thus save the icache etc by only using one of two 6571 * helper functions. 6572 */ 6573 if (load_sign || !(mop & MO_SIGN)) { 6574 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 6575 mov[0].src_ext = MO_32; 6576 } else { 6577 mov[0].src_ext = MO_64; 6578 } 6579 } else { 6580 mov[0].src_ext = mop & MO_SSIZE; 6581 } 6582 tcg_out_movext1(s, mov); 6583 return; 6584 6585 case TCG_TYPE_I128: 6586 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6587 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 6588 switch (TCG_TARGET_CALL_RET_I128) { 6589 case TCG_CALL_RET_NORMAL: 6590 break; 6591 case TCG_CALL_RET_BY_VEC: 6592 tcg_out_st(s, TCG_TYPE_V128, 6593 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 6594 TCG_REG_CALL_STACK, ofs_slot0); 6595 /* fall through */ 6596 case TCG_CALL_RET_BY_REF: 6597 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 6598 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 6599 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 6600 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 6601 return; 6602 default: 6603 g_assert_not_reached(); 6604 } 6605 break; 6606 6607 default: 6608 g_assert_not_reached(); 6609 } 6610 6611 mov[0].dst = ldst->datalo_reg; 6612 mov[0].src = 6613 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 6614 mov[0].dst_type = TCG_TYPE_REG; 6615 mov[0].src_type = TCG_TYPE_REG; 6616 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6617 6618 mov[1].dst = ldst->datahi_reg; 6619 mov[1].src = 6620 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 6621 mov[1].dst_type = TCG_TYPE_REG; 6622 mov[1].src_type = TCG_TYPE_REG; 6623 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 6624 6625 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? 
parm->tmp[0] : -1); 6626 } 6627 6628 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 6629 const TCGLdstHelperParam *parm) 6630 { 6631 const TCGHelperInfo *info; 6632 const TCGCallArgumentLoc *loc; 6633 TCGMovExtend mov[4]; 6634 TCGType data_type; 6635 unsigned next_arg, nmov, n; 6636 MemOp mop = get_memop(ldst->oi); 6637 6638 switch (mop & MO_SIZE) { 6639 case MO_8: 6640 case MO_16: 6641 case MO_32: 6642 info = &info_helper_st32_mmu; 6643 data_type = TCG_TYPE_I32; 6644 break; 6645 case MO_64: 6646 info = &info_helper_st64_mmu; 6647 data_type = TCG_TYPE_I64; 6648 break; 6649 case MO_128: 6650 info = &info_helper_st128_mmu; 6651 data_type = TCG_TYPE_I128; 6652 break; 6653 default: 6654 g_assert_not_reached(); 6655 } 6656 6657 /* Defer env argument. */ 6658 next_arg = 1; 6659 nmov = 0; 6660 6661 /* Handle addr argument. */ 6662 loc = &info->in[next_arg]; 6663 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); 6664 if (TCG_TARGET_REG_BITS == 32) { 6665 /* 6666 * 32-bit host (and thus 32-bit guest): zero-extend the guest address 6667 * to 64-bits for the helper by storing the low part. Later, 6668 * after we have processed the register inputs, we will load a 6669 * zero for the high part. 6670 */ 6671 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 6672 TCG_TYPE_I32, TCG_TYPE_I32, 6673 ldst->addr_reg, -1); 6674 next_arg += 2; 6675 nmov += 1; 6676 } else { 6677 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 6678 ldst->addr_reg, -1); 6679 next_arg += n; 6680 nmov += n; 6681 } 6682 6683 /* Handle data argument. */ 6684 loc = &info->in[next_arg]; 6685 switch (loc->kind) { 6686 case TCG_CALL_ARG_NORMAL: 6687 case TCG_CALL_ARG_EXTEND_U: 6688 case TCG_CALL_ARG_EXTEND_S: 6689 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 6690 ldst->datalo_reg, ldst->datahi_reg); 6691 next_arg += n; 6692 nmov += n; 6693 tcg_out_helper_load_slots(s, nmov, mov, parm); 6694 break; 6695 6696 case TCG_CALL_ARG_BY_REF: 6697 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 6698 tcg_debug_assert(data_type == TCG_TYPE_I128); 6699 tcg_out_st(s, TCG_TYPE_I64, 6700 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 6701 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 6702 tcg_out_st(s, TCG_TYPE_I64, 6703 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 6704 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 6705 6706 tcg_out_helper_load_slots(s, nmov, mov, parm); 6707 6708 if (arg_slot_reg_p(loc->arg_slot)) { 6709 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 6710 TCG_REG_CALL_STACK, 6711 arg_slot_stk_ofs(loc->ref_slot)); 6712 } else { 6713 tcg_debug_assert(parm->ntmp != 0); 6714 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 6715 arg_slot_stk_ofs(loc->ref_slot)); 6716 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 6717 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 6718 } 6719 next_arg += 2; 6720 break; 6721 6722 default: 6723 g_assert_not_reached(); 6724 } 6725 6726 if (TCG_TARGET_REG_BITS == 32) { 6727 /* Zero extend the address by loading a zero for the high part. 
*/ 6728 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 6729 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 6730 } 6731 6732 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 6733 } 6734 6735 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) 6736 { 6737 int i, start_words, num_insns; 6738 TCGOp *op; 6739 6740 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 6741 && qemu_log_in_addr_range(pc_start))) { 6742 FILE *logfile = qemu_log_trylock(); 6743 if (logfile) { 6744 fprintf(logfile, "OP:\n"); 6745 tcg_dump_ops(s, logfile, false); 6746 fprintf(logfile, "\n"); 6747 qemu_log_unlock(logfile); 6748 } 6749 } 6750 6751 #ifdef CONFIG_DEBUG_TCG 6752 /* Ensure all labels referenced have been emitted. */ 6753 { 6754 TCGLabel *l; 6755 bool error = false; 6756 6757 QSIMPLEQ_FOREACH(l, &s->labels, next) { 6758 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 6759 qemu_log_mask(CPU_LOG_TB_OP, 6760 "$L%d referenced but not present.\n", l->id); 6761 error = true; 6762 } 6763 } 6764 assert(!error); 6765 } 6766 #endif 6767 6768 /* Do not reuse any EBB that may be allocated within the TB. */ 6769 tcg_temp_ebb_reset_freed(s); 6770 6771 tcg_optimize(s); 6772 6773 reachable_code_pass(s); 6774 liveness_pass_0(s); 6775 liveness_pass_1(s); 6776 6777 if (s->nb_indirects > 0) { 6778 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 6779 && qemu_log_in_addr_range(pc_start))) { 6780 FILE *logfile = qemu_log_trylock(); 6781 if (logfile) { 6782 fprintf(logfile, "OP before indirect lowering:\n"); 6783 tcg_dump_ops(s, logfile, false); 6784 fprintf(logfile, "\n"); 6785 qemu_log_unlock(logfile); 6786 } 6787 } 6788 6789 /* Replace indirect temps with direct temps. */ 6790 if (liveness_pass_2(s)) { 6791 /* If changes were made, re-run liveness. */ 6792 liveness_pass_1(s); 6793 } 6794 } 6795 6796 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 6797 && qemu_log_in_addr_range(pc_start))) { 6798 FILE *logfile = qemu_log_trylock(); 6799 if (logfile) { 6800 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 6801 tcg_dump_ops(s, logfile, true); 6802 fprintf(logfile, "\n"); 6803 qemu_log_unlock(logfile); 6804 } 6805 } 6806 6807 /* Initialize goto_tb jump offsets. */ 6808 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 6809 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 6810 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 6811 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 6812 6813 tcg_reg_alloc_start(s); 6814 6815 /* 6816 * Reset the buffer pointers when restarting after overflow. 6817 * TODO: Move this into translate-all.c with the rest of the 6818 * buffer management. Having only this done here is confusing. 6819 */ 6820 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 6821 s->code_ptr = s->code_buf; 6822 s->data_gen_ptr = NULL; 6823 6824 QSIMPLEQ_INIT(&s->ldst_labels); 6825 s->pool_labels = NULL; 6826 6827 start_words = s->insn_start_words; 6828 s->gen_insn_data = 6829 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); 6830 6831 tcg_out_tb_start(s); 6832 6833 num_insns = -1; 6834 s->carry_live = false; 6835 QTAILQ_FOREACH(op, &s->ops, link) { 6836 TCGOpcode opc = op->opc; 6837 6838 switch (opc) { 6839 case INDEX_op_extrl_i64_i32: 6840 assert(TCG_TARGET_REG_BITS == 64); 6841 /* 6842 * If TCG_TYPE_I32 is represented in some canonical form, 6843 * e.g. zero or sign-extended, then emit as a unary op. 6844 * Otherwise we can treat this as a plain move. 
6845 * If the output dies, treat this as a plain move, because 6846 * this will be implemented with a store. 6847 */ 6848 if (TCG_TARGET_HAS_extr_i64_i32) { 6849 TCGLifeData arg_life = op->life; 6850 if (!IS_DEAD_ARG(0)) { 6851 goto do_default; 6852 } 6853 } 6854 /* fall through */ 6855 case INDEX_op_mov: 6856 case INDEX_op_mov_vec: 6857 tcg_reg_alloc_mov(s, op); 6858 break; 6859 case INDEX_op_dup_vec: 6860 tcg_reg_alloc_dup(s, op); 6861 break; 6862 case INDEX_op_insn_start: 6863 assert_carry_dead(s); 6864 if (num_insns >= 0) { 6865 size_t off = tcg_current_code_size(s); 6866 s->gen_insn_end_off[num_insns] = off; 6867 /* Assert that we do not overflow our stored offset. */ 6868 assert(s->gen_insn_end_off[num_insns] == off); 6869 } 6870 num_insns++; 6871 for (i = 0; i < start_words; ++i) { 6872 s->gen_insn_data[num_insns * start_words + i] = 6873 tcg_get_insn_start_param(op, i); 6874 } 6875 break; 6876 case INDEX_op_discard: 6877 temp_dead(s, arg_temp(op->args[0])); 6878 break; 6879 case INDEX_op_set_label: 6880 tcg_reg_alloc_bb_end(s, s->reserved_regs); 6881 tcg_out_label(s, arg_label(op->args[0])); 6882 break; 6883 case INDEX_op_call: 6884 assert_carry_dead(s); 6885 tcg_reg_alloc_call(s, op); 6886 break; 6887 case INDEX_op_exit_tb: 6888 tcg_out_exit_tb(s, op->args[0]); 6889 break; 6890 case INDEX_op_goto_tb: 6891 tcg_out_goto_tb(s, op->args[0]); 6892 break; 6893 case INDEX_op_dup2_vec: 6894 if (tcg_reg_alloc_dup2(s, op)) { 6895 break; 6896 } 6897 /* fall through */ 6898 default: 6899 do_default: 6900 /* Sanity check that we've not introduced any unhandled opcodes. */ 6901 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), 6902 TCGOP_FLAGS(op))); 6903 /* Note: in order to speed up the code, it would be much 6904 faster to have specialized register allocator functions for 6905 some common argument patterns */ 6906 tcg_reg_alloc_op(s, op); 6907 break; 6908 } 6909 /* Test for (pending) buffer overflow. The assumption is that any 6910 one operation beginning below the high water mark cannot overrun 6911 the buffer completely. Thus we can test for overflow after 6912 generating code without having to check during generation. */ 6913 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 6914 return -1; 6915 } 6916 /* Test for TB overflow, as seen by gen_insn_end_off. */ 6917 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 6918 return -2; 6919 } 6920 } 6921 assert_carry_dead(s); 6922 6923 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); 6924 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 6925 6926 /* Generate TB finalization at the end of block */ 6927 i = tcg_out_ldst_finalize(s); 6928 if (i < 0) { 6929 return i; 6930 } 6931 i = tcg_out_pool_finalize(s); 6932 if (i < 0) { 6933 return i; 6934 } 6935 if (!tcg_resolve_relocs(s)) { 6936 return -2; 6937 } 6938 6939 #ifndef CONFIG_TCG_INTERPRETER 6940 /* flush instruction cache */ 6941 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 6942 (uintptr_t)s->code_buf, 6943 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 6944 #endif 6945 6946 return tcg_current_code_size(s); 6947 } 6948 6949 #ifdef ELF_HOST_MACHINE 6950 /* In order to use this feature, the backend needs to do three things: 6951 6952 (1) Define ELF_HOST_MACHINE to indicate both what value to 6953 put into the ELF image and to indicate support for the feature. 6954 6955 (2) Define tcg_register_jit. 
This should create a buffer containing 6956 the contents of a .debug_frame section that describes the post- 6957 prologue unwind info for the tcg machine. 6958 6959 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 6960 */ 6961 6962 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 6963 typedef enum { 6964 JIT_NOACTION = 0, 6965 JIT_REGISTER_FN, 6966 JIT_UNREGISTER_FN 6967 } jit_actions_t; 6968 6969 struct jit_code_entry { 6970 struct jit_code_entry *next_entry; 6971 struct jit_code_entry *prev_entry; 6972 const void *symfile_addr; 6973 uint64_t symfile_size; 6974 }; 6975 6976 struct jit_descriptor { 6977 uint32_t version; 6978 uint32_t action_flag; 6979 struct jit_code_entry *relevant_entry; 6980 struct jit_code_entry *first_entry; 6981 }; 6982 6983 void __jit_debug_register_code(void) __attribute__((noinline)); 6984 void __jit_debug_register_code(void) 6985 { 6986 asm(""); 6987 } 6988 6989 /* Must statically initialize the version, because GDB may check 6990 the version before we can set it. */ 6991 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 6992 6993 /* End GDB interface. */ 6994 6995 static int find_string(const char *strtab, const char *str) 6996 { 6997 const char *p = strtab + 1; 6998 6999 while (1) { 7000 if (strcmp(p, str) == 0) { 7001 return p - strtab; 7002 } 7003 p += strlen(p) + 1; 7004 } 7005 } 7006 7007 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 7008 const void *debug_frame, 7009 size_t debug_frame_size) 7010 { 7011 struct __attribute__((packed)) DebugInfo { 7012 uint32_t len; 7013 uint16_t version; 7014 uint32_t abbrev; 7015 uint8_t ptr_size; 7016 uint8_t cu_die; 7017 uint16_t cu_lang; 7018 uintptr_t cu_low_pc; 7019 uintptr_t cu_high_pc; 7020 uint8_t fn_die; 7021 char fn_name[16]; 7022 uintptr_t fn_low_pc; 7023 uintptr_t fn_high_pc; 7024 uint8_t cu_eoc; 7025 }; 7026 7027 struct ElfImage { 7028 ElfW(Ehdr) ehdr; 7029 ElfW(Phdr) phdr; 7030 ElfW(Shdr) shdr[7]; 7031 ElfW(Sym) sym[2]; 7032 struct DebugInfo di; 7033 uint8_t da[24]; 7034 char str[80]; 7035 }; 7036 7037 struct ElfImage *img; 7038 7039 static const struct ElfImage img_template = { 7040 .ehdr = { 7041 .e_ident[EI_MAG0] = ELFMAG0, 7042 .e_ident[EI_MAG1] = ELFMAG1, 7043 .e_ident[EI_MAG2] = ELFMAG2, 7044 .e_ident[EI_MAG3] = ELFMAG3, 7045 .e_ident[EI_CLASS] = ELF_CLASS, 7046 .e_ident[EI_DATA] = ELF_DATA, 7047 .e_ident[EI_VERSION] = EV_CURRENT, 7048 .e_type = ET_EXEC, 7049 .e_machine = ELF_HOST_MACHINE, 7050 .e_version = EV_CURRENT, 7051 .e_phoff = offsetof(struct ElfImage, phdr), 7052 .e_shoff = offsetof(struct ElfImage, shdr), 7053 .e_ehsize = sizeof(ElfW(Shdr)), 7054 .e_phentsize = sizeof(ElfW(Phdr)), 7055 .e_phnum = 1, 7056 .e_shentsize = sizeof(ElfW(Shdr)), 7057 .e_shnum = ARRAY_SIZE(img->shdr), 7058 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 7059 #ifdef ELF_HOST_FLAGS 7060 .e_flags = ELF_HOST_FLAGS, 7061 #endif 7062 #ifdef ELF_OSABI 7063 .e_ident[EI_OSABI] = ELF_OSABI, 7064 #endif 7065 }, 7066 .phdr = { 7067 .p_type = PT_LOAD, 7068 .p_flags = PF_X, 7069 }, 7070 .shdr = { 7071 [0] = { .sh_type = SHT_NULL }, 7072 /* Trick: The contents of code_gen_buffer are not present in 7073 this fake ELF file; that got allocated elsewhere. Therefore 7074 we mark .text as SHT_NOBITS (similar to .bss) so that readers 7075 will not look for contents. We can record any address. 
*/ 7076 [1] = { /* .text */ 7077 .sh_type = SHT_NOBITS, 7078 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 7079 }, 7080 [2] = { /* .debug_info */ 7081 .sh_type = SHT_PROGBITS, 7082 .sh_offset = offsetof(struct ElfImage, di), 7083 .sh_size = sizeof(struct DebugInfo), 7084 }, 7085 [3] = { /* .debug_abbrev */ 7086 .sh_type = SHT_PROGBITS, 7087 .sh_offset = offsetof(struct ElfImage, da), 7088 .sh_size = sizeof(img->da), 7089 }, 7090 [4] = { /* .debug_frame */ 7091 .sh_type = SHT_PROGBITS, 7092 .sh_offset = sizeof(struct ElfImage), 7093 }, 7094 [5] = { /* .symtab */ 7095 .sh_type = SHT_SYMTAB, 7096 .sh_offset = offsetof(struct ElfImage, sym), 7097 .sh_size = sizeof(img->sym), 7098 .sh_info = 1, 7099 .sh_link = ARRAY_SIZE(img->shdr) - 1, 7100 .sh_entsize = sizeof(ElfW(Sym)), 7101 }, 7102 [6] = { /* .strtab */ 7103 .sh_type = SHT_STRTAB, 7104 .sh_offset = offsetof(struct ElfImage, str), 7105 .sh_size = sizeof(img->str), 7106 } 7107 }, 7108 .sym = { 7109 [1] = { /* code_gen_buffer */ 7110 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 7111 .st_shndx = 1, 7112 } 7113 }, 7114 .di = { 7115 .len = sizeof(struct DebugInfo) - 4, 7116 .version = 2, 7117 .ptr_size = sizeof(void *), 7118 .cu_die = 1, 7119 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 7120 .fn_die = 2, 7121 .fn_name = "code_gen_buffer" 7122 }, 7123 .da = { 7124 1, /* abbrev number (the cu) */ 7125 0x11, 1, /* DW_TAG_compile_unit, has children */ 7126 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 7127 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 7128 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 7129 0, 0, /* end of abbrev */ 7130 2, /* abbrev number (the fn) */ 7131 0x2e, 0, /* DW_TAG_subprogram, no children */ 7132 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 7133 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 7134 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 7135 0, 0, /* end of abbrev */ 7136 0 /* no more abbrev */ 7137 }, 7138 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 7139 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 7140 }; 7141 7142 /* We only need a single jit entry; statically allocate it. */ 7143 static struct jit_code_entry one_entry; 7144 7145 uintptr_t buf = (uintptr_t)buf_ptr; 7146 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 7147 DebugFrameHeader *dfh; 7148 7149 img = g_malloc(img_size); 7150 *img = img_template; 7151 7152 img->phdr.p_vaddr = buf; 7153 img->phdr.p_paddr = buf; 7154 img->phdr.p_memsz = buf_size; 7155 7156 img->shdr[1].sh_name = find_string(img->str, ".text"); 7157 img->shdr[1].sh_addr = buf; 7158 img->shdr[1].sh_size = buf_size; 7159 7160 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 7161 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 7162 7163 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 7164 img->shdr[4].sh_size = debug_frame_size; 7165 7166 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 7167 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 7168 7169 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 7170 img->sym[1].st_value = buf; 7171 img->sym[1].st_size = buf_size; 7172 7173 img->di.cu_low_pc = buf; 7174 img->di.cu_high_pc = buf + buf_size; 7175 img->di.fn_low_pc = buf; 7176 img->di.fn_high_pc = buf + buf_size; 7177 7178 dfh = (DebugFrameHeader *)(img + 1); 7179 memcpy(dfh, debug_frame, debug_frame_size); 7180 dfh->fde.func_start = buf; 7181 dfh->fde.func_len = buf_size; 7182 7183 #ifdef DEBUG_JIT 7184 /* Enable this block to be able to debug the ELF image file creation. 
7185 One can use readelf, objdump, or other inspection utilities. */ 7186 { 7187 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 7188 FILE *f = fopen(jit, "w+b"); 7189 if (f) { 7190 if (fwrite(img, img_size, 1, f) != img_size) { 7191 /* Avoid stupid unused return value warning for fwrite. */ 7192 } 7193 fclose(f); 7194 } 7195 } 7196 #endif 7197 7198 one_entry.symfile_addr = img; 7199 one_entry.symfile_size = img_size; 7200 7201 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 7202 __jit_debug_descriptor.relevant_entry = &one_entry; 7203 __jit_debug_descriptor.first_entry = &one_entry; 7204 __jit_debug_register_code(); 7205 } 7206 #else 7207 /* No support for the feature. Provide the entry point expected by exec.c, 7208 and implement the internal function we declared earlier. */ 7209 7210 static void tcg_register_jit_int(const void *buf, size_t size, 7211 const void *debug_frame, 7212 size_t debug_frame_size) 7213 { 7214 } 7215 7216 void tcg_register_jit(const void *buf, size_t buf_size) 7217 { 7218 } 7219 #endif /* ELF_HOST_MACHINE */ 7220 7221 #if !TCG_TARGET_MAYBE_vec 7222 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 7223 { 7224 g_assert_not_reached(); 7225 } 7226 #endif 7227
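
/*
 * Illustrative sketch only (not part of this file): how a backend's
 * qemu_ld slow path, declared near the top of this file as
 * tcg_out_qemu_ld_slow_path(), typically consumes tcg_out_ld_helper_args()
 * and tcg_out_ld_helper_ret() defined above.  The names TCG_REG_TMP,
 * qemu_ld_helpers and tcg_out_branch_to() are hypothetical stand-ins for
 * whatever the concrete tcg-target.c.inc actually provides.
 *
 *   static const TCGLdstHelperParam ldst_helper_param = {
 *       .ntmp = 1, .tmp = { TCG_REG_TMP },   // scratch used for argument moves
 *   };
 *
 *   static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 *   {
 *       MemOp opc = get_memop(l->oi);
 *
 *       // ... patch l->label_ptr[] so the fast-path branch lands here ...
 *
 *       // Marshal env, addr, oi and raddr into argument registers/slots.
 *       tcg_out_ld_helper_args(s, l, &ldst_helper_param);
 *       // Call the size-specific helper (helper_ldub_mmu ... helper_ldq_mmu).
 *       tcg_out_branch_to(s, qemu_ld_helpers[opc & MO_SIZE]);
 *       // Move the helper's return value into l->datalo_reg/datahi_reg.
 *       tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
 *
 *       // Resume execution after the original memory access.
 *       tcg_out_branch_to(s, l->raddr);
 *       return true;
 *   }
 */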
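
/*
 * Illustrative sketch only (not part of this file): the minimal per-backend
 * pieces behind the three steps described before tcg_register_jit_int()
 * above.  The ELF_HOST_MACHINE value is host-specific and the DebugFrame
 * initializers are elided placeholders, not real unwind data.
 *
 *   // (1) in tcg-target.h, e.g. for an x86-64 host:
 *   // #define ELF_HOST_MACHINE  EM_X86_64
 *
 *   // (2) in tcg-target.c.inc: a constant .debug_frame image describing the
 *   // post-prologue frame; func_start/func_len are patched at registration
 *   // time by tcg_register_jit_int().
 *   static const DebugFrameHeader debug_frame = {
 *       .cie = { ... },   // version, augmentation, code/data align, ret column
 *       .fde = { ... },   // cie_offset; start/len filled in when registered
 *   };
 *
 *   // (3) hand the generated-code buffer and unwind info to the common code,
 *   // which builds the in-memory ELF image that GDB picks up through the
 *   // __jit_debug_descriptor / __jit_debug_register_code() hook above.
 *   void tcg_register_jit(const void *buf, size_t buf_size)
 *   {
 *       tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
 *   }
 */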