/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
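/*
 * Added illustrative note, not from the original source: tcg_out_movext2
 * orders the two moves so that neither destination clobbers the other's
 * source.  In the fully overlapping case, e.g. i1 = {dst=A, src=B} and
 * i2 = {dst=B, src=A}, it either exchanges A and B via tcg_out_xchg (when
 * the backend supports it) or first copies one source into @scratch.
 */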
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
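/*
 * Added illustrative note, not from the original source: a hypothetical
 * entry C_O1_I2(r, r, ri) in tcg-target-con-set.h is expanded three times
 * with different macro definitions.  Above, it first becomes the enumerator
 * c_o1_i2_r_r_ri and then the table entry
 * { .args_ct_str = { "r", "r", "ri" } }; below, it expands back to
 * c_o1_i2_r_r_ri so that tcg_target_op_def() can return the matching index.
 */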
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
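/*
 * Added illustrative note, not from the original source: each typemask
 * packs one 3-bit typecode per position, with the return type in bits 0..2
 * and argument N in bits (N * 3)..(N * 3 + 2).  init_ffi_layouts() below
 * derives the argument count from the highest non-zero field and decodes
 * argument j with extract32(typemask, (j + 1) * 3, 3).
 */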
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t. */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }
    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type. */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
1944 return TCG_TARGET_HAS_nand_i32; 1945 case INDEX_op_nor_i32: 1946 return TCG_TARGET_HAS_nor_i32; 1947 case INDEX_op_clz_i32: 1948 return TCG_TARGET_HAS_clz_i32; 1949 case INDEX_op_ctz_i32: 1950 return TCG_TARGET_HAS_ctz_i32; 1951 case INDEX_op_ctpop_i32: 1952 return TCG_TARGET_HAS_ctpop_i32; 1953 1954 case INDEX_op_brcond2_i32: 1955 case INDEX_op_setcond2_i32: 1956 return TCG_TARGET_REG_BITS == 32; 1957 1958 case INDEX_op_mov_i64: 1959 case INDEX_op_setcond_i64: 1960 case INDEX_op_brcond_i64: 1961 case INDEX_op_ld8u_i64: 1962 case INDEX_op_ld8s_i64: 1963 case INDEX_op_ld16u_i64: 1964 case INDEX_op_ld16s_i64: 1965 case INDEX_op_ld32u_i64: 1966 case INDEX_op_ld32s_i64: 1967 case INDEX_op_ld_i64: 1968 case INDEX_op_st8_i64: 1969 case INDEX_op_st16_i64: 1970 case INDEX_op_st32_i64: 1971 case INDEX_op_st_i64: 1972 case INDEX_op_add_i64: 1973 case INDEX_op_sub_i64: 1974 case INDEX_op_mul_i64: 1975 case INDEX_op_and_i64: 1976 case INDEX_op_or_i64: 1977 case INDEX_op_xor_i64: 1978 case INDEX_op_shl_i64: 1979 case INDEX_op_shr_i64: 1980 case INDEX_op_sar_i64: 1981 case INDEX_op_ext_i32_i64: 1982 case INDEX_op_extu_i32_i64: 1983 return TCG_TARGET_REG_BITS == 64; 1984 1985 case INDEX_op_movcond_i64: 1986 return TCG_TARGET_HAS_movcond_i64; 1987 case INDEX_op_div_i64: 1988 case INDEX_op_divu_i64: 1989 return TCG_TARGET_HAS_div_i64; 1990 case INDEX_op_rem_i64: 1991 case INDEX_op_remu_i64: 1992 return TCG_TARGET_HAS_rem_i64; 1993 case INDEX_op_div2_i64: 1994 case INDEX_op_divu2_i64: 1995 return TCG_TARGET_HAS_div2_i64; 1996 case INDEX_op_rotl_i64: 1997 case INDEX_op_rotr_i64: 1998 return TCG_TARGET_HAS_rot_i64; 1999 case INDEX_op_deposit_i64: 2000 return TCG_TARGET_HAS_deposit_i64; 2001 case INDEX_op_extract_i64: 2002 return TCG_TARGET_HAS_extract_i64; 2003 case INDEX_op_sextract_i64: 2004 return TCG_TARGET_HAS_sextract_i64; 2005 case INDEX_op_extract2_i64: 2006 return TCG_TARGET_HAS_extract2_i64; 2007 case INDEX_op_extrl_i64_i32: 2008 return TCG_TARGET_HAS_extrl_i64_i32; 2009 case INDEX_op_extrh_i64_i32: 2010 return TCG_TARGET_HAS_extrh_i64_i32; 2011 case INDEX_op_ext8s_i64: 2012 return TCG_TARGET_HAS_ext8s_i64; 2013 case INDEX_op_ext16s_i64: 2014 return TCG_TARGET_HAS_ext16s_i64; 2015 case INDEX_op_ext32s_i64: 2016 return TCG_TARGET_HAS_ext32s_i64; 2017 case INDEX_op_ext8u_i64: 2018 return TCG_TARGET_HAS_ext8u_i64; 2019 case INDEX_op_ext16u_i64: 2020 return TCG_TARGET_HAS_ext16u_i64; 2021 case INDEX_op_ext32u_i64: 2022 return TCG_TARGET_HAS_ext32u_i64; 2023 case INDEX_op_bswap16_i64: 2024 return TCG_TARGET_HAS_bswap16_i64; 2025 case INDEX_op_bswap32_i64: 2026 return TCG_TARGET_HAS_bswap32_i64; 2027 case INDEX_op_bswap64_i64: 2028 return TCG_TARGET_HAS_bswap64_i64; 2029 case INDEX_op_not_i64: 2030 return TCG_TARGET_HAS_not_i64; 2031 case INDEX_op_neg_i64: 2032 return TCG_TARGET_HAS_neg_i64; 2033 case INDEX_op_andc_i64: 2034 return TCG_TARGET_HAS_andc_i64; 2035 case INDEX_op_orc_i64: 2036 return TCG_TARGET_HAS_orc_i64; 2037 case INDEX_op_eqv_i64: 2038 return TCG_TARGET_HAS_eqv_i64; 2039 case INDEX_op_nand_i64: 2040 return TCG_TARGET_HAS_nand_i64; 2041 case INDEX_op_nor_i64: 2042 return TCG_TARGET_HAS_nor_i64; 2043 case INDEX_op_clz_i64: 2044 return TCG_TARGET_HAS_clz_i64; 2045 case INDEX_op_ctz_i64: 2046 return TCG_TARGET_HAS_ctz_i64; 2047 case INDEX_op_ctpop_i64: 2048 return TCG_TARGET_HAS_ctpop_i64; 2049 case INDEX_op_add2_i64: 2050 return TCG_TARGET_HAS_add2_i64; 2051 case INDEX_op_sub2_i64: 2052 return TCG_TARGET_HAS_sub2_i64; 2053 case INDEX_op_mulu2_i64: 2054 return TCG_TARGET_HAS_mulu2_i64; 
2055 case INDEX_op_muls2_i64: 2056 return TCG_TARGET_HAS_muls2_i64; 2057 case INDEX_op_muluh_i64: 2058 return TCG_TARGET_HAS_muluh_i64; 2059 case INDEX_op_mulsh_i64: 2060 return TCG_TARGET_HAS_mulsh_i64; 2061 2062 case INDEX_op_mov_vec: 2063 case INDEX_op_dup_vec: 2064 case INDEX_op_dupm_vec: 2065 case INDEX_op_ld_vec: 2066 case INDEX_op_st_vec: 2067 case INDEX_op_add_vec: 2068 case INDEX_op_sub_vec: 2069 case INDEX_op_and_vec: 2070 case INDEX_op_or_vec: 2071 case INDEX_op_xor_vec: 2072 case INDEX_op_cmp_vec: 2073 return have_vec; 2074 case INDEX_op_dup2_vec: 2075 return have_vec && TCG_TARGET_REG_BITS == 32; 2076 case INDEX_op_not_vec: 2077 return have_vec && TCG_TARGET_HAS_not_vec; 2078 case INDEX_op_neg_vec: 2079 return have_vec && TCG_TARGET_HAS_neg_vec; 2080 case INDEX_op_abs_vec: 2081 return have_vec && TCG_TARGET_HAS_abs_vec; 2082 case INDEX_op_andc_vec: 2083 return have_vec && TCG_TARGET_HAS_andc_vec; 2084 case INDEX_op_orc_vec: 2085 return have_vec && TCG_TARGET_HAS_orc_vec; 2086 case INDEX_op_nand_vec: 2087 return have_vec && TCG_TARGET_HAS_nand_vec; 2088 case INDEX_op_nor_vec: 2089 return have_vec && TCG_TARGET_HAS_nor_vec; 2090 case INDEX_op_eqv_vec: 2091 return have_vec && TCG_TARGET_HAS_eqv_vec; 2092 case INDEX_op_mul_vec: 2093 return have_vec && TCG_TARGET_HAS_mul_vec; 2094 case INDEX_op_shli_vec: 2095 case INDEX_op_shri_vec: 2096 case INDEX_op_sari_vec: 2097 return have_vec && TCG_TARGET_HAS_shi_vec; 2098 case INDEX_op_shls_vec: 2099 case INDEX_op_shrs_vec: 2100 case INDEX_op_sars_vec: 2101 return have_vec && TCG_TARGET_HAS_shs_vec; 2102 case INDEX_op_shlv_vec: 2103 case INDEX_op_shrv_vec: 2104 case INDEX_op_sarv_vec: 2105 return have_vec && TCG_TARGET_HAS_shv_vec; 2106 case INDEX_op_rotli_vec: 2107 return have_vec && TCG_TARGET_HAS_roti_vec; 2108 case INDEX_op_rotls_vec: 2109 return have_vec && TCG_TARGET_HAS_rots_vec; 2110 case INDEX_op_rotlv_vec: 2111 case INDEX_op_rotrv_vec: 2112 return have_vec && TCG_TARGET_HAS_rotv_vec; 2113 case INDEX_op_ssadd_vec: 2114 case INDEX_op_usadd_vec: 2115 case INDEX_op_sssub_vec: 2116 case INDEX_op_ussub_vec: 2117 return have_vec && TCG_TARGET_HAS_sat_vec; 2118 case INDEX_op_smin_vec: 2119 case INDEX_op_umin_vec: 2120 case INDEX_op_smax_vec: 2121 case INDEX_op_umax_vec: 2122 return have_vec && TCG_TARGET_HAS_minmax_vec; 2123 case INDEX_op_bitsel_vec: 2124 return have_vec && TCG_TARGET_HAS_bitsel_vec; 2125 case INDEX_op_cmpsel_vec: 2126 return have_vec && TCG_TARGET_HAS_cmpsel_vec; 2127 2128 default: 2129 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 2130 return true; 2131 } 2132 } 2133 2134 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2135 2136 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 2137 { 2138 const TCGHelperInfo *info; 2139 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2140 int n_extend = 0; 2141 TCGOp *op; 2142 int i, n, pi = 0, total_args; 2143 2144 info = g_hash_table_lookup(helper_table, (gpointer)func); 2145 total_args = info->nr_out + info->nr_in + 2; 2146 op = tcg_op_alloc(INDEX_op_call, total_args); 2147 2148 #ifdef CONFIG_PLUGIN 2149 /* Flag helpers that may affect guest state */ 2150 if (tcg_ctx->plugin_insn && 2151 !(info->flags & TCG_CALL_PLUGIN) && 2152 !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2153 tcg_ctx->plugin_insn->calls_helpers = true; 2154 } 2155 #endif 2156 2157 TCGOP_CALLO(op) = n = info->nr_out; 2158 switch (n) { 2159 case 0: 2160 tcg_debug_assert(ret == NULL); 2161 break; 2162 case 1: 2163 tcg_debug_assert(ret != NULL); 2164 op->args[pi++] = temp_arg(ret); 
2165 break; 2166 case 2: 2167 case 4: 2168 tcg_debug_assert(ret != NULL); 2169 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2170 tcg_debug_assert(ret->temp_subindex == 0); 2171 for (i = 0; i < n; ++i) { 2172 op->args[pi++] = temp_arg(ret + i); 2173 } 2174 break; 2175 default: 2176 g_assert_not_reached(); 2177 } 2178 2179 TCGOP_CALLI(op) = n = info->nr_in; 2180 for (i = 0; i < n; i++) { 2181 const TCGCallArgumentLoc *loc = &info->in[i]; 2182 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2183 2184 switch (loc->kind) { 2185 case TCG_CALL_ARG_NORMAL: 2186 case TCG_CALL_ARG_BY_REF: 2187 case TCG_CALL_ARG_BY_REF_N: 2188 op->args[pi++] = temp_arg(ts); 2189 break; 2190 2191 case TCG_CALL_ARG_EXTEND_U: 2192 case TCG_CALL_ARG_EXTEND_S: 2193 { 2194 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2195 TCGv_i32 orig = temp_tcgv_i32(ts); 2196 2197 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2198 tcg_gen_ext_i32_i64(temp, orig); 2199 } else { 2200 tcg_gen_extu_i32_i64(temp, orig); 2201 } 2202 op->args[pi++] = tcgv_i64_arg(temp); 2203 extend_free[n_extend++] = temp; 2204 } 2205 break; 2206 2207 default: 2208 g_assert_not_reached(); 2209 } 2210 } 2211 op->args[pi++] = (uintptr_t)func; 2212 op->args[pi++] = (uintptr_t)info; 2213 tcg_debug_assert(pi == total_args); 2214 2215 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2216 2217 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2218 for (i = 0; i < n_extend; ++i) { 2219 tcg_temp_free_i64(extend_free[i]); 2220 } 2221 } 2222 2223 static void tcg_reg_alloc_start(TCGContext *s) 2224 { 2225 int i, n; 2226 2227 for (i = 0, n = s->nb_temps; i < n; i++) { 2228 TCGTemp *ts = &s->temps[i]; 2229 TCGTempVal val = TEMP_VAL_MEM; 2230 2231 switch (ts->kind) { 2232 case TEMP_CONST: 2233 val = TEMP_VAL_CONST; 2234 break; 2235 case TEMP_FIXED: 2236 val = TEMP_VAL_REG; 2237 break; 2238 case TEMP_GLOBAL: 2239 break; 2240 case TEMP_EBB: 2241 val = TEMP_VAL_DEAD; 2242 /* fall through */ 2243 case TEMP_TB: 2244 ts->mem_allocated = 0; 2245 break; 2246 default: 2247 g_assert_not_reached(); 2248 } 2249 ts->val_type = val; 2250 } 2251 2252 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2253 } 2254 2255 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2256 TCGTemp *ts) 2257 { 2258 int idx = temp_idx(ts); 2259 2260 switch (ts->kind) { 2261 case TEMP_FIXED: 2262 case TEMP_GLOBAL: 2263 pstrcpy(buf, buf_size, ts->name); 2264 break; 2265 case TEMP_TB: 2266 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2267 break; 2268 case TEMP_EBB: 2269 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2270 break; 2271 case TEMP_CONST: 2272 switch (ts->type) { 2273 case TCG_TYPE_I32: 2274 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2275 break; 2276 #if TCG_TARGET_REG_BITS > 32 2277 case TCG_TYPE_I64: 2278 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2279 break; 2280 #endif 2281 case TCG_TYPE_V64: 2282 case TCG_TYPE_V128: 2283 case TCG_TYPE_V256: 2284 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2285 64 << (ts->type - TCG_TYPE_V64), ts->val); 2286 break; 2287 default: 2288 g_assert_not_reached(); 2289 } 2290 break; 2291 } 2292 return buf; 2293 } 2294 2295 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2296 int buf_size, TCGArg arg) 2297 { 2298 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2299 } 2300 2301 static const char * const cond_name[] = 2302 { 2303 [TCG_COND_NEVER] = "never", 2304 [TCG_COND_ALWAYS] = "always", 2305 [TCG_COND_EQ] = "eq", 2306 [TCG_COND_NE] = "ne", 2307 [TCG_COND_LT] = "lt", 2308 [TCG_COND_GE] = 
"ge", 2309 [TCG_COND_LE] = "le", 2310 [TCG_COND_GT] = "gt", 2311 [TCG_COND_LTU] = "ltu", 2312 [TCG_COND_GEU] = "geu", 2313 [TCG_COND_LEU] = "leu", 2314 [TCG_COND_GTU] = "gtu" 2315 }; 2316 2317 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2318 { 2319 [MO_UB] = "ub", 2320 [MO_SB] = "sb", 2321 [MO_LEUW] = "leuw", 2322 [MO_LESW] = "lesw", 2323 [MO_LEUL] = "leul", 2324 [MO_LESL] = "lesl", 2325 [MO_LEUQ] = "leq", 2326 [MO_BEUW] = "beuw", 2327 [MO_BESW] = "besw", 2328 [MO_BEUL] = "beul", 2329 [MO_BESL] = "besl", 2330 [MO_BEUQ] = "beq", 2331 [MO_128 + MO_BE] = "beo", 2332 [MO_128 + MO_LE] = "leo", 2333 }; 2334 2335 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2336 [MO_UNALN >> MO_ASHIFT] = "un+", 2337 [MO_ALIGN >> MO_ASHIFT] = "al+", 2338 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2339 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2340 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2341 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2342 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2343 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2344 }; 2345 2346 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2347 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2348 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2349 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2350 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2351 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2352 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2353 }; 2354 2355 static const char bswap_flag_name[][6] = { 2356 [TCG_BSWAP_IZ] = "iz", 2357 [TCG_BSWAP_OZ] = "oz", 2358 [TCG_BSWAP_OS] = "os", 2359 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2360 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2361 }; 2362 2363 static inline bool tcg_regset_single(TCGRegSet d) 2364 { 2365 return (d & (d - 1)) == 0; 2366 } 2367 2368 static inline TCGReg tcg_regset_first(TCGRegSet d) 2369 { 2370 if (TCG_TARGET_NB_REGS <= 32) { 2371 return ctz32(d); 2372 } else { 2373 return ctz64(d); 2374 } 2375 } 2376 2377 /* Return only the number of characters output -- no error return. */ 2378 #define ne_fprintf(...) \ 2379 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2380 2381 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2382 { 2383 char buf[128]; 2384 TCGOp *op; 2385 2386 QTAILQ_FOREACH(op, &s->ops, link) { 2387 int i, k, nb_oargs, nb_iargs, nb_cargs; 2388 const TCGOpDef *def; 2389 TCGOpcode c; 2390 int col = 0; 2391 2392 c = op->opc; 2393 def = &tcg_op_defs[c]; 2394 2395 if (c == INDEX_op_insn_start) { 2396 nb_oargs = 0; 2397 col += ne_fprintf(f, "\n ----"); 2398 2399 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2400 col += ne_fprintf(f, " %016" PRIx64, 2401 tcg_get_insn_start_param(op, i)); 2402 } 2403 } else if (c == INDEX_op_call) { 2404 const TCGHelperInfo *info = tcg_call_info(op); 2405 void *func = tcg_call_func(op); 2406 2407 /* variable number of arguments */ 2408 nb_oargs = TCGOP_CALLO(op); 2409 nb_iargs = TCGOP_CALLI(op); 2410 nb_cargs = def->nb_cargs; 2411 2412 col += ne_fprintf(f, " %s ", def->name); 2413 2414 /* 2415 * Print the function name from TCGHelperInfo, if available. 2416 * Note that plugins have a template function for the info, 2417 * but the actual function pointer comes from the plugin. 
2418 */ 2419 if (func == info->func) { 2420 col += ne_fprintf(f, "%s", info->name); 2421 } else { 2422 col += ne_fprintf(f, "plugin(%p)", func); 2423 } 2424 2425 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2426 for (i = 0; i < nb_oargs; i++) { 2427 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2428 op->args[i])); 2429 } 2430 for (i = 0; i < nb_iargs; i++) { 2431 TCGArg arg = op->args[nb_oargs + i]; 2432 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2433 col += ne_fprintf(f, ",%s", t); 2434 } 2435 } else { 2436 col += ne_fprintf(f, " %s ", def->name); 2437 2438 nb_oargs = def->nb_oargs; 2439 nb_iargs = def->nb_iargs; 2440 nb_cargs = def->nb_cargs; 2441 2442 if (def->flags & TCG_OPF_VECTOR) { 2443 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2444 8 << TCGOP_VECE(op)); 2445 } 2446 2447 k = 0; 2448 for (i = 0; i < nb_oargs; i++) { 2449 const char *sep = k ? "," : ""; 2450 col += ne_fprintf(f, "%s%s", sep, 2451 tcg_get_arg_str(s, buf, sizeof(buf), 2452 op->args[k++])); 2453 } 2454 for (i = 0; i < nb_iargs; i++) { 2455 const char *sep = k ? "," : ""; 2456 col += ne_fprintf(f, "%s%s", sep, 2457 tcg_get_arg_str(s, buf, sizeof(buf), 2458 op->args[k++])); 2459 } 2460 switch (c) { 2461 case INDEX_op_brcond_i32: 2462 case INDEX_op_setcond_i32: 2463 case INDEX_op_movcond_i32: 2464 case INDEX_op_brcond2_i32: 2465 case INDEX_op_setcond2_i32: 2466 case INDEX_op_brcond_i64: 2467 case INDEX_op_setcond_i64: 2468 case INDEX_op_movcond_i64: 2469 case INDEX_op_cmp_vec: 2470 case INDEX_op_cmpsel_vec: 2471 if (op->args[k] < ARRAY_SIZE(cond_name) 2472 && cond_name[op->args[k]]) { 2473 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2474 } else { 2475 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2476 } 2477 i = 1; 2478 break; 2479 case INDEX_op_qemu_ld_a32_i32: 2480 case INDEX_op_qemu_ld_a64_i32: 2481 case INDEX_op_qemu_st_a32_i32: 2482 case INDEX_op_qemu_st_a64_i32: 2483 case INDEX_op_qemu_st8_a32_i32: 2484 case INDEX_op_qemu_st8_a64_i32: 2485 case INDEX_op_qemu_ld_a32_i64: 2486 case INDEX_op_qemu_ld_a64_i64: 2487 case INDEX_op_qemu_st_a32_i64: 2488 case INDEX_op_qemu_st_a64_i64: 2489 case INDEX_op_qemu_ld_a32_i128: 2490 case INDEX_op_qemu_ld_a64_i128: 2491 case INDEX_op_qemu_st_a32_i128: 2492 case INDEX_op_qemu_st_a64_i128: 2493 { 2494 const char *s_al, *s_op, *s_at; 2495 MemOpIdx oi = op->args[k++]; 2496 MemOp op = get_memop(oi); 2497 unsigned ix = get_mmuidx(oi); 2498 2499 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2500 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2501 s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2502 op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2503 2504 /* If all fields are accounted for, print symbolically. 
*/ 2505 if (!op && s_al && s_op && s_at) { 2506 col += ne_fprintf(f, ",%s%s%s,%u", 2507 s_at, s_al, s_op, ix); 2508 } else { 2509 op = get_memop(oi); 2510 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2511 } 2512 i = 1; 2513 } 2514 break; 2515 case INDEX_op_bswap16_i32: 2516 case INDEX_op_bswap16_i64: 2517 case INDEX_op_bswap32_i32: 2518 case INDEX_op_bswap32_i64: 2519 case INDEX_op_bswap64_i64: 2520 { 2521 TCGArg flags = op->args[k]; 2522 const char *name = NULL; 2523 2524 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2525 name = bswap_flag_name[flags]; 2526 } 2527 if (name) { 2528 col += ne_fprintf(f, ",%s", name); 2529 } else { 2530 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2531 } 2532 i = k = 1; 2533 } 2534 break; 2535 default: 2536 i = 0; 2537 break; 2538 } 2539 switch (c) { 2540 case INDEX_op_set_label: 2541 case INDEX_op_br: 2542 case INDEX_op_brcond_i32: 2543 case INDEX_op_brcond_i64: 2544 case INDEX_op_brcond2_i32: 2545 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2546 arg_label(op->args[k])->id); 2547 i++, k++; 2548 break; 2549 case INDEX_op_mb: 2550 { 2551 TCGBar membar = op->args[k]; 2552 const char *b_op, *m_op; 2553 2554 switch (membar & TCG_BAR_SC) { 2555 case 0: 2556 b_op = "none"; 2557 break; 2558 case TCG_BAR_LDAQ: 2559 b_op = "acq"; 2560 break; 2561 case TCG_BAR_STRL: 2562 b_op = "rel"; 2563 break; 2564 case TCG_BAR_SC: 2565 b_op = "seq"; 2566 break; 2567 default: 2568 g_assert_not_reached(); 2569 } 2570 2571 switch (membar & TCG_MO_ALL) { 2572 case 0: 2573 m_op = "none"; 2574 break; 2575 case TCG_MO_LD_LD: 2576 m_op = "rr"; 2577 break; 2578 case TCG_MO_LD_ST: 2579 m_op = "rw"; 2580 break; 2581 case TCG_MO_ST_LD: 2582 m_op = "wr"; 2583 break; 2584 case TCG_MO_ST_ST: 2585 m_op = "ww"; 2586 break; 2587 case TCG_MO_LD_LD | TCG_MO_LD_ST: 2588 m_op = "rr+rw"; 2589 break; 2590 case TCG_MO_LD_LD | TCG_MO_ST_LD: 2591 m_op = "rr+wr"; 2592 break; 2593 case TCG_MO_LD_LD | TCG_MO_ST_ST: 2594 m_op = "rr+ww"; 2595 break; 2596 case TCG_MO_LD_ST | TCG_MO_ST_LD: 2597 m_op = "rw+wr"; 2598 break; 2599 case TCG_MO_LD_ST | TCG_MO_ST_ST: 2600 m_op = "rw+ww"; 2601 break; 2602 case TCG_MO_ST_LD | TCG_MO_ST_ST: 2603 m_op = "wr+ww"; 2604 break; 2605 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 2606 m_op = "rr+rw+wr"; 2607 break; 2608 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 2609 m_op = "rr+rw+ww"; 2610 break; 2611 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 2612 m_op = "rr+wr+ww"; 2613 break; 2614 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 2615 m_op = "rw+wr+ww"; 2616 break; 2617 case TCG_MO_ALL: 2618 m_op = "all"; 2619 break; 2620 default: 2621 g_assert_not_reached(); 2622 } 2623 2624 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 2625 i++, k++; 2626 } 2627 break; 2628 default: 2629 break; 2630 } 2631 for (; i < nb_cargs; i++, k++) { 2632 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2633 op->args[k]); 2634 } 2635 } 2636 2637 if (have_prefs || op->life) { 2638 for (; col < 40; ++col) { 2639 putc(' ', f); 2640 } 2641 } 2642 2643 if (op->life) { 2644 unsigned life = op->life; 2645 2646 if (life & (SYNC_ARG * 3)) { 2647 ne_fprintf(f, " sync:"); 2648 for (i = 0; i < 2; ++i) { 2649 if (life & (SYNC_ARG << i)) { 2650 ne_fprintf(f, " %d", i); 2651 } 2652 } 2653 } 2654 life /= DEAD_ARG; 2655 if (life) { 2656 ne_fprintf(f, " dead:"); 2657 for (i = 0; life; ++i, life >>= 1) { 2658 if (life & 1) { 2659 ne_fprintf(f, " %d", i); 2660 } 2661 } 2662 } 2663 } 2664 2665 if (have_prefs) { 2666 for (i = 0; i < nb_oargs; ++i) { 2667 TCGRegSet set = output_pref(op, i); 2668 2669 if (i == 0) { 2670 ne_fprintf(f, " pref="); 2671 } else { 2672 ne_fprintf(f, ","); 2673 } 2674 if (set == 0) { 2675 ne_fprintf(f, "none"); 2676 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2677 ne_fprintf(f, "all"); 2678 #ifdef CONFIG_DEBUG_TCG 2679 } else if (tcg_regset_single(set)) { 2680 TCGReg reg = tcg_regset_first(set); 2681 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2682 #endif 2683 } else if (TCG_TARGET_NB_REGS <= 32) { 2684 ne_fprintf(f, "0x%x", (uint32_t)set); 2685 } else { 2686 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2687 } 2688 } 2689 } 2690 2691 putc('\n', f); 2692 } 2693 } 2694 2695 /* we give more priority to constraints with less registers */ 2696 static int get_constraint_priority(const TCGOpDef *def, int k) 2697 { 2698 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2699 int n = ctpop64(arg_ct->regs); 2700 2701 /* 2702 * Sort constraints of a single register first, which includes output 2703 * aliases (which must exactly match the input already allocated). 2704 */ 2705 if (n == 1 || arg_ct->oalias) { 2706 return INT_MAX; 2707 } 2708 2709 /* 2710 * Sort register pairs next, first then second immediately after. 2711 * Arbitrarily sort multiple pairs by the index of the first reg; 2712 * there shouldn't be many pairs. 2713 */ 2714 switch (arg_ct->pair) { 2715 case 1: 2716 case 3: 2717 return (k + 1) * 2; 2718 case 2: 2719 return (arg_ct->pair_index + 1) * 2 - 1; 2720 } 2721 2722 /* Finally, sort by decreasing register count. */ 2723 assert(n > 1); 2724 return -n; 2725 } 2726 2727 /* sort from highest priority to lowest */ 2728 static void sort_constraints(TCGOpDef *def, int start, int n) 2729 { 2730 int i, j; 2731 TCGArgConstraint *a = def->args_ct; 2732 2733 for (i = 0; i < n; i++) { 2734 a[start + i].sort_index = start + i; 2735 } 2736 if (n <= 1) { 2737 return; 2738 } 2739 for (i = 0; i < n - 1; i++) { 2740 for (j = i + 1; j < n; j++) { 2741 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2742 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2743 if (p1 < p2) { 2744 int tmp = a[start + i].sort_index; 2745 a[start + i].sort_index = a[start + j].sort_index; 2746 a[start + j].sort_index = tmp; 2747 } 2748 } 2749 } 2750 } 2751 2752 static void process_op_defs(TCGContext *s) 2753 { 2754 TCGOpcode op; 2755 2756 for (op = 0; op < NB_OPS; op++) { 2757 TCGOpDef *def = &tcg_op_defs[op]; 2758 const TCGTargetOpDef *tdefs; 2759 bool saw_alias_pair = false; 2760 int i, o, i2, o2, nb_args; 2761 2762 if (def->flags & TCG_OPF_NOT_PRESENT) { 2763 continue; 2764 } 2765 2766 nb_args = def->nb_iargs + def->nb_oargs; 2767 if (nb_args == 0) { 2768 continue; 2769 } 2770 2771 /* 2772 * Macro magic should make it impossible, but double-check that 2773 * the array index is in range. 
Since the signness of an enum 2774 * is implementation defined, force the result to unsigned. 2775 */ 2776 unsigned con_set = tcg_target_op_def(op); 2777 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2778 tdefs = &constraint_sets[con_set]; 2779 2780 for (i = 0; i < nb_args; i++) { 2781 const char *ct_str = tdefs->args_ct_str[i]; 2782 bool input_p = i >= def->nb_oargs; 2783 2784 /* Incomplete TCGTargetOpDef entry. */ 2785 tcg_debug_assert(ct_str != NULL); 2786 2787 switch (*ct_str) { 2788 case '0' ... '9': 2789 o = *ct_str - '0'; 2790 tcg_debug_assert(input_p); 2791 tcg_debug_assert(o < def->nb_oargs); 2792 tcg_debug_assert(def->args_ct[o].regs != 0); 2793 tcg_debug_assert(!def->args_ct[o].oalias); 2794 def->args_ct[i] = def->args_ct[o]; 2795 /* The output sets oalias. */ 2796 def->args_ct[o].oalias = 1; 2797 def->args_ct[o].alias_index = i; 2798 /* The input sets ialias. */ 2799 def->args_ct[i].ialias = 1; 2800 def->args_ct[i].alias_index = o; 2801 if (def->args_ct[i].pair) { 2802 saw_alias_pair = true; 2803 } 2804 tcg_debug_assert(ct_str[1] == '\0'); 2805 continue; 2806 2807 case '&': 2808 tcg_debug_assert(!input_p); 2809 def->args_ct[i].newreg = true; 2810 ct_str++; 2811 break; 2812 2813 case 'p': /* plus */ 2814 /* Allocate to the register after the previous. */ 2815 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2816 o = i - 1; 2817 tcg_debug_assert(!def->args_ct[o].pair); 2818 tcg_debug_assert(!def->args_ct[o].ct); 2819 def->args_ct[i] = (TCGArgConstraint){ 2820 .pair = 2, 2821 .pair_index = o, 2822 .regs = def->args_ct[o].regs << 1, 2823 }; 2824 def->args_ct[o].pair = 1; 2825 def->args_ct[o].pair_index = i; 2826 tcg_debug_assert(ct_str[1] == '\0'); 2827 continue; 2828 2829 case 'm': /* minus */ 2830 /* Allocate to the register before the previous. */ 2831 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2832 o = i - 1; 2833 tcg_debug_assert(!def->args_ct[o].pair); 2834 tcg_debug_assert(!def->args_ct[o].ct); 2835 def->args_ct[i] = (TCGArgConstraint){ 2836 .pair = 1, 2837 .pair_index = o, 2838 .regs = def->args_ct[o].regs >> 1, 2839 }; 2840 def->args_ct[o].pair = 2; 2841 def->args_ct[o].pair_index = i; 2842 tcg_debug_assert(ct_str[1] == '\0'); 2843 continue; 2844 } 2845 2846 do { 2847 switch (*ct_str) { 2848 case 'i': 2849 def->args_ct[i].ct |= TCG_CT_CONST; 2850 break; 2851 2852 /* Include all of the target-specific constraints. */ 2853 2854 #undef CONST 2855 #define CONST(CASE, MASK) \ 2856 case CASE: def->args_ct[i].ct |= MASK; break; 2857 #define REGS(CASE, MASK) \ 2858 case CASE: def->args_ct[i].regs |= MASK; break; 2859 2860 #include "tcg-target-con-str.h" 2861 2862 #undef REGS 2863 #undef CONST 2864 default: 2865 case '0' ... '9': 2866 case '&': 2867 case 'p': 2868 case 'm': 2869 /* Typo in TCGTargetOpDef constraint. */ 2870 g_assert_not_reached(); 2871 } 2872 } while (*++ct_str != '\0'); 2873 } 2874 2875 /* TCGTargetOpDef entry with too much information? */ 2876 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2877 2878 /* 2879 * Fix up output pairs that are aliased with inputs. 2880 * When we created the alias, we copied pair from the output. 2881 * There are three cases: 2882 * (1a) Pairs of inputs alias pairs of outputs. 2883 * (1b) One input aliases the first of a pair of outputs. 2884 * (2) One input aliases the second of a pair of outputs. 2885 * 2886 * Case 1a is handled by making sure that the pair_index'es are 2887 * properly updated so that they appear the same as a pair of inputs. 
2888 * 2889 * Case 1b is handled by setting the pair_index of the input to 2890 * itself, simply so it doesn't point to an unrelated argument. 2891 * Since we don't encounter the "second" during the input allocation 2892 * phase, nothing happens with the second half of the input pair. 2893 * 2894 * Case 2 is handled by setting the second input to pair=3, the 2895 * first output to pair=3, and the pair_index'es to match. 2896 */ 2897 if (saw_alias_pair) { 2898 for (i = def->nb_oargs; i < nb_args; i++) { 2899 /* 2900 * Since [0-9pm] must be alone in the constraint string, 2901 * the only way they can both be set is if the pair comes 2902 * from the output alias. 2903 */ 2904 if (!def->args_ct[i].ialias) { 2905 continue; 2906 } 2907 switch (def->args_ct[i].pair) { 2908 case 0: 2909 break; 2910 case 1: 2911 o = def->args_ct[i].alias_index; 2912 o2 = def->args_ct[o].pair_index; 2913 tcg_debug_assert(def->args_ct[o].pair == 1); 2914 tcg_debug_assert(def->args_ct[o2].pair == 2); 2915 if (def->args_ct[o2].oalias) { 2916 /* Case 1a */ 2917 i2 = def->args_ct[o2].alias_index; 2918 tcg_debug_assert(def->args_ct[i2].pair == 2); 2919 def->args_ct[i2].pair_index = i; 2920 def->args_ct[i].pair_index = i2; 2921 } else { 2922 /* Case 1b */ 2923 def->args_ct[i].pair_index = i; 2924 } 2925 break; 2926 case 2: 2927 o = def->args_ct[i].alias_index; 2928 o2 = def->args_ct[o].pair_index; 2929 tcg_debug_assert(def->args_ct[o].pair == 2); 2930 tcg_debug_assert(def->args_ct[o2].pair == 1); 2931 if (def->args_ct[o2].oalias) { 2932 /* Case 1a */ 2933 i2 = def->args_ct[o2].alias_index; 2934 tcg_debug_assert(def->args_ct[i2].pair == 1); 2935 def->args_ct[i2].pair_index = i; 2936 def->args_ct[i].pair_index = i2; 2937 } else { 2938 /* Case 2 */ 2939 def->args_ct[i].pair = 3; 2940 def->args_ct[o2].pair = 3; 2941 def->args_ct[i].pair_index = o2; 2942 def->args_ct[o2].pair_index = i; 2943 } 2944 break; 2945 default: 2946 g_assert_not_reached(); 2947 } 2948 } 2949 } 2950 2951 /* sort the constraints (XXX: this is just an heuristic) */ 2952 sort_constraints(def, 0, def->nb_oargs); 2953 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2954 } 2955 } 2956 2957 static void remove_label_use(TCGOp *op, int idx) 2958 { 2959 TCGLabel *label = arg_label(op->args[idx]); 2960 TCGLabelUse *use; 2961 2962 QSIMPLEQ_FOREACH(use, &label->branches, next) { 2963 if (use->op == op) { 2964 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 2965 return; 2966 } 2967 } 2968 g_assert_not_reached(); 2969 } 2970 2971 void tcg_op_remove(TCGContext *s, TCGOp *op) 2972 { 2973 switch (op->opc) { 2974 case INDEX_op_br: 2975 remove_label_use(op, 0); 2976 break; 2977 case INDEX_op_brcond_i32: 2978 case INDEX_op_brcond_i64: 2979 remove_label_use(op, 3); 2980 break; 2981 case INDEX_op_brcond2_i32: 2982 remove_label_use(op, 5); 2983 break; 2984 default: 2985 break; 2986 } 2987 2988 QTAILQ_REMOVE(&s->ops, op, link); 2989 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2990 s->nb_ops--; 2991 2992 #ifdef CONFIG_PROFILER 2993 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2994 #endif 2995 } 2996 2997 void tcg_remove_ops_after(TCGOp *op) 2998 { 2999 TCGContext *s = tcg_ctx; 3000 3001 while (true) { 3002 TCGOp *last = tcg_last_op(); 3003 if (last == op) { 3004 return; 3005 } 3006 tcg_op_remove(s, last); 3007 } 3008 } 3009 3010 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3011 { 3012 TCGContext *s = tcg_ctx; 3013 TCGOp *op = NULL; 3014 3015 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3016 QTAILQ_FOREACH(op, &s->free_ops, link) { 
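            /*
             * Recycle a previously freed op if it already has enough
             * argument slots; otherwise fall through and allocate a
             * fresh TCGOp (with at least 4 slots) from the arena below.
             */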
3017 if (nargs <= op->nargs) { 3018 QTAILQ_REMOVE(&s->free_ops, op, link); 3019 nargs = op->nargs; 3020 goto found; 3021 } 3022 } 3023 } 3024 3025 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3026 nargs = MAX(4, nargs); 3027 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3028 3029 found: 3030 memset(op, 0, offsetof(TCGOp, link)); 3031 op->opc = opc; 3032 op->nargs = nargs; 3033 3034 /* Check for bitfield overflow. */ 3035 tcg_debug_assert(op->nargs == nargs); 3036 3037 s->nb_ops++; 3038 return op; 3039 } 3040 3041 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3042 { 3043 TCGOp *op = tcg_op_alloc(opc, nargs); 3044 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3045 return op; 3046 } 3047 3048 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3049 TCGOpcode opc, unsigned nargs) 3050 { 3051 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3052 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3053 return new_op; 3054 } 3055 3056 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3057 TCGOpcode opc, unsigned nargs) 3058 { 3059 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3060 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3061 return new_op; 3062 } 3063 3064 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3065 { 3066 TCGLabelUse *u; 3067 3068 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3069 TCGOp *op = u->op; 3070 switch (op->opc) { 3071 case INDEX_op_br: 3072 op->args[0] = label_arg(to); 3073 break; 3074 case INDEX_op_brcond_i32: 3075 case INDEX_op_brcond_i64: 3076 op->args[3] = label_arg(to); 3077 break; 3078 case INDEX_op_brcond2_i32: 3079 op->args[5] = label_arg(to); 3080 break; 3081 default: 3082 g_assert_not_reached(); 3083 } 3084 } 3085 3086 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3087 } 3088 3089 /* Reachable analysis : remove unreachable code. */ 3090 static void __attribute__((noinline)) 3091 reachable_code_pass(TCGContext *s) 3092 { 3093 TCGOp *op, *op_next, *op_prev; 3094 bool dead = false; 3095 3096 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3097 bool remove = dead; 3098 TCGLabel *label; 3099 3100 switch (op->opc) { 3101 case INDEX_op_set_label: 3102 label = arg_label(op->args[0]); 3103 3104 /* 3105 * Note that the first op in the TB is always a load, 3106 * so there is always something before a label. 3107 */ 3108 op_prev = QTAILQ_PREV(op, link); 3109 3110 /* 3111 * If we find two sequential labels, move all branches to 3112 * reference the second label and remove the first label. 3113 * Do this before branch to next optimization, so that the 3114 * middle label is out of the way. 3115 */ 3116 if (op_prev->opc == INDEX_op_set_label) { 3117 move_label_uses(label, arg_label(op_prev->args[0])); 3118 tcg_op_remove(s, op_prev); 3119 op_prev = QTAILQ_PREV(op, link); 3120 } 3121 3122 /* 3123 * Optimization can fold conditional branches to unconditional. 3124 * If we find a label which is preceded by an unconditional 3125 * branch to next, remove the branch. We couldn't do this when 3126 * processing the branch because any dead code between the branch 3127 * and label had not yet been removed. 3128 */ 3129 if (op_prev->opc == INDEX_op_br && 3130 label == arg_label(op_prev->args[0])) { 3131 tcg_op_remove(s, op_prev); 3132 /* Fall through means insns become live again. */ 3133 dead = false; 3134 } 3135 3136 if (QSIMPLEQ_EMPTY(&label->branches)) { 3137 /* 3138 * While there is an occasional backward branch, virtually 3139 * all branches generated by the translators are forward. 
3140 * Which means that generally we will have already removed 3141 * all references to the label that will be, and there is 3142 * little to be gained by iterating. 3143 */ 3144 remove = true; 3145 } else { 3146 /* Once we see a label, insns become live again. */ 3147 dead = false; 3148 remove = false; 3149 } 3150 break; 3151 3152 case INDEX_op_br: 3153 case INDEX_op_exit_tb: 3154 case INDEX_op_goto_ptr: 3155 /* Unconditional branches; everything following is dead. */ 3156 dead = true; 3157 break; 3158 3159 case INDEX_op_call: 3160 /* Notice noreturn helper calls, raising exceptions. */ 3161 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3162 dead = true; 3163 } 3164 break; 3165 3166 case INDEX_op_insn_start: 3167 /* Never remove -- we need to keep these for unwind. */ 3168 remove = false; 3169 break; 3170 3171 default: 3172 break; 3173 } 3174 3175 if (remove) { 3176 tcg_op_remove(s, op); 3177 } 3178 } 3179 } 3180 3181 #define TS_DEAD 1 3182 #define TS_MEM 2 3183 3184 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3185 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3186 3187 /* For liveness_pass_1, the register preferences for a given temp. */ 3188 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3189 { 3190 return ts->state_ptr; 3191 } 3192 3193 /* For liveness_pass_1, reset the preferences for a given temp to the 3194 * maximal regset for its type. 3195 */ 3196 static inline void la_reset_pref(TCGTemp *ts) 3197 { 3198 *la_temp_pref(ts) 3199 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3200 } 3201 3202 /* liveness analysis: end of function: all temps are dead, and globals 3203 should be in memory. */ 3204 static void la_func_end(TCGContext *s, int ng, int nt) 3205 { 3206 int i; 3207 3208 for (i = 0; i < ng; ++i) { 3209 s->temps[i].state = TS_DEAD | TS_MEM; 3210 la_reset_pref(&s->temps[i]); 3211 } 3212 for (i = ng; i < nt; ++i) { 3213 s->temps[i].state = TS_DEAD; 3214 la_reset_pref(&s->temps[i]); 3215 } 3216 } 3217 3218 /* liveness analysis: end of basic block: all temps are dead, globals 3219 and local temps should be in memory. */ 3220 static void la_bb_end(TCGContext *s, int ng, int nt) 3221 { 3222 int i; 3223 3224 for (i = 0; i < nt; ++i) { 3225 TCGTemp *ts = &s->temps[i]; 3226 int state; 3227 3228 switch (ts->kind) { 3229 case TEMP_FIXED: 3230 case TEMP_GLOBAL: 3231 case TEMP_TB: 3232 state = TS_DEAD | TS_MEM; 3233 break; 3234 case TEMP_EBB: 3235 case TEMP_CONST: 3236 state = TS_DEAD; 3237 break; 3238 default: 3239 g_assert_not_reached(); 3240 } 3241 ts->state = state; 3242 la_reset_pref(ts); 3243 } 3244 } 3245 3246 /* liveness analysis: sync globals back to memory. */ 3247 static void la_global_sync(TCGContext *s, int ng) 3248 { 3249 int i; 3250 3251 for (i = 0; i < ng; ++i) { 3252 int state = s->temps[i].state; 3253 s->temps[i].state = state | TS_MEM; 3254 if (state == TS_DEAD) { 3255 /* If the global was previously dead, reset prefs. */ 3256 la_reset_pref(&s->temps[i]); 3257 } 3258 } 3259 } 3260 3261 /* 3262 * liveness analysis: conditional branch: all temps are dead unless 3263 * explicitly live-across-conditional-branch, globals and local temps 3264 * should be synced. 
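 * A conditional branch does not terminate the extended basic block on
 * the fall-through path, so TEMP_EBB values are deliberately left
 * untouched here; TEMP_TB values must be flushed to memory because the
 * taken branch will reload them without knowledge of our current
 * register assignments.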
3265 */ 3266 static void la_bb_sync(TCGContext *s, int ng, int nt) 3267 { 3268 la_global_sync(s, ng); 3269 3270 for (int i = ng; i < nt; ++i) { 3271 TCGTemp *ts = &s->temps[i]; 3272 int state; 3273 3274 switch (ts->kind) { 3275 case TEMP_TB: 3276 state = ts->state; 3277 ts->state = state | TS_MEM; 3278 if (state != TS_DEAD) { 3279 continue; 3280 } 3281 break; 3282 case TEMP_EBB: 3283 case TEMP_CONST: 3284 continue; 3285 default: 3286 g_assert_not_reached(); 3287 } 3288 la_reset_pref(&s->temps[i]); 3289 } 3290 } 3291 3292 /* liveness analysis: sync globals back to memory and kill. */ 3293 static void la_global_kill(TCGContext *s, int ng) 3294 { 3295 int i; 3296 3297 for (i = 0; i < ng; i++) { 3298 s->temps[i].state = TS_DEAD | TS_MEM; 3299 la_reset_pref(&s->temps[i]); 3300 } 3301 } 3302 3303 /* liveness analysis: note live globals crossing calls. */ 3304 static void la_cross_call(TCGContext *s, int nt) 3305 { 3306 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3307 int i; 3308 3309 for (i = 0; i < nt; i++) { 3310 TCGTemp *ts = &s->temps[i]; 3311 if (!(ts->state & TS_DEAD)) { 3312 TCGRegSet *pset = la_temp_pref(ts); 3313 TCGRegSet set = *pset; 3314 3315 set &= mask; 3316 /* If the combination is not possible, restart. */ 3317 if (set == 0) { 3318 set = tcg_target_available_regs[ts->type] & mask; 3319 } 3320 *pset = set; 3321 } 3322 } 3323 } 3324 3325 /* 3326 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3327 * to TEMP_EBB, if possible. 3328 */ 3329 static void __attribute__((noinline)) 3330 liveness_pass_0(TCGContext *s) 3331 { 3332 void * const multiple_ebb = (void *)(uintptr_t)-1; 3333 int nb_temps = s->nb_temps; 3334 TCGOp *op, *ebb; 3335 3336 for (int i = s->nb_globals; i < nb_temps; ++i) { 3337 s->temps[i].state_ptr = NULL; 3338 } 3339 3340 /* 3341 * Represent each EBB by the op at which it begins. In the case of 3342 * the first EBB, this is the first op, otherwise it is a label. 3343 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3344 * within a single EBB, else MULTIPLE_EBB. 3345 */ 3346 ebb = QTAILQ_FIRST(&s->ops); 3347 QTAILQ_FOREACH(op, &s->ops, link) { 3348 const TCGOpDef *def; 3349 int nb_oargs, nb_iargs; 3350 3351 switch (op->opc) { 3352 case INDEX_op_set_label: 3353 ebb = op; 3354 continue; 3355 case INDEX_op_discard: 3356 continue; 3357 case INDEX_op_call: 3358 nb_oargs = TCGOP_CALLO(op); 3359 nb_iargs = TCGOP_CALLI(op); 3360 break; 3361 default: 3362 def = &tcg_op_defs[op->opc]; 3363 nb_oargs = def->nb_oargs; 3364 nb_iargs = def->nb_iargs; 3365 break; 3366 } 3367 3368 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3369 TCGTemp *ts = arg_temp(op->args[i]); 3370 3371 if (ts->kind != TEMP_TB) { 3372 continue; 3373 } 3374 if (ts->state_ptr == NULL) { 3375 ts->state_ptr = ebb; 3376 } else if (ts->state_ptr != ebb) { 3377 ts->state_ptr = multiple_ebb; 3378 } 3379 } 3380 } 3381 3382 /* 3383 * For TEMP_TB that turned out not to be used beyond one EBB, 3384 * reduce the liveness to TEMP_EBB. 3385 */ 3386 for (int i = s->nb_globals; i < nb_temps; ++i) { 3387 TCGTemp *ts = &s->temps[i]; 3388 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3389 ts->kind = TEMP_EBB; 3390 } 3391 } 3392 } 3393 3394 /* Liveness analysis : update the opc_arg_life array to tell if a 3395 given input arguments is dead. Instructions updating dead 3396 temporaries are removed. 
*/ 3397 static void __attribute__((noinline)) 3398 liveness_pass_1(TCGContext *s) 3399 { 3400 int nb_globals = s->nb_globals; 3401 int nb_temps = s->nb_temps; 3402 TCGOp *op, *op_prev; 3403 TCGRegSet *prefs; 3404 int i; 3405 3406 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3407 for (i = 0; i < nb_temps; ++i) { 3408 s->temps[i].state_ptr = prefs + i; 3409 } 3410 3411 /* ??? Should be redundant with the exit_tb that ends the TB. */ 3412 la_func_end(s, nb_globals, nb_temps); 3413 3414 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3415 int nb_iargs, nb_oargs; 3416 TCGOpcode opc_new, opc_new2; 3417 bool have_opc_new2; 3418 TCGLifeData arg_life = 0; 3419 TCGTemp *ts; 3420 TCGOpcode opc = op->opc; 3421 const TCGOpDef *def = &tcg_op_defs[opc]; 3422 3423 switch (opc) { 3424 case INDEX_op_call: 3425 { 3426 const TCGHelperInfo *info = tcg_call_info(op); 3427 int call_flags = tcg_call_flags(op); 3428 3429 nb_oargs = TCGOP_CALLO(op); 3430 nb_iargs = TCGOP_CALLI(op); 3431 3432 /* pure functions can be removed if their result is unused */ 3433 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3434 for (i = 0; i < nb_oargs; i++) { 3435 ts = arg_temp(op->args[i]); 3436 if (ts->state != TS_DEAD) { 3437 goto do_not_remove_call; 3438 } 3439 } 3440 goto do_remove; 3441 } 3442 do_not_remove_call: 3443 3444 /* Output args are dead. */ 3445 for (i = 0; i < nb_oargs; i++) { 3446 ts = arg_temp(op->args[i]); 3447 if (ts->state & TS_DEAD) { 3448 arg_life |= DEAD_ARG << i; 3449 } 3450 if (ts->state & TS_MEM) { 3451 arg_life |= SYNC_ARG << i; 3452 } 3453 ts->state = TS_DEAD; 3454 la_reset_pref(ts); 3455 } 3456 3457 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3458 memset(op->output_pref, 0, sizeof(op->output_pref)); 3459 3460 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3461 TCG_CALL_NO_READ_GLOBALS))) { 3462 la_global_kill(s, nb_globals); 3463 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3464 la_global_sync(s, nb_globals); 3465 } 3466 3467 /* Record arguments that die in this helper. */ 3468 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3469 ts = arg_temp(op->args[i]); 3470 if (ts->state & TS_DEAD) { 3471 arg_life |= DEAD_ARG << i; 3472 } 3473 } 3474 3475 /* For all live registers, remove call-clobbered prefs. */ 3476 la_cross_call(s, nb_temps); 3477 3478 /* 3479 * Input arguments are live for preceding opcodes. 3480 * 3481 * For those arguments that die, and will be allocated in 3482 * registers, clear the register set for that arg, to be 3483 * filled in below. For args that will be on the stack, 3484 * reset to any available reg. Process arguments in reverse 3485 * order so that if a temp is used more than once, the stack 3486 * reset to max happens before the register reset to 0. 3487 */ 3488 for (i = nb_iargs - 1; i >= 0; i--) { 3489 const TCGCallArgumentLoc *loc = &info->in[i]; 3490 ts = arg_temp(op->args[nb_oargs + i]); 3491 3492 if (ts->state & TS_DEAD) { 3493 switch (loc->kind) { 3494 case TCG_CALL_ARG_NORMAL: 3495 case TCG_CALL_ARG_EXTEND_U: 3496 case TCG_CALL_ARG_EXTEND_S: 3497 if (arg_slot_reg_p(loc->arg_slot)) { 3498 *la_temp_pref(ts) = 0; 3499 break; 3500 } 3501 /* fall through */ 3502 default: 3503 *la_temp_pref(ts) = 3504 tcg_target_available_regs[ts->type]; 3505 break; 3506 } 3507 ts->state &= ~TS_DEAD; 3508 } 3509 } 3510 3511 /* 3512 * For each input argument, add its input register to prefs. 3513 * If a temp is used once, this produces a single set bit; 3514 * if a temp is used multiple times, this produces a set. 
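                 * E.g. a temp passed as both the first and the second
                 * argument of a helper ends up preferring either of the
                 * first two argument registers.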
3515 */ 3516 for (i = 0; i < nb_iargs; i++) { 3517 const TCGCallArgumentLoc *loc = &info->in[i]; 3518 ts = arg_temp(op->args[nb_oargs + i]); 3519 3520 switch (loc->kind) { 3521 case TCG_CALL_ARG_NORMAL: 3522 case TCG_CALL_ARG_EXTEND_U: 3523 case TCG_CALL_ARG_EXTEND_S: 3524 if (arg_slot_reg_p(loc->arg_slot)) { 3525 tcg_regset_set_reg(*la_temp_pref(ts), 3526 tcg_target_call_iarg_regs[loc->arg_slot]); 3527 } 3528 break; 3529 default: 3530 break; 3531 } 3532 } 3533 } 3534 break; 3535 case INDEX_op_insn_start: 3536 break; 3537 case INDEX_op_discard: 3538 /* mark the temporary as dead */ 3539 ts = arg_temp(op->args[0]); 3540 ts->state = TS_DEAD; 3541 la_reset_pref(ts); 3542 break; 3543 3544 case INDEX_op_add2_i32: 3545 opc_new = INDEX_op_add_i32; 3546 goto do_addsub2; 3547 case INDEX_op_sub2_i32: 3548 opc_new = INDEX_op_sub_i32; 3549 goto do_addsub2; 3550 case INDEX_op_add2_i64: 3551 opc_new = INDEX_op_add_i64; 3552 goto do_addsub2; 3553 case INDEX_op_sub2_i64: 3554 opc_new = INDEX_op_sub_i64; 3555 do_addsub2: 3556 nb_iargs = 4; 3557 nb_oargs = 2; 3558 /* Test if the high part of the operation is dead, but not 3559 the low part. The result can be optimized to a simple 3560 add or sub. This happens often for x86_64 guest when the 3561 cpu mode is set to 32 bit. */ 3562 if (arg_temp(op->args[1])->state == TS_DEAD) { 3563 if (arg_temp(op->args[0])->state == TS_DEAD) { 3564 goto do_remove; 3565 } 3566 /* Replace the opcode and adjust the args in place, 3567 leaving 3 unused args at the end. */ 3568 op->opc = opc = opc_new; 3569 op->args[1] = op->args[2]; 3570 op->args[2] = op->args[4]; 3571 /* Fall through and mark the single-word operation live. */ 3572 nb_iargs = 2; 3573 nb_oargs = 1; 3574 } 3575 goto do_not_remove; 3576 3577 case INDEX_op_mulu2_i32: 3578 opc_new = INDEX_op_mul_i32; 3579 opc_new2 = INDEX_op_muluh_i32; 3580 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3581 goto do_mul2; 3582 case INDEX_op_muls2_i32: 3583 opc_new = INDEX_op_mul_i32; 3584 opc_new2 = INDEX_op_mulsh_i32; 3585 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3586 goto do_mul2; 3587 case INDEX_op_mulu2_i64: 3588 opc_new = INDEX_op_mul_i64; 3589 opc_new2 = INDEX_op_muluh_i64; 3590 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3591 goto do_mul2; 3592 case INDEX_op_muls2_i64: 3593 opc_new = INDEX_op_mul_i64; 3594 opc_new2 = INDEX_op_mulsh_i64; 3595 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3596 goto do_mul2; 3597 do_mul2: 3598 nb_iargs = 2; 3599 nb_oargs = 2; 3600 if (arg_temp(op->args[1])->state == TS_DEAD) { 3601 if (arg_temp(op->args[0])->state == TS_DEAD) { 3602 /* Both parts of the operation are dead. */ 3603 goto do_remove; 3604 } 3605 /* The high part of the operation is dead; generate the low. */ 3606 op->opc = opc = opc_new; 3607 op->args[1] = op->args[2]; 3608 op->args[2] = op->args[3]; 3609 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3610 /* The low part of the operation is dead; generate the high. */ 3611 op->opc = opc = opc_new2; 3612 op->args[0] = op->args[1]; 3613 op->args[1] = op->args[2]; 3614 op->args[2] = op->args[3]; 3615 } else { 3616 goto do_not_remove; 3617 } 3618 /* Mark the single-word operation live. */ 3619 nb_oargs = 1; 3620 goto do_not_remove; 3621 3622 default: 3623 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3624 nb_iargs = def->nb_iargs; 3625 nb_oargs = def->nb_oargs; 3626 3627 /* Test if the operation can be removed because all 3628 its outputs are dead. 
We assume that nb_oargs == 0 3629 implies side effects */ 3630 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3631 for (i = 0; i < nb_oargs; i++) { 3632 if (arg_temp(op->args[i])->state != TS_DEAD) { 3633 goto do_not_remove; 3634 } 3635 } 3636 goto do_remove; 3637 } 3638 goto do_not_remove; 3639 3640 do_remove: 3641 tcg_op_remove(s, op); 3642 break; 3643 3644 do_not_remove: 3645 for (i = 0; i < nb_oargs; i++) { 3646 ts = arg_temp(op->args[i]); 3647 3648 /* Remember the preference of the uses that followed. */ 3649 if (i < ARRAY_SIZE(op->output_pref)) { 3650 op->output_pref[i] = *la_temp_pref(ts); 3651 } 3652 3653 /* Output args are dead. */ 3654 if (ts->state & TS_DEAD) { 3655 arg_life |= DEAD_ARG << i; 3656 } 3657 if (ts->state & TS_MEM) { 3658 arg_life |= SYNC_ARG << i; 3659 } 3660 ts->state = TS_DEAD; 3661 la_reset_pref(ts); 3662 } 3663 3664 /* If end of basic block, update. */ 3665 if (def->flags & TCG_OPF_BB_EXIT) { 3666 la_func_end(s, nb_globals, nb_temps); 3667 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3668 la_bb_sync(s, nb_globals, nb_temps); 3669 } else if (def->flags & TCG_OPF_BB_END) { 3670 la_bb_end(s, nb_globals, nb_temps); 3671 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3672 la_global_sync(s, nb_globals); 3673 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3674 la_cross_call(s, nb_temps); 3675 } 3676 } 3677 3678 /* Record arguments that die in this opcode. */ 3679 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3680 ts = arg_temp(op->args[i]); 3681 if (ts->state & TS_DEAD) { 3682 arg_life |= DEAD_ARG << i; 3683 } 3684 } 3685 3686 /* Input arguments are live for preceding opcodes. */ 3687 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3688 ts = arg_temp(op->args[i]); 3689 if (ts->state & TS_DEAD) { 3690 /* For operands that were dead, initially allow 3691 all regs for the type. */ 3692 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3693 ts->state &= ~TS_DEAD; 3694 } 3695 } 3696 3697 /* Incorporate constraints for this operand. */ 3698 switch (opc) { 3699 case INDEX_op_mov_i32: 3700 case INDEX_op_mov_i64: 3701 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3702 have proper constraints. That said, special case 3703 moves to propagate preferences backward. */ 3704 if (IS_DEAD_ARG(1)) { 3705 *la_temp_pref(arg_temp(op->args[0])) 3706 = *la_temp_pref(arg_temp(op->args[1])); 3707 } 3708 break; 3709 3710 default: 3711 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3712 const TCGArgConstraint *ct = &def->args_ct[i]; 3713 TCGRegSet set, *pset; 3714 3715 ts = arg_temp(op->args[i]); 3716 pset = la_temp_pref(ts); 3717 set = *pset; 3718 3719 set &= ct->regs; 3720 if (ct->ialias) { 3721 set &= output_pref(op, ct->alias_index); 3722 } 3723 /* If the combination is not possible, restart. */ 3724 if (set == 0) { 3725 set = ct->regs; 3726 } 3727 *pset = set; 3728 } 3729 break; 3730 } 3731 break; 3732 } 3733 op->life = arg_life; 3734 } 3735 } 3736 3737 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3738 static bool __attribute__((noinline)) 3739 liveness_pass_2(TCGContext *s) 3740 { 3741 int nb_globals = s->nb_globals; 3742 int nb_temps, i; 3743 bool changes = false; 3744 TCGOp *op, *op_next; 3745 3746 /* Create a temporary for each indirect global. 
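       An indirect global is one whose memory slot is addressed through a
       base that is not itself a fixed host register, so every access needs
       an explicit load or store.  The shadow TEMP_EBB temp created here
       carries the value between those loads and stores while the ops
       themselves are rewritten to use it directly.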
*/ 3747 for (i = 0; i < nb_globals; ++i) { 3748 TCGTemp *its = &s->temps[i]; 3749 if (its->indirect_reg) { 3750 TCGTemp *dts = tcg_temp_alloc(s); 3751 dts->type = its->type; 3752 dts->base_type = its->base_type; 3753 dts->temp_subindex = its->temp_subindex; 3754 dts->kind = TEMP_EBB; 3755 its->state_ptr = dts; 3756 } else { 3757 its->state_ptr = NULL; 3758 } 3759 /* All globals begin dead. */ 3760 its->state = TS_DEAD; 3761 } 3762 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3763 TCGTemp *its = &s->temps[i]; 3764 its->state_ptr = NULL; 3765 its->state = TS_DEAD; 3766 } 3767 3768 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3769 TCGOpcode opc = op->opc; 3770 const TCGOpDef *def = &tcg_op_defs[opc]; 3771 TCGLifeData arg_life = op->life; 3772 int nb_iargs, nb_oargs, call_flags; 3773 TCGTemp *arg_ts, *dir_ts; 3774 3775 if (opc == INDEX_op_call) { 3776 nb_oargs = TCGOP_CALLO(op); 3777 nb_iargs = TCGOP_CALLI(op); 3778 call_flags = tcg_call_flags(op); 3779 } else { 3780 nb_iargs = def->nb_iargs; 3781 nb_oargs = def->nb_oargs; 3782 3783 /* Set flags similar to how calls require. */ 3784 if (def->flags & TCG_OPF_COND_BRANCH) { 3785 /* Like reading globals: sync_globals */ 3786 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3787 } else if (def->flags & TCG_OPF_BB_END) { 3788 /* Like writing globals: save_globals */ 3789 call_flags = 0; 3790 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3791 /* Like reading globals: sync_globals */ 3792 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3793 } else { 3794 /* No effect on globals. */ 3795 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3796 TCG_CALL_NO_WRITE_GLOBALS); 3797 } 3798 } 3799 3800 /* Make sure that input arguments are available. */ 3801 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3802 arg_ts = arg_temp(op->args[i]); 3803 dir_ts = arg_ts->state_ptr; 3804 if (dir_ts && arg_ts->state == TS_DEAD) { 3805 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3806 ? INDEX_op_ld_i32 3807 : INDEX_op_ld_i64); 3808 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3809 3810 lop->args[0] = temp_arg(dir_ts); 3811 lop->args[1] = temp_arg(arg_ts->mem_base); 3812 lop->args[2] = arg_ts->mem_offset; 3813 3814 /* Loaded, but synced with memory. */ 3815 arg_ts->state = TS_MEM; 3816 } 3817 } 3818 3819 /* Perform input replacement, and mark inputs that became dead. 3820 No action is required except keeping temp_state up to date 3821 so that we reload when needed. */ 3822 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3823 arg_ts = arg_temp(op->args[i]); 3824 dir_ts = arg_ts->state_ptr; 3825 if (dir_ts) { 3826 op->args[i] = temp_arg(dir_ts); 3827 changes = true; 3828 if (IS_DEAD_ARG(i)) { 3829 arg_ts->state = TS_DEAD; 3830 } 3831 } 3832 } 3833 3834 /* Liveness analysis should ensure that the following are 3835 all correct, for call sites and basic block end points. */ 3836 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3837 /* Nothing to do */ 3838 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3839 for (i = 0; i < nb_globals; ++i) { 3840 /* Liveness should see that globals are synced back, 3841 that is, either TS_DEAD or TS_MEM. */ 3842 arg_ts = &s->temps[i]; 3843 tcg_debug_assert(arg_ts->state_ptr == 0 3844 || arg_ts->state != 0); 3845 } 3846 } else { 3847 for (i = 0; i < nb_globals; ++i) { 3848 /* Liveness should see that globals are saved back, 3849 that is, TS_DEAD, waiting to be reloaded. */ 3850 arg_ts = &s->temps[i]; 3851 tcg_debug_assert(arg_ts->state_ptr == 0 3852 || arg_ts->state == TS_DEAD); 3853 } 3854 } 3855 3856 /* Outputs become available. 
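           Plain moves get special treatment below: when the destination
           must be synced but is otherwise dead, the store is emitted
           directly from the mov source and the mov itself is deleted.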
*/ 3857 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3858 arg_ts = arg_temp(op->args[0]); 3859 dir_ts = arg_ts->state_ptr; 3860 if (dir_ts) { 3861 op->args[0] = temp_arg(dir_ts); 3862 changes = true; 3863 3864 /* The output is now live and modified. */ 3865 arg_ts->state = 0; 3866 3867 if (NEED_SYNC_ARG(0)) { 3868 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3869 ? INDEX_op_st_i32 3870 : INDEX_op_st_i64); 3871 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3872 TCGTemp *out_ts = dir_ts; 3873 3874 if (IS_DEAD_ARG(0)) { 3875 out_ts = arg_temp(op->args[1]); 3876 arg_ts->state = TS_DEAD; 3877 tcg_op_remove(s, op); 3878 } else { 3879 arg_ts->state = TS_MEM; 3880 } 3881 3882 sop->args[0] = temp_arg(out_ts); 3883 sop->args[1] = temp_arg(arg_ts->mem_base); 3884 sop->args[2] = arg_ts->mem_offset; 3885 } else { 3886 tcg_debug_assert(!IS_DEAD_ARG(0)); 3887 } 3888 } 3889 } else { 3890 for (i = 0; i < nb_oargs; i++) { 3891 arg_ts = arg_temp(op->args[i]); 3892 dir_ts = arg_ts->state_ptr; 3893 if (!dir_ts) { 3894 continue; 3895 } 3896 op->args[i] = temp_arg(dir_ts); 3897 changes = true; 3898 3899 /* The output is now live and modified. */ 3900 arg_ts->state = 0; 3901 3902 /* Sync outputs upon their last write. */ 3903 if (NEED_SYNC_ARG(i)) { 3904 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3905 ? INDEX_op_st_i32 3906 : INDEX_op_st_i64); 3907 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3908 3909 sop->args[0] = temp_arg(dir_ts); 3910 sop->args[1] = temp_arg(arg_ts->mem_base); 3911 sop->args[2] = arg_ts->mem_offset; 3912 3913 arg_ts->state = TS_MEM; 3914 } 3915 /* Drop outputs that are dead. */ 3916 if (IS_DEAD_ARG(i)) { 3917 arg_ts->state = TS_DEAD; 3918 } 3919 } 3920 } 3921 } 3922 3923 return changes; 3924 } 3925 3926 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3927 { 3928 intptr_t off; 3929 int size, align; 3930 3931 /* When allocating an object, look at the full type. */ 3932 size = tcg_type_size(ts->base_type); 3933 switch (ts->base_type) { 3934 case TCG_TYPE_I32: 3935 align = 4; 3936 break; 3937 case TCG_TYPE_I64: 3938 case TCG_TYPE_V64: 3939 align = 8; 3940 break; 3941 case TCG_TYPE_I128: 3942 case TCG_TYPE_V128: 3943 case TCG_TYPE_V256: 3944 /* 3945 * Note that we do not require aligned storage for V256, 3946 * and that we provide alignment for I128 to match V128, 3947 * even if that's above what the host ABI requires. 3948 */ 3949 align = 16; 3950 break; 3951 default: 3952 g_assert_not_reached(); 3953 } 3954 3955 /* 3956 * Assume the stack is sufficiently aligned. 3957 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3958 * and do not require 16 byte vector alignment. This seems slightly 3959 * easier than fully parameterizing the above switch statement. 3960 */ 3961 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3962 off = ROUND_UP(s->current_frame_offset, align); 3963 3964 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3965 if (off + size > s->frame_end) { 3966 tcg_raise_tb_overflow(s); 3967 } 3968 s->current_frame_offset = off + size; 3969 #if defined(__sparc__) 3970 off += TCG_TARGET_STACK_BIAS; 3971 #endif 3972 3973 /* If the object was subdivided, assign memory to all the parts. */ 3974 if (ts->base_type != ts->type) { 3975 int part_size = tcg_type_size(ts->type); 3976 int part_count = size / part_size; 3977 3978 /* 3979 * Each part is allocated sequentially in tcg_temp_new_internal. 3980 * Jump back to the first part by subtracting the current index. 
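         * E.g. a TCG_TYPE_I128 temp on a 64-bit host is split into two
         * I64 parts, which receive adjacent 8-byte slots in the frame
         * allocated just above.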
3981 */ 3982 ts -= ts->temp_subindex; 3983 for (int i = 0; i < part_count; ++i) { 3984 ts[i].mem_offset = off + i * part_size; 3985 ts[i].mem_base = s->frame_temp; 3986 ts[i].mem_allocated = 1; 3987 } 3988 } else { 3989 ts->mem_offset = off; 3990 ts->mem_base = s->frame_temp; 3991 ts->mem_allocated = 1; 3992 } 3993 } 3994 3995 /* Assign @reg to @ts, and update reg_to_temp[]. */ 3996 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 3997 { 3998 if (ts->val_type == TEMP_VAL_REG) { 3999 TCGReg old = ts->reg; 4000 tcg_debug_assert(s->reg_to_temp[old] == ts); 4001 if (old == reg) { 4002 return; 4003 } 4004 s->reg_to_temp[old] = NULL; 4005 } 4006 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4007 s->reg_to_temp[reg] = ts; 4008 ts->val_type = TEMP_VAL_REG; 4009 ts->reg = reg; 4010 } 4011 4012 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4013 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4014 { 4015 tcg_debug_assert(type != TEMP_VAL_REG); 4016 if (ts->val_type == TEMP_VAL_REG) { 4017 TCGReg reg = ts->reg; 4018 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4019 s->reg_to_temp[reg] = NULL; 4020 } 4021 ts->val_type = type; 4022 } 4023 4024 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4025 4026 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4027 mark it free; otherwise mark it dead. */ 4028 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4029 { 4030 TCGTempVal new_type; 4031 4032 switch (ts->kind) { 4033 case TEMP_FIXED: 4034 return; 4035 case TEMP_GLOBAL: 4036 case TEMP_TB: 4037 new_type = TEMP_VAL_MEM; 4038 break; 4039 case TEMP_EBB: 4040 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4041 break; 4042 case TEMP_CONST: 4043 new_type = TEMP_VAL_CONST; 4044 break; 4045 default: 4046 g_assert_not_reached(); 4047 } 4048 set_temp_val_nonreg(s, ts, new_type); 4049 } 4050 4051 /* Mark a temporary as dead. */ 4052 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4053 { 4054 temp_free_or_dead(s, ts, 1); 4055 } 4056 4057 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4058 registers needs to be allocated to store a constant. If 'free_or_dead' 4059 is non-zero, subsequently release the temporary; if it is positive, the 4060 temp is dead; if it is negative, the temp is free. */ 4061 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4062 TCGRegSet preferred_regs, int free_or_dead) 4063 { 4064 if (!temp_readonly(ts) && !ts->mem_coherent) { 4065 if (!ts->mem_allocated) { 4066 temp_allocate_frame(s, ts); 4067 } 4068 switch (ts->val_type) { 4069 case TEMP_VAL_CONST: 4070 /* If we're going to free the temp immediately, then we won't 4071 require it later in a register, so attempt to store the 4072 constant to memory directly. 
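   If the backend cannot encode such a store (tcg_out_sti returns false),
   fall through below: load the constant into a register and store that
   register like any other value.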
*/ 4073 if (free_or_dead 4074 && tcg_out_sti(s, ts->type, ts->val, 4075 ts->mem_base->reg, ts->mem_offset)) { 4076 break; 4077 } 4078 temp_load(s, ts, tcg_target_available_regs[ts->type], 4079 allocated_regs, preferred_regs); 4080 /* fallthrough */ 4081 4082 case TEMP_VAL_REG: 4083 tcg_out_st(s, ts->type, ts->reg, 4084 ts->mem_base->reg, ts->mem_offset); 4085 break; 4086 4087 case TEMP_VAL_MEM: 4088 break; 4089 4090 case TEMP_VAL_DEAD: 4091 default: 4092 g_assert_not_reached(); 4093 } 4094 ts->mem_coherent = 1; 4095 } 4096 if (free_or_dead) { 4097 temp_free_or_dead(s, ts, free_or_dead); 4098 } 4099 } 4100 4101 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4102 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4103 { 4104 TCGTemp *ts = s->reg_to_temp[reg]; 4105 if (ts != NULL) { 4106 temp_sync(s, ts, allocated_regs, 0, -1); 4107 } 4108 } 4109 4110 /** 4111 * tcg_reg_alloc: 4112 * @required_regs: Set of registers in which we must allocate. 4113 * @allocated_regs: Set of registers which must be avoided. 4114 * @preferred_regs: Set of registers we should prefer. 4115 * @rev: True if we search the registers in "indirect" order. 4116 * 4117 * The allocated register must be in @required_regs & ~@allocated_regs, 4118 * but if we can put it in @preferred_regs we may save a move later. 4119 */ 4120 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4121 TCGRegSet allocated_regs, 4122 TCGRegSet preferred_regs, bool rev) 4123 { 4124 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4125 TCGRegSet reg_ct[2]; 4126 const int *order; 4127 4128 reg_ct[1] = required_regs & ~allocated_regs; 4129 tcg_debug_assert(reg_ct[1] != 0); 4130 reg_ct[0] = reg_ct[1] & preferred_regs; 4131 4132 /* Skip the preferred_regs option if it cannot be satisfied, 4133 or if the preference made no difference. */ 4134 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4135 4136 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4137 4138 /* Try free registers, preferences first. */ 4139 for (j = f; j < 2; j++) { 4140 TCGRegSet set = reg_ct[j]; 4141 4142 if (tcg_regset_single(set)) { 4143 /* One register in the set. */ 4144 TCGReg reg = tcg_regset_first(set); 4145 if (s->reg_to_temp[reg] == NULL) { 4146 return reg; 4147 } 4148 } else { 4149 for (i = 0; i < n; i++) { 4150 TCGReg reg = order[i]; 4151 if (s->reg_to_temp[reg] == NULL && 4152 tcg_regset_test_reg(set, reg)) { 4153 return reg; 4154 } 4155 } 4156 } 4157 } 4158 4159 /* We must spill something. */ 4160 for (j = f; j < 2; j++) { 4161 TCGRegSet set = reg_ct[j]; 4162 4163 if (tcg_regset_single(set)) { 4164 /* One register in the set. */ 4165 TCGReg reg = tcg_regset_first(set); 4166 tcg_reg_free(s, reg, allocated_regs); 4167 return reg; 4168 } else { 4169 for (i = 0; i < n; i++) { 4170 TCGReg reg = order[i]; 4171 if (tcg_regset_test_reg(set, reg)) { 4172 tcg_reg_free(s, reg, allocated_regs); 4173 return reg; 4174 } 4175 } 4176 } 4177 } 4178 4179 g_assert_not_reached(); 4180 } 4181 4182 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4183 TCGRegSet allocated_regs, 4184 TCGRegSet preferred_regs, bool rev) 4185 { 4186 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4187 TCGRegSet reg_ct[2]; 4188 const int *order; 4189 4190 /* Ensure that if I is not in allocated_regs, I+1 is not either. 
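   For example, with only reg 2 allocated (mask 0b0100), 0b0100 | 0b0010 = 0b0110
   removes regs 1 and 2 as pair candidates, since a pair starting at either
   would overlap the allocated reg 2.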
*/ 4191 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4192 tcg_debug_assert(reg_ct[1] != 0); 4193 reg_ct[0] = reg_ct[1] & preferred_regs; 4194 4195 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4196 4197 /* 4198 * Skip the preferred_regs option if it cannot be satisfied, 4199 * or if the preference made no difference. 4200 */ 4201 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4202 4203 /* 4204 * Minimize the number of flushes by looking for 2 free registers first, 4205 * then a single flush, then two flushes. 4206 */ 4207 for (fmin = 2; fmin >= 0; fmin--) { 4208 for (j = k; j < 2; j++) { 4209 TCGRegSet set = reg_ct[j]; 4210 4211 for (i = 0; i < n; i++) { 4212 TCGReg reg = order[i]; 4213 4214 if (tcg_regset_test_reg(set, reg)) { 4215 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4216 if (f >= fmin) { 4217 tcg_reg_free(s, reg, allocated_regs); 4218 tcg_reg_free(s, reg + 1, allocated_regs); 4219 return reg; 4220 } 4221 } 4222 } 4223 } 4224 } 4225 g_assert_not_reached(); 4226 } 4227 4228 /* Make sure the temporary is in a register. If needed, allocate the register 4229 from DESIRED while avoiding ALLOCATED. */ 4230 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4231 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4232 { 4233 TCGReg reg; 4234 4235 switch (ts->val_type) { 4236 case TEMP_VAL_REG: 4237 return; 4238 case TEMP_VAL_CONST: 4239 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4240 preferred_regs, ts->indirect_base); 4241 if (ts->type <= TCG_TYPE_I64) { 4242 tcg_out_movi(s, ts->type, reg, ts->val); 4243 } else { 4244 uint64_t val = ts->val; 4245 MemOp vece = MO_64; 4246 4247 /* 4248 * Find the minimal vector element that matches the constant. 4249 * The targets will, in general, have to do this search anyway, 4250 * do this generically. 4251 */ 4252 if (val == dup_const(MO_8, val)) { 4253 vece = MO_8; 4254 } else if (val == dup_const(MO_16, val)) { 4255 vece = MO_16; 4256 } else if (val == dup_const(MO_32, val)) { 4257 vece = MO_32; 4258 } 4259 4260 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4261 } 4262 ts->mem_coherent = 0; 4263 break; 4264 case TEMP_VAL_MEM: 4265 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4266 preferred_regs, ts->indirect_base); 4267 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4268 ts->mem_coherent = 1; 4269 break; 4270 case TEMP_VAL_DEAD: 4271 default: 4272 g_assert_not_reached(); 4273 } 4274 set_temp_val_reg(s, ts, reg); 4275 } 4276 4277 /* Save a temporary to memory. 'allocated_regs' is used in case a 4278 temporary registers needs to be allocated to store a constant. */ 4279 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4280 { 4281 /* The liveness analysis already ensures that globals are back 4282 in memory. Keep an tcg_debug_assert for safety. */ 4283 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4284 } 4285 4286 /* save globals to their canonical location and assume they can be 4287 modified be the following code. 'allocated_regs' is used in case a 4288 temporary registers needs to be allocated to store a constant. */ 4289 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4290 { 4291 int i, n; 4292 4293 for (i = 0, n = s->nb_globals; i < n; i++) { 4294 temp_save(s, &s->temps[i], allocated_regs); 4295 } 4296 } 4297 4298 /* sync globals to their canonical location and assume they can be 4299 read by the following code. 
'allocated_regs' is used in case a 4300 temporary registers needs to be allocated to store a constant. */ 4301 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4302 { 4303 int i, n; 4304 4305 for (i = 0, n = s->nb_globals; i < n; i++) { 4306 TCGTemp *ts = &s->temps[i]; 4307 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4308 || ts->kind == TEMP_FIXED 4309 || ts->mem_coherent); 4310 } 4311 } 4312 4313 /* at the end of a basic block, we assume all temporaries are dead and 4314 all globals are stored at their canonical location. */ 4315 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4316 { 4317 int i; 4318 4319 for (i = s->nb_globals; i < s->nb_temps; i++) { 4320 TCGTemp *ts = &s->temps[i]; 4321 4322 switch (ts->kind) { 4323 case TEMP_TB: 4324 temp_save(s, ts, allocated_regs); 4325 break; 4326 case TEMP_EBB: 4327 /* The liveness analysis already ensures that temps are dead. 4328 Keep an tcg_debug_assert for safety. */ 4329 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4330 break; 4331 case TEMP_CONST: 4332 /* Similarly, we should have freed any allocated register. */ 4333 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4334 break; 4335 default: 4336 g_assert_not_reached(); 4337 } 4338 } 4339 4340 save_globals(s, allocated_regs); 4341 } 4342 4343 /* 4344 * At a conditional branch, we assume all temporaries are dead unless 4345 * explicitly live-across-conditional-branch; all globals and local 4346 * temps are synced to their location. 4347 */ 4348 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4349 { 4350 sync_globals(s, allocated_regs); 4351 4352 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4353 TCGTemp *ts = &s->temps[i]; 4354 /* 4355 * The liveness analysis already ensures that temps are dead. 4356 * Keep tcg_debug_asserts for safety. 4357 */ 4358 switch (ts->kind) { 4359 case TEMP_TB: 4360 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4361 break; 4362 case TEMP_EBB: 4363 case TEMP_CONST: 4364 break; 4365 default: 4366 g_assert_not_reached(); 4367 } 4368 } 4369 } 4370 4371 /* 4372 * Specialized code generation for INDEX_op_mov_* with a constant. 4373 */ 4374 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4375 tcg_target_ulong val, TCGLifeData arg_life, 4376 TCGRegSet preferred_regs) 4377 { 4378 /* ENV should not be modified. */ 4379 tcg_debug_assert(!temp_readonly(ots)); 4380 4381 /* The movi is not explicitly generated here. */ 4382 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4383 ots->val = val; 4384 ots->mem_coherent = 0; 4385 if (NEED_SYNC_ARG(0)) { 4386 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4387 } else if (IS_DEAD_ARG(0)) { 4388 temp_dead(s, ots); 4389 } 4390 } 4391 4392 /* 4393 * Specialized code generation for INDEX_op_mov_*. 4394 */ 4395 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4396 { 4397 const TCGLifeData arg_life = op->life; 4398 TCGRegSet allocated_regs, preferred_regs; 4399 TCGTemp *ts, *ots; 4400 TCGType otype, itype; 4401 TCGReg oreg, ireg; 4402 4403 allocated_regs = s->reserved_regs; 4404 preferred_regs = output_pref(op, 0); 4405 ots = arg_temp(op->args[0]); 4406 ts = arg_temp(op->args[1]); 4407 4408 /* ENV should not be modified. */ 4409 tcg_debug_assert(!temp_readonly(ots)); 4410 4411 /* Note that otype != itype for no-op truncation. 
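   e.g. a mov from an i64 temp into an i32 temp on a 64-bit host, where only
   the low 32 bits of the source remain meaningful in the destination.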
*/ 4412 otype = ots->type; 4413 itype = ts->type; 4414 4415 if (ts->val_type == TEMP_VAL_CONST) { 4416 /* propagate constant or generate sti */ 4417 tcg_target_ulong val = ts->val; 4418 if (IS_DEAD_ARG(1)) { 4419 temp_dead(s, ts); 4420 } 4421 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4422 return; 4423 } 4424 4425 /* If the source value is in memory we're going to be forced 4426 to have it in a register in order to perform the copy. Copy 4427 the SOURCE value into its own register first, that way we 4428 don't have to reload SOURCE the next time it is used. */ 4429 if (ts->val_type == TEMP_VAL_MEM) { 4430 temp_load(s, ts, tcg_target_available_regs[itype], 4431 allocated_regs, preferred_regs); 4432 } 4433 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4434 ireg = ts->reg; 4435 4436 if (IS_DEAD_ARG(0)) { 4437 /* mov to a non-saved dead register makes no sense (even with 4438 liveness analysis disabled). */ 4439 tcg_debug_assert(NEED_SYNC_ARG(0)); 4440 if (!ots->mem_allocated) { 4441 temp_allocate_frame(s, ots); 4442 } 4443 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4444 if (IS_DEAD_ARG(1)) { 4445 temp_dead(s, ts); 4446 } 4447 temp_dead(s, ots); 4448 return; 4449 } 4450 4451 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4452 /* 4453 * The mov can be suppressed. Kill input first, so that it 4454 * is unlinked from reg_to_temp, then set the output to the 4455 * reg that we saved from the input. 4456 */ 4457 temp_dead(s, ts); 4458 oreg = ireg; 4459 } else { 4460 if (ots->val_type == TEMP_VAL_REG) { 4461 oreg = ots->reg; 4462 } else { 4463 /* Make sure to not spill the input register during allocation. */ 4464 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4465 allocated_regs | ((TCGRegSet)1 << ireg), 4466 preferred_regs, ots->indirect_base); 4467 } 4468 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4469 /* 4470 * Cross register class move not supported. 4471 * Store the source register into the destination slot 4472 * and leave the destination temp as TEMP_VAL_MEM. 4473 */ 4474 assert(!temp_readonly(ots)); 4475 if (!ts->mem_allocated) { 4476 temp_allocate_frame(s, ots); 4477 } 4478 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4479 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4480 ots->mem_coherent = 1; 4481 return; 4482 } 4483 } 4484 set_temp_val_reg(s, ots, oreg); 4485 ots->mem_coherent = 0; 4486 4487 if (NEED_SYNC_ARG(0)) { 4488 temp_sync(s, ots, allocated_regs, 0, 0); 4489 } 4490 } 4491 4492 /* 4493 * Specialized code generation for INDEX_op_dup_vec. 4494 */ 4495 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4496 { 4497 const TCGLifeData arg_life = op->life; 4498 TCGRegSet dup_out_regs, dup_in_regs; 4499 TCGTemp *its, *ots; 4500 TCGType itype, vtype; 4501 unsigned vece; 4502 int lowpart_ofs; 4503 bool ok; 4504 4505 ots = arg_temp(op->args[0]); 4506 its = arg_temp(op->args[1]); 4507 4508 /* ENV should not be modified. */ 4509 tcg_debug_assert(!temp_readonly(ots)); 4510 4511 itype = its->type; 4512 vece = TCGOP_VECE(op); 4513 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4514 4515 if (its->val_type == TEMP_VAL_CONST) { 4516 /* Propagate constant via movi -> dupi. 
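   tcg_reg_alloc_do_movi only records the value as TEMP_VAL_CONST; the dupi
   itself is emitted later by temp_load, once a vector register is required.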
*/ 4517 tcg_target_ulong val = its->val; 4518 if (IS_DEAD_ARG(1)) { 4519 temp_dead(s, its); 4520 } 4521 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4522 return; 4523 } 4524 4525 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4526 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 4527 4528 /* Allocate the output register now. */ 4529 if (ots->val_type != TEMP_VAL_REG) { 4530 TCGRegSet allocated_regs = s->reserved_regs; 4531 TCGReg oreg; 4532 4533 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4534 /* Make sure to not spill the input register. */ 4535 tcg_regset_set_reg(allocated_regs, its->reg); 4536 } 4537 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4538 output_pref(op, 0), ots->indirect_base); 4539 set_temp_val_reg(s, ots, oreg); 4540 } 4541 4542 switch (its->val_type) { 4543 case TEMP_VAL_REG: 4544 /* 4545 * The dup constraints must be broad, covering all possible VECE. 4546 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 4547 * to fail, indicating that extra moves are required for that case. 4548 */ 4549 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4550 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4551 goto done; 4552 } 4553 /* Try again from memory or a vector input register. */ 4554 } 4555 if (!its->mem_coherent) { 4556 /* 4557 * The input register is not synced, and so an extra store 4558 * would be required to use memory. Attempt an integer-vector 4559 * register move first. We do not have a TCGRegSet for this. 4560 */ 4561 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4562 break; 4563 } 4564 /* Sync the temp back to its slot and load from there. */ 4565 temp_sync(s, its, s->reserved_regs, 0, 0); 4566 } 4567 /* fall through */ 4568 4569 case TEMP_VAL_MEM: 4570 lowpart_ofs = 0; 4571 if (HOST_BIG_ENDIAN) { 4572 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 4573 } 4574 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 4575 its->mem_offset + lowpart_ofs)) { 4576 goto done; 4577 } 4578 /* Load the input into the destination vector register. */ 4579 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 4580 break; 4581 4582 default: 4583 g_assert_not_reached(); 4584 } 4585 4586 /* We now have a vector input register, so dup must succeed.
*/ 4587 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4588 tcg_debug_assert(ok); 4589 4590 done: 4591 ots->mem_coherent = 0; 4592 if (IS_DEAD_ARG(1)) { 4593 temp_dead(s, its); 4594 } 4595 if (NEED_SYNC_ARG(0)) { 4596 temp_sync(s, ots, s->reserved_regs, 0, 0); 4597 } 4598 if (IS_DEAD_ARG(0)) { 4599 temp_dead(s, ots); 4600 } 4601 } 4602 4603 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4604 { 4605 const TCGLifeData arg_life = op->life; 4606 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4607 TCGRegSet i_allocated_regs; 4608 TCGRegSet o_allocated_regs; 4609 int i, k, nb_iargs, nb_oargs; 4610 TCGReg reg; 4611 TCGArg arg; 4612 const TCGArgConstraint *arg_ct; 4613 TCGTemp *ts; 4614 TCGArg new_args[TCG_MAX_OP_ARGS]; 4615 int const_args[TCG_MAX_OP_ARGS]; 4616 4617 nb_oargs = def->nb_oargs; 4618 nb_iargs = def->nb_iargs; 4619 4620 /* copy constants */ 4621 memcpy(new_args + nb_oargs + nb_iargs, 4622 op->args + nb_oargs + nb_iargs, 4623 sizeof(TCGArg) * def->nb_cargs); 4624 4625 i_allocated_regs = s->reserved_regs; 4626 o_allocated_regs = s->reserved_regs; 4627 4628 /* satisfy input constraints */ 4629 for (k = 0; k < nb_iargs; k++) { 4630 TCGRegSet i_preferred_regs, i_required_regs; 4631 bool allocate_new_reg, copyto_new_reg; 4632 TCGTemp *ts2; 4633 int i1, i2; 4634 4635 i = def->args_ct[nb_oargs + k].sort_index; 4636 arg = op->args[i]; 4637 arg_ct = &def->args_ct[i]; 4638 ts = arg_temp(arg); 4639 4640 if (ts->val_type == TEMP_VAL_CONST 4641 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4642 /* constant is OK for instruction */ 4643 const_args[i] = 1; 4644 new_args[i] = ts->val; 4645 continue; 4646 } 4647 4648 reg = ts->reg; 4649 i_preferred_regs = 0; 4650 i_required_regs = arg_ct->regs; 4651 allocate_new_reg = false; 4652 copyto_new_reg = false; 4653 4654 switch (arg_ct->pair) { 4655 case 0: /* not paired */ 4656 if (arg_ct->ialias) { 4657 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4658 4659 /* 4660 * If the input is readonly, then it cannot also be an 4661 * output and aliased to itself. If the input is not 4662 * dead after the instruction, we must allocate a new 4663 * register and move it. 4664 */ 4665 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4666 allocate_new_reg = true; 4667 } else if (ts->val_type == TEMP_VAL_REG) { 4668 /* 4669 * Check if the current register has already been 4670 * allocated for another input. 4671 */ 4672 allocate_new_reg = 4673 tcg_regset_test_reg(i_allocated_regs, reg); 4674 } 4675 } 4676 if (!allocate_new_reg) { 4677 temp_load(s, ts, i_required_regs, i_allocated_regs, 4678 i_preferred_regs); 4679 reg = ts->reg; 4680 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4681 } 4682 if (allocate_new_reg) { 4683 /* 4684 * Allocate a new register matching the constraint 4685 * and move the temporary register into it. 4686 */ 4687 temp_load(s, ts, tcg_target_available_regs[ts->type], 4688 i_allocated_regs, 0); 4689 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4690 i_preferred_regs, ts->indirect_base); 4691 copyto_new_reg = true; 4692 } 4693 break; 4694 4695 case 1: 4696 /* First of an input pair; if i1 == i2, the second is an output. */ 4697 i1 = i; 4698 i2 = arg_ct->pair_index; 4699 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4700 4701 /* 4702 * It is easier to default to allocating a new pair 4703 * and to identify a few cases where it's not required. 
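 * The checks below recognize inputs that already sit in a register pair
 * acceptable to the constraint and not otherwise in use, in which case
 * the existing pair can be consumed in place.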
4704 */ 4705 if (arg_ct->ialias) { 4706 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4707 if (IS_DEAD_ARG(i1) && 4708 IS_DEAD_ARG(i2) && 4709 !temp_readonly(ts) && 4710 ts->val_type == TEMP_VAL_REG && 4711 ts->reg < TCG_TARGET_NB_REGS - 1 && 4712 tcg_regset_test_reg(i_required_regs, reg) && 4713 !tcg_regset_test_reg(i_allocated_regs, reg) && 4714 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4715 (ts2 4716 ? ts2->val_type == TEMP_VAL_REG && 4717 ts2->reg == reg + 1 && 4718 !temp_readonly(ts2) 4719 : s->reg_to_temp[reg + 1] == NULL)) { 4720 break; 4721 } 4722 } else { 4723 /* Without aliasing, the pair must also be an input. */ 4724 tcg_debug_assert(ts2); 4725 if (ts->val_type == TEMP_VAL_REG && 4726 ts2->val_type == TEMP_VAL_REG && 4727 ts2->reg == reg + 1 && 4728 tcg_regset_test_reg(i_required_regs, reg)) { 4729 break; 4730 } 4731 } 4732 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4733 0, ts->indirect_base); 4734 goto do_pair; 4735 4736 case 2: /* pair second */ 4737 reg = new_args[arg_ct->pair_index] + 1; 4738 goto do_pair; 4739 4740 case 3: /* ialias with second output, no first input */ 4741 tcg_debug_assert(arg_ct->ialias); 4742 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4743 4744 if (IS_DEAD_ARG(i) && 4745 !temp_readonly(ts) && 4746 ts->val_type == TEMP_VAL_REG && 4747 reg > 0 && 4748 s->reg_to_temp[reg - 1] == NULL && 4749 tcg_regset_test_reg(i_required_regs, reg) && 4750 !tcg_regset_test_reg(i_allocated_regs, reg) && 4751 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4752 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4753 break; 4754 } 4755 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4756 i_allocated_regs, 0, 4757 ts->indirect_base); 4758 tcg_regset_set_reg(i_allocated_regs, reg); 4759 reg += 1; 4760 goto do_pair; 4761 4762 do_pair: 4763 /* 4764 * If an aliased input is not dead after the instruction, 4765 * we must allocate a new register and move it. 4766 */ 4767 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4768 TCGRegSet t_allocated_regs = i_allocated_regs; 4769 4770 /* 4771 * Because of the alias, and the continued life, make sure 4772 * that the temp is somewhere *other* than the reg pair, 4773 * and we get a copy in reg. 4774 */ 4775 tcg_regset_set_reg(t_allocated_regs, reg); 4776 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4777 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4778 /* If ts was already in reg, copy it somewhere else. */ 4779 TCGReg nr; 4780 bool ok; 4781 4782 tcg_debug_assert(ts->kind != TEMP_FIXED); 4783 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4784 t_allocated_regs, 0, ts->indirect_base); 4785 ok = tcg_out_mov(s, ts->type, nr, reg); 4786 tcg_debug_assert(ok); 4787 4788 set_temp_val_reg(s, ts, nr); 4789 } else { 4790 temp_load(s, ts, tcg_target_available_regs[ts->type], 4791 t_allocated_regs, 0); 4792 copyto_new_reg = true; 4793 } 4794 } else { 4795 /* Preferably allocate to reg, otherwise copy. */ 4796 i_required_regs = (TCGRegSet)1 << reg; 4797 temp_load(s, ts, i_required_regs, i_allocated_regs, 4798 i_preferred_regs); 4799 copyto_new_reg = ts->reg != reg; 4800 } 4801 break; 4802 4803 default: 4804 g_assert_not_reached(); 4805 } 4806 4807 if (copyto_new_reg) { 4808 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4809 /* 4810 * Cross register class move not supported. Sync the 4811 * temp back to its slot and load from there. 
4812 */ 4813 temp_sync(s, ts, i_allocated_regs, 0, 0); 4814 tcg_out_ld(s, ts->type, reg, 4815 ts->mem_base->reg, ts->mem_offset); 4816 } 4817 } 4818 new_args[i] = reg; 4819 const_args[i] = 0; 4820 tcg_regset_set_reg(i_allocated_regs, reg); 4821 } 4822 4823 /* mark dead temporaries and free the associated registers */ 4824 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4825 if (IS_DEAD_ARG(i)) { 4826 temp_dead(s, arg_temp(op->args[i])); 4827 } 4828 } 4829 4830 if (def->flags & TCG_OPF_COND_BRANCH) { 4831 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4832 } else if (def->flags & TCG_OPF_BB_END) { 4833 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4834 } else { 4835 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4836 /* XXX: permit generic clobber register list ? */ 4837 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4838 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4839 tcg_reg_free(s, i, i_allocated_regs); 4840 } 4841 } 4842 } 4843 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4844 /* sync globals if the op has side effects and might trigger 4845 an exception. */ 4846 sync_globals(s, i_allocated_regs); 4847 } 4848 4849 /* satisfy the output constraints */ 4850 for(k = 0; k < nb_oargs; k++) { 4851 i = def->args_ct[k].sort_index; 4852 arg = op->args[i]; 4853 arg_ct = &def->args_ct[i]; 4854 ts = arg_temp(arg); 4855 4856 /* ENV should not be modified. */ 4857 tcg_debug_assert(!temp_readonly(ts)); 4858 4859 switch (arg_ct->pair) { 4860 case 0: /* not paired */ 4861 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4862 reg = new_args[arg_ct->alias_index]; 4863 } else if (arg_ct->newreg) { 4864 reg = tcg_reg_alloc(s, arg_ct->regs, 4865 i_allocated_regs | o_allocated_regs, 4866 output_pref(op, k), ts->indirect_base); 4867 } else { 4868 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4869 output_pref(op, k), ts->indirect_base); 4870 } 4871 break; 4872 4873 case 1: /* first of pair */ 4874 tcg_debug_assert(!arg_ct->newreg); 4875 if (arg_ct->oalias) { 4876 reg = new_args[arg_ct->alias_index]; 4877 break; 4878 } 4879 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 4880 output_pref(op, k), ts->indirect_base); 4881 break; 4882 4883 case 2: /* second of pair */ 4884 tcg_debug_assert(!arg_ct->newreg); 4885 if (arg_ct->oalias) { 4886 reg = new_args[arg_ct->alias_index]; 4887 } else { 4888 reg = new_args[arg_ct->pair_index] + 1; 4889 } 4890 break; 4891 4892 case 3: /* first of pair, aliasing with a second input */ 4893 tcg_debug_assert(!arg_ct->newreg); 4894 reg = new_args[arg_ct->pair_index] - 1; 4895 break; 4896 4897 default: 4898 g_assert_not_reached(); 4899 } 4900 tcg_regset_set_reg(o_allocated_regs, reg); 4901 set_temp_val_reg(s, ts, reg); 4902 ts->mem_coherent = 0; 4903 new_args[i] = reg; 4904 } 4905 } 4906 4907 /* emit instruction */ 4908 switch (op->opc) { 4909 case INDEX_op_ext8s_i32: 4910 tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4911 break; 4912 case INDEX_op_ext8s_i64: 4913 tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4914 break; 4915 case INDEX_op_ext8u_i32: 4916 case INDEX_op_ext8u_i64: 4917 tcg_out_ext8u(s, new_args[0], new_args[1]); 4918 break; 4919 case INDEX_op_ext16s_i32: 4920 tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4921 break; 4922 case INDEX_op_ext16s_i64: 4923 tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4924 break; 4925 case INDEX_op_ext16u_i32: 4926 case INDEX_op_ext16u_i64: 4927 tcg_out_ext16u(s, new_args[0], new_args[1]); 4928 break; 4929 case INDEX_op_ext32s_i64: 4930 tcg_out_ext32s(s, 
new_args[0], new_args[1]); 4931 break; 4932 case INDEX_op_ext32u_i64: 4933 tcg_out_ext32u(s, new_args[0], new_args[1]); 4934 break; 4935 case INDEX_op_ext_i32_i64: 4936 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 4937 break; 4938 case INDEX_op_extu_i32_i64: 4939 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 4940 break; 4941 case INDEX_op_extrl_i64_i32: 4942 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 4943 break; 4944 default: 4945 if (def->flags & TCG_OPF_VECTOR) { 4946 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4947 new_args, const_args); 4948 } else { 4949 tcg_out_op(s, op->opc, new_args, const_args); 4950 } 4951 break; 4952 } 4953 4954 /* move the outputs in the correct register if needed */ 4955 for(i = 0; i < nb_oargs; i++) { 4956 ts = arg_temp(op->args[i]); 4957 4958 /* ENV should not be modified. */ 4959 tcg_debug_assert(!temp_readonly(ts)); 4960 4961 if (NEED_SYNC_ARG(i)) { 4962 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4963 } else if (IS_DEAD_ARG(i)) { 4964 temp_dead(s, ts); 4965 } 4966 } 4967 } 4968 4969 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4970 { 4971 const TCGLifeData arg_life = op->life; 4972 TCGTemp *ots, *itsl, *itsh; 4973 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4974 4975 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4976 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4977 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4978 4979 ots = arg_temp(op->args[0]); 4980 itsl = arg_temp(op->args[1]); 4981 itsh = arg_temp(op->args[2]); 4982 4983 /* ENV should not be modified. */ 4984 tcg_debug_assert(!temp_readonly(ots)); 4985 4986 /* Allocate the output register now. */ 4987 if (ots->val_type != TEMP_VAL_REG) { 4988 TCGRegSet allocated_regs = s->reserved_regs; 4989 TCGRegSet dup_out_regs = 4990 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4991 TCGReg oreg; 4992 4993 /* Make sure to not spill the input registers. */ 4994 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 4995 tcg_regset_set_reg(allocated_regs, itsl->reg); 4996 } 4997 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 4998 tcg_regset_set_reg(allocated_regs, itsh->reg); 4999 } 5000 5001 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5002 output_pref(op, 0), ots->indirect_base); 5003 set_temp_val_reg(s, ots, oreg); 5004 } 5005 5006 /* Promote dup2 of immediates to dupi_vec. */ 5007 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5008 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5009 MemOp vece = MO_64; 5010 5011 if (val == dup_const(MO_8, val)) { 5012 vece = MO_8; 5013 } else if (val == dup_const(MO_16, val)) { 5014 vece = MO_16; 5015 } else if (val == dup_const(MO_32, val)) { 5016 vece = MO_32; 5017 } 5018 5019 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5020 goto done; 5021 } 5022 5023 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5024 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5025 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5026 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5027 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5028 5029 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5030 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5031 5032 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5033 its->mem_base->reg, its->mem_offset)) { 5034 goto done; 5035 } 5036 } 5037 5038 /* Fall back to generic expansion. 
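   Returning false makes the caller in tcg_gen_code fall through to
   tcg_reg_alloc_op, so the backend emits the dup2_vec itself.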
*/ 5039 return false; 5040 5041 done: 5042 ots->mem_coherent = 0; 5043 if (IS_DEAD_ARG(1)) { 5044 temp_dead(s, itsl); 5045 } 5046 if (IS_DEAD_ARG(2)) { 5047 temp_dead(s, itsh); 5048 } 5049 if (NEED_SYNC_ARG(0)) { 5050 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5051 } else if (IS_DEAD_ARG(0)) { 5052 temp_dead(s, ots); 5053 } 5054 return true; 5055 } 5056 5057 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5058 TCGRegSet allocated_regs) 5059 { 5060 if (ts->val_type == TEMP_VAL_REG) { 5061 if (ts->reg != reg) { 5062 tcg_reg_free(s, reg, allocated_regs); 5063 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5064 /* 5065 * Cross register class move not supported. Sync the 5066 * temp back to its slot and load from there. 5067 */ 5068 temp_sync(s, ts, allocated_regs, 0, 0); 5069 tcg_out_ld(s, ts->type, reg, 5070 ts->mem_base->reg, ts->mem_offset); 5071 } 5072 } 5073 } else { 5074 TCGRegSet arg_set = 0; 5075 5076 tcg_reg_free(s, reg, allocated_regs); 5077 tcg_regset_set_reg(arg_set, reg); 5078 temp_load(s, ts, arg_set, allocated_regs, 0); 5079 } 5080 } 5081 5082 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5083 TCGRegSet allocated_regs) 5084 { 5085 /* 5086 * When the destination is on the stack, load up the temp and store. 5087 * If there are many call-saved registers, the temp might live to 5088 * see another use; otherwise it'll be discarded. 5089 */ 5090 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5091 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5092 arg_slot_stk_ofs(arg_slot)); 5093 } 5094 5095 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5096 TCGTemp *ts, TCGRegSet *allocated_regs) 5097 { 5098 if (arg_slot_reg_p(l->arg_slot)) { 5099 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5100 load_arg_reg(s, reg, ts, *allocated_regs); 5101 tcg_regset_set_reg(*allocated_regs, reg); 5102 } else { 5103 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5104 } 5105 } 5106 5107 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5108 intptr_t ref_off, TCGRegSet *allocated_regs) 5109 { 5110 TCGReg reg; 5111 5112 if (arg_slot_reg_p(arg_slot)) { 5113 reg = tcg_target_call_iarg_regs[arg_slot]; 5114 tcg_reg_free(s, reg, *allocated_regs); 5115 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5116 tcg_regset_set_reg(*allocated_regs, reg); 5117 } else { 5118 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5119 *allocated_regs, 0, false); 5120 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5121 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5122 arg_slot_stk_ofs(arg_slot)); 5123 } 5124 } 5125 5126 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5127 { 5128 const int nb_oargs = TCGOP_CALLO(op); 5129 const int nb_iargs = TCGOP_CALLI(op); 5130 const TCGLifeData arg_life = op->life; 5131 const TCGHelperInfo *info = tcg_call_info(op); 5132 TCGRegSet allocated_regs = s->reserved_regs; 5133 int i; 5134 5135 /* 5136 * Move inputs into place in reverse order, 5137 * so that we place stacked arguments first. 
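 * Storing a stack argument first needs its value in some register (see
 * load_arg_stk); doing those stores before any argument registers are
 * claimed leaves the argument registers still available to that load.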
5138 */ 5139 for (i = nb_iargs - 1; i >= 0; --i) { 5140 const TCGCallArgumentLoc *loc = &info->in[i]; 5141 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5142 5143 switch (loc->kind) { 5144 case TCG_CALL_ARG_NORMAL: 5145 case TCG_CALL_ARG_EXTEND_U: 5146 case TCG_CALL_ARG_EXTEND_S: 5147 load_arg_normal(s, loc, ts, &allocated_regs); 5148 break; 5149 case TCG_CALL_ARG_BY_REF: 5150 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5151 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5152 arg_slot_stk_ofs(loc->ref_slot), 5153 &allocated_regs); 5154 break; 5155 case TCG_CALL_ARG_BY_REF_N: 5156 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5157 break; 5158 default: 5159 g_assert_not_reached(); 5160 } 5161 } 5162 5163 /* Mark dead temporaries and free the associated registers. */ 5164 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5165 if (IS_DEAD_ARG(i)) { 5166 temp_dead(s, arg_temp(op->args[i])); 5167 } 5168 } 5169 5170 /* Clobber call registers. */ 5171 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5172 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5173 tcg_reg_free(s, i, allocated_regs); 5174 } 5175 } 5176 5177 /* 5178 * Save globals if they might be written by the helper, 5179 * sync them if they might be read. 5180 */ 5181 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5182 /* Nothing to do */ 5183 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5184 sync_globals(s, allocated_regs); 5185 } else { 5186 save_globals(s, allocated_regs); 5187 } 5188 5189 /* 5190 * If the ABI passes a pointer to the returned struct as the first 5191 * argument, load that now. Pass a pointer to the output home slot. 5192 */ 5193 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5194 TCGTemp *ts = arg_temp(op->args[0]); 5195 5196 if (!ts->mem_allocated) { 5197 temp_allocate_frame(s, ts); 5198 } 5199 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5200 } 5201 5202 tcg_out_call(s, tcg_call_func(op), info); 5203 5204 /* Assign output registers and emit moves if needed. */ 5205 switch (info->out_kind) { 5206 case TCG_CALL_RET_NORMAL: 5207 for (i = 0; i < nb_oargs; i++) { 5208 TCGTemp *ts = arg_temp(op->args[i]); 5209 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5210 5211 /* ENV should not be modified. */ 5212 tcg_debug_assert(!temp_readonly(ts)); 5213 5214 set_temp_val_reg(s, ts, reg); 5215 ts->mem_coherent = 0; 5216 } 5217 break; 5218 5219 case TCG_CALL_RET_BY_VEC: 5220 { 5221 TCGTemp *ts = arg_temp(op->args[0]); 5222 5223 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5224 tcg_debug_assert(ts->temp_subindex == 0); 5225 if (!ts->mem_allocated) { 5226 temp_allocate_frame(s, ts); 5227 } 5228 tcg_out_st(s, TCG_TYPE_V128, 5229 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5230 ts->mem_base->reg, ts->mem_offset); 5231 } 5232 /* fall through to mark all parts in memory */ 5233 5234 case TCG_CALL_RET_BY_REF: 5235 /* The callee has performed a write through the reference. */ 5236 for (i = 0; i < nb_oargs; i++) { 5237 TCGTemp *ts = arg_temp(op->args[i]); 5238 ts->val_type = TEMP_VAL_MEM; 5239 } 5240 break; 5241 5242 default: 5243 g_assert_not_reached(); 5244 } 5245 5246 /* Flush or discard output registers as needed. 
*/ 5247 for (i = 0; i < nb_oargs; i++) { 5248 TCGTemp *ts = arg_temp(op->args[i]); 5249 if (NEED_SYNC_ARG(i)) { 5250 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5251 } else if (IS_DEAD_ARG(i)) { 5252 temp_dead(s, ts); 5253 } 5254 } 5255 } 5256 5257 /** 5258 * atom_and_align_for_opc: 5259 * @s: tcg context 5260 * @opc: memory operation code 5261 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5262 * @allow_two_ops: true if we are prepared to issue two operations 5263 * 5264 * Return the alignment and atomicity to use for the inline fast path 5265 * for the given memory operation. The alignment may be larger than 5266 * that specified in @opc, and the correct alignment will be diagnosed 5267 * by the slow path helper. 5268 * 5269 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5270 * and issue two loads or stores for subalignment. 5271 */ 5272 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5273 MemOp host_atom, bool allow_two_ops) 5274 { 5275 MemOp align = get_alignment_bits(opc); 5276 MemOp size = opc & MO_SIZE; 5277 MemOp half = size ? size - 1 : 0; 5278 MemOp atmax; 5279 MemOp atom; 5280 5281 /* When serialized, no further atomicity required. */ 5282 if (s->gen_tb->cflags & CF_PARALLEL) { 5283 atom = opc & MO_ATOM_MASK; 5284 } else { 5285 atom = MO_ATOM_NONE; 5286 } 5287 5288 switch (atom) { 5289 case MO_ATOM_NONE: 5290 /* The operation requires no specific atomicity. */ 5291 atmax = MO_8; 5292 break; 5293 5294 case MO_ATOM_IFALIGN: 5295 atmax = size; 5296 break; 5297 5298 case MO_ATOM_IFALIGN_PAIR: 5299 atmax = half; 5300 break; 5301 5302 case MO_ATOM_WITHIN16: 5303 atmax = size; 5304 if (size == MO_128) { 5305 /* Misalignment implies !within16, and therefore no atomicity. */ 5306 } else if (host_atom != MO_ATOM_WITHIN16) { 5307 /* The host does not implement within16, so require alignment. */ 5308 align = MAX(align, size); 5309 } 5310 break; 5311 5312 case MO_ATOM_WITHIN16_PAIR: 5313 atmax = size; 5314 /* 5315 * Misalignment implies !within16, and therefore half atomicity. 5316 * Any host prepared for two operations can implement this with 5317 * half alignment. 5318 */ 5319 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5320 align = MAX(align, half); 5321 } 5322 break; 5323 5324 case MO_ATOM_SUBALIGN: 5325 atmax = size; 5326 if (host_atom != MO_ATOM_SUBALIGN) { 5327 /* If unaligned but not odd, there are subobjects up to half. */ 5328 if (allow_two_ops) { 5329 align = MAX(align, half); 5330 } else { 5331 align = MAX(align, size); 5332 } 5333 } 5334 break; 5335 5336 default: 5337 g_assert_not_reached(); 5338 } 5339 5340 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5341 } 5342 5343 /* 5344 * Similarly for qemu_ld/st slow path helpers. 5345 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5346 * using only the provided backend tcg_out_* functions. 5347 */ 5348 5349 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5350 { 5351 int ofs = arg_slot_stk_ofs(slot); 5352 5353 /* 5354 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5355 * require extension to uint64_t, adjust the address for uint32_t. 
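 * e.g. on a 64-bit big-endian host a 32-bit argument occupies the
 * high-addressed half of its 8-byte slot, hence the ofs += 4 below.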
5356 */ 5357 if (HOST_BIG_ENDIAN && 5358 TCG_TARGET_REG_BITS == 64 && 5359 type == TCG_TYPE_I32) { 5360 ofs += 4; 5361 } 5362 return ofs; 5363 } 5364 5365 static void tcg_out_helper_load_slots(TCGContext *s, 5366 unsigned nmov, TCGMovExtend *mov, 5367 const TCGLdstHelperParam *parm) 5368 { 5369 unsigned i; 5370 TCGReg dst3; 5371 5372 /* 5373 * Start from the end, storing to the stack first. 5374 * This frees those registers, so we need not consider overlap. 5375 */ 5376 for (i = nmov; i-- > 0; ) { 5377 unsigned slot = mov[i].dst; 5378 5379 if (arg_slot_reg_p(slot)) { 5380 goto found_reg; 5381 } 5382 5383 TCGReg src = mov[i].src; 5384 TCGType dst_type = mov[i].dst_type; 5385 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5386 5387 /* The argument is going onto the stack; extend into scratch. */ 5388 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5389 tcg_debug_assert(parm->ntmp != 0); 5390 mov[i].dst = src = parm->tmp[0]; 5391 tcg_out_movext1(s, &mov[i]); 5392 } 5393 5394 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5395 tcg_out_helper_stk_ofs(dst_type, slot)); 5396 } 5397 return; 5398 5399 found_reg: 5400 /* 5401 * The remaining arguments are in registers. 5402 * Convert slot numbers to argument registers. 5403 */ 5404 nmov = i + 1; 5405 for (i = 0; i < nmov; ++i) { 5406 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5407 } 5408 5409 switch (nmov) { 5410 case 4: 5411 /* The backend must have provided enough temps for the worst case. */ 5412 tcg_debug_assert(parm->ntmp >= 2); 5413 5414 dst3 = mov[3].dst; 5415 for (unsigned j = 0; j < 3; ++j) { 5416 if (dst3 == mov[j].src) { 5417 /* 5418 * Conflict. Copy the source to a temporary, perform the 5419 * remaining moves, then the extension from our scratch 5420 * on the way out. 5421 */ 5422 TCGReg scratch = parm->tmp[1]; 5423 5424 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5425 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5426 tcg_out_movext1_new_src(s, &mov[3], scratch); 5427 break; 5428 } 5429 } 5430 5431 /* No conflicts: perform this move and continue. */ 5432 tcg_out_movext1(s, &mov[3]); 5433 /* fall through */ 5434 5435 case 3: 5436 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5437 parm->ntmp ? parm->tmp[0] : -1); 5438 break; 5439 case 2: 5440 tcg_out_movext2(s, mov, mov + 1, 5441 parm->ntmp ? parm->tmp[0] : -1); 5442 break; 5443 case 1: 5444 tcg_out_movext1(s, mov); 5445 break; 5446 default: 5447 g_assert_not_reached(); 5448 } 5449 } 5450 5451 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5452 TCGType type, tcg_target_long imm, 5453 const TCGLdstHelperParam *parm) 5454 { 5455 if (arg_slot_reg_p(slot)) { 5456 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5457 } else { 5458 int ofs = tcg_out_helper_stk_ofs(type, slot); 5459 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5460 tcg_debug_assert(parm->ntmp != 0); 5461 tcg_out_movi(s, type, parm->tmp[0], imm); 5462 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5463 } 5464 } 5465 } 5466 5467 static void tcg_out_helper_load_common_args(TCGContext *s, 5468 const TCGLabelQemuLdst *ldst, 5469 const TCGLdstHelperParam *parm, 5470 const TCGHelperInfo *info, 5471 unsigned next_arg) 5472 { 5473 TCGMovExtend ptr_mov = { 5474 .dst_type = TCG_TYPE_PTR, 5475 .src_type = TCG_TYPE_PTR, 5476 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 5477 }; 5478 const TCGCallArgumentLoc *loc = &info->in[0]; 5479 TCGType type; 5480 unsigned slot; 5481 tcg_target_ulong imm; 5482 5483 /* 5484 * Handle env, which is always first. 
5485 */ 5486 ptr_mov.dst = loc->arg_slot; 5487 ptr_mov.src = TCG_AREG0; 5488 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 5489 5490 /* 5491 * Handle oi. 5492 */ 5493 imm = ldst->oi; 5494 loc = &info->in[next_arg]; 5495 type = TCG_TYPE_I32; 5496 switch (loc->kind) { 5497 case TCG_CALL_ARG_NORMAL: 5498 break; 5499 case TCG_CALL_ARG_EXTEND_U: 5500 case TCG_CALL_ARG_EXTEND_S: 5501 /* No extension required for MemOpIdx. */ 5502 tcg_debug_assert(imm <= INT32_MAX); 5503 type = TCG_TYPE_REG; 5504 break; 5505 default: 5506 g_assert_not_reached(); 5507 } 5508 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 5509 next_arg++; 5510 5511 /* 5512 * Handle ra. 5513 */ 5514 loc = &info->in[next_arg]; 5515 slot = loc->arg_slot; 5516 if (parm->ra_gen) { 5517 int arg_reg = -1; 5518 TCGReg ra_reg; 5519 5520 if (arg_slot_reg_p(slot)) { 5521 arg_reg = tcg_target_call_iarg_regs[slot]; 5522 } 5523 ra_reg = parm->ra_gen(s, ldst, arg_reg); 5524 5525 ptr_mov.dst = slot; 5526 ptr_mov.src = ra_reg; 5527 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 5528 } else { 5529 imm = (uintptr_t)ldst->raddr; 5530 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 5531 } 5532 } 5533 5534 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 5535 const TCGCallArgumentLoc *loc, 5536 TCGType dst_type, TCGType src_type, 5537 TCGReg lo, TCGReg hi) 5538 { 5539 MemOp reg_mo; 5540 5541 if (dst_type <= TCG_TYPE_REG) { 5542 MemOp src_ext; 5543 5544 switch (loc->kind) { 5545 case TCG_CALL_ARG_NORMAL: 5546 src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5547 break; 5548 case TCG_CALL_ARG_EXTEND_U: 5549 dst_type = TCG_TYPE_REG; 5550 src_ext = MO_UL; 5551 break; 5552 case TCG_CALL_ARG_EXTEND_S: 5553 dst_type = TCG_TYPE_REG; 5554 src_ext = MO_SL; 5555 break; 5556 default: 5557 g_assert_not_reached(); 5558 } 5559 5560 mov[0].dst = loc->arg_slot; 5561 mov[0].dst_type = dst_type; 5562 mov[0].src = lo; 5563 mov[0].src_type = src_type; 5564 mov[0].src_ext = src_ext; 5565 return 1; 5566 } 5567 5568 if (TCG_TARGET_REG_BITS == 32) { 5569 assert(dst_type == TCG_TYPE_I64); 5570 reg_mo = MO_32; 5571 } else { 5572 assert(dst_type == TCG_TYPE_I128); 5573 reg_mo = MO_64; 5574 } 5575 5576 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 5577 mov[0].src = lo; 5578 mov[0].dst_type = TCG_TYPE_REG; 5579 mov[0].src_type = TCG_TYPE_REG; 5580 mov[0].src_ext = reg_mo; 5581 5582 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 5583 mov[1].src = hi; 5584 mov[1].dst_type = TCG_TYPE_REG; 5585 mov[1].src_type = TCG_TYPE_REG; 5586 mov[1].src_ext = reg_mo; 5587 5588 return 2; 5589 } 5590 5591 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 5592 const TCGLdstHelperParam *parm) 5593 { 5594 const TCGHelperInfo *info; 5595 const TCGCallArgumentLoc *loc; 5596 TCGMovExtend mov[2]; 5597 unsigned next_arg, nmov; 5598 MemOp mop = get_memop(ldst->oi); 5599 5600 switch (mop & MO_SIZE) { 5601 case MO_8: 5602 case MO_16: 5603 case MO_32: 5604 info = &info_helper_ld32_mmu; 5605 break; 5606 case MO_64: 5607 info = &info_helper_ld64_mmu; 5608 break; 5609 case MO_128: 5610 info = &info_helper_ld128_mmu; 5611 break; 5612 default: 5613 g_assert_not_reached(); 5614 } 5615 5616 /* Defer env argument. */ 5617 next_arg = 1; 5618 5619 loc = &info->in[next_arg]; 5620 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 5621 /* 5622 * 32-bit host with 32-bit guest: zero-extend the guest address 5623 * to 64-bits for the helper by storing the low part, then 5624 * load a zero for the high part. 
5625 */ 5626 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 5627 TCG_TYPE_I32, TCG_TYPE_I32, 5628 ldst->addrlo_reg, -1); 5629 tcg_out_helper_load_slots(s, 1, mov, parm); 5630 5631 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 5632 TCG_TYPE_I32, 0, parm); 5633 next_arg += 2; 5634 } else { 5635 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 5636 ldst->addrlo_reg, ldst->addrhi_reg); 5637 tcg_out_helper_load_slots(s, nmov, mov, parm); 5638 next_arg += nmov; 5639 } 5640 5641 switch (info->out_kind) { 5642 case TCG_CALL_RET_NORMAL: 5643 case TCG_CALL_RET_BY_VEC: 5644 break; 5645 case TCG_CALL_RET_BY_REF: 5646 /* 5647 * The return reference is in the first argument slot. 5648 * We need memory in which to return: re-use the top of stack. 5649 */ 5650 { 5651 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 5652 5653 if (arg_slot_reg_p(0)) { 5654 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 5655 TCG_REG_CALL_STACK, ofs_slot0); 5656 } else { 5657 tcg_debug_assert(parm->ntmp != 0); 5658 tcg_out_addi_ptr(s, parm->tmp[0], 5659 TCG_REG_CALL_STACK, ofs_slot0); 5660 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 5661 TCG_REG_CALL_STACK, ofs_slot0); 5662 } 5663 } 5664 break; 5665 default: 5666 g_assert_not_reached(); 5667 } 5668 5669 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 5670 } 5671 5672 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 5673 bool load_sign, 5674 const TCGLdstHelperParam *parm) 5675 { 5676 MemOp mop = get_memop(ldst->oi); 5677 TCGMovExtend mov[2]; 5678 int ofs_slot0; 5679 5680 switch (ldst->type) { 5681 case TCG_TYPE_I64: 5682 if (TCG_TARGET_REG_BITS == 32) { 5683 break; 5684 } 5685 /* fall through */ 5686 5687 case TCG_TYPE_I32: 5688 mov[0].dst = ldst->datalo_reg; 5689 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 5690 mov[0].dst_type = ldst->type; 5691 mov[0].src_type = TCG_TYPE_REG; 5692 5693 /* 5694 * If load_sign, then we allowed the helper to perform the 5695 * appropriate sign extension to tcg_target_ulong, and all 5696 * we need now is a plain move. 5697 * 5698 * If they do not, then we expect the relevant extension 5699 * instruction to be no more expensive than a move, and 5700 * we thus save the icache etc by only using one of two 5701 * helper functions. 
5702 */ 5703 if (load_sign || !(mop & MO_SIGN)) { 5704 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 5705 mov[0].src_ext = MO_32; 5706 } else { 5707 mov[0].src_ext = MO_64; 5708 } 5709 } else { 5710 mov[0].src_ext = mop & MO_SSIZE; 5711 } 5712 tcg_out_movext1(s, mov); 5713 return; 5714 5715 case TCG_TYPE_I128: 5716 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 5717 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 5718 switch (TCG_TARGET_CALL_RET_I128) { 5719 case TCG_CALL_RET_NORMAL: 5720 break; 5721 case TCG_CALL_RET_BY_VEC: 5722 tcg_out_st(s, TCG_TYPE_V128, 5723 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5724 TCG_REG_CALL_STACK, ofs_slot0); 5725 /* fall through */ 5726 case TCG_CALL_RET_BY_REF: 5727 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 5728 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 5729 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 5730 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 5731 return; 5732 default: 5733 g_assert_not_reached(); 5734 } 5735 break; 5736 5737 default: 5738 g_assert_not_reached(); 5739 } 5740 5741 mov[0].dst = ldst->datalo_reg; 5742 mov[0].src = 5743 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 5744 mov[0].dst_type = TCG_TYPE_I32; 5745 mov[0].src_type = TCG_TYPE_I32; 5746 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 5747 5748 mov[1].dst = ldst->datahi_reg; 5749 mov[1].src = 5750 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 5751 mov[1].dst_type = TCG_TYPE_REG; 5752 mov[1].src_type = TCG_TYPE_REG; 5753 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 5754 5755 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1); 5756 } 5757 5758 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 5759 const TCGLdstHelperParam *parm) 5760 { 5761 const TCGHelperInfo *info; 5762 const TCGCallArgumentLoc *loc; 5763 TCGMovExtend mov[4]; 5764 TCGType data_type; 5765 unsigned next_arg, nmov, n; 5766 MemOp mop = get_memop(ldst->oi); 5767 5768 switch (mop & MO_SIZE) { 5769 case MO_8: 5770 case MO_16: 5771 case MO_32: 5772 info = &info_helper_st32_mmu; 5773 data_type = TCG_TYPE_I32; 5774 break; 5775 case MO_64: 5776 info = &info_helper_st64_mmu; 5777 data_type = TCG_TYPE_I64; 5778 break; 5779 case MO_128: 5780 info = &info_helper_st128_mmu; 5781 data_type = TCG_TYPE_I128; 5782 break; 5783 default: 5784 g_assert_not_reached(); 5785 } 5786 5787 /* Defer env argument. */ 5788 next_arg = 1; 5789 nmov = 0; 5790 5791 /* Handle addr argument. */ 5792 loc = &info->in[next_arg]; 5793 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 5794 /* 5795 * 32-bit host with 32-bit guest: zero-extend the guest address 5796 * to 64-bits for the helper by storing the low part. Later, 5797 * after we have processed the register inputs, we will load a 5798 * zero for the high part. 5799 */ 5800 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 5801 TCG_TYPE_I32, TCG_TYPE_I32, 5802 ldst->addrlo_reg, -1); 5803 next_arg += 2; 5804 nmov += 1; 5805 } else { 5806 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 5807 ldst->addrlo_reg, ldst->addrhi_reg); 5808 next_arg += n; 5809 nmov += n; 5810 } 5811 5812 /* Handle data argument. 
*/ 5813 loc = &info->in[next_arg]; 5814 switch (loc->kind) { 5815 case TCG_CALL_ARG_NORMAL: 5816 case TCG_CALL_ARG_EXTEND_U: 5817 case TCG_CALL_ARG_EXTEND_S: 5818 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 5819 ldst->datalo_reg, ldst->datahi_reg); 5820 next_arg += n; 5821 nmov += n; 5822 tcg_out_helper_load_slots(s, nmov, mov, parm); 5823 break; 5824 5825 case TCG_CALL_ARG_BY_REF: 5826 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 5827 tcg_debug_assert(data_type == TCG_TYPE_I128); 5828 tcg_out_st(s, TCG_TYPE_I64, 5829 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 5830 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 5831 tcg_out_st(s, TCG_TYPE_I64, 5832 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 5833 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 5834 5835 tcg_out_helper_load_slots(s, nmov, mov, parm); 5836 5837 if (arg_slot_reg_p(loc->arg_slot)) { 5838 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 5839 TCG_REG_CALL_STACK, 5840 arg_slot_stk_ofs(loc->ref_slot)); 5841 } else { 5842 tcg_debug_assert(parm->ntmp != 0); 5843 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 5844 arg_slot_stk_ofs(loc->ref_slot)); 5845 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 5846 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 5847 } 5848 next_arg += 2; 5849 break; 5850 5851 default: 5852 g_assert_not_reached(); 5853 } 5854 5855 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 5856 /* Zero extend the address by loading a zero for the high part. */ 5857 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 5858 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 5859 } 5860 5861 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 5862 } 5863 5864 #ifdef CONFIG_PROFILER 5865 5866 /* avoid copy/paste errors */ 5867 #define PROF_ADD(to, from, field) \ 5868 do { \ 5869 (to)->field += qatomic_read(&((from)->field)); \ 5870 } while (0) 5871 5872 #define PROF_MAX(to, from, field) \ 5873 do { \ 5874 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 5875 if (val__ > (to)->field) { \ 5876 (to)->field = val__; \ 5877 } \ 5878 } while (0) 5879 5880 /* Pass in a zero'ed @prof */ 5881 static inline 5882 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 5883 { 5884 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 5885 unsigned int i; 5886 5887 for (i = 0; i < n_ctxs; i++) { 5888 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 5889 const TCGProfile *orig = &s->prof; 5890 5891 if (counters) { 5892 PROF_ADD(prof, orig, cpu_exec_time); 5893 PROF_ADD(prof, orig, tb_count1); 5894 PROF_ADD(prof, orig, tb_count); 5895 PROF_ADD(prof, orig, op_count); 5896 PROF_MAX(prof, orig, op_count_max); 5897 PROF_ADD(prof, orig, temp_count); 5898 PROF_MAX(prof, orig, temp_count_max); 5899 PROF_ADD(prof, orig, del_op_count); 5900 PROF_ADD(prof, orig, code_in_len); 5901 PROF_ADD(prof, orig, code_out_len); 5902 PROF_ADD(prof, orig, search_out_len); 5903 PROF_ADD(prof, orig, interm_time); 5904 PROF_ADD(prof, orig, code_time); 5905 PROF_ADD(prof, orig, la_time); 5906 PROF_ADD(prof, orig, opt_time); 5907 PROF_ADD(prof, orig, restore_count); 5908 PROF_ADD(prof, orig, restore_time); 5909 } 5910 if (table) { 5911 int i; 5912 5913 for (i = 0; i < NB_OPS; i++) { 5914 PROF_ADD(prof, orig, table_op_count[i]); 5915 } 5916 } 5917 } 5918 } 5919 5920 #undef PROF_ADD 5921 #undef PROF_MAX 5922 5923 static void tcg_profile_snapshot_counters(TCGProfile *prof) 5924 { 5925 tcg_profile_snapshot(prof, true, false); 5926 } 5927 
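/* Pass in a zero'ed @prof; accumulates only the per-opcode table counters. */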
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }
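    /*
     * liveness_pass_2() may rewrite the op stream while replacing
     * indirect temps, so the life data computed by the first pass no
     * longer matches; re-running liveness_pass_1() refreshes it.
     */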

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management. Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                s->gen_insn_data[num_insns][i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow. The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely. Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
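    /*
     * A negative return value aborts this translation: -1 when the code
     * buffer high-water mark was crossed, -2 when a single TB grew past
     * what the 16-bit gen_insn_end_off[] bookkeeping (or a relocation)
     * can represent.  The caller is expected to restart generation, e.g.
     * after flushing the code buffer (cf. the "restarting after overflow"
     * note above).
     */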
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

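/*
 * Note that the "seconds" figure printed by tcg_dump_info() below is only
 * a rough estimate: the accumulated tick counts are converted using a
 * hard-coded 2.4 GHz reference, not the actual host clock rate.
 */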
#ifdef CONFIG_PROFILER
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, " gen_code time %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, " avg cycles %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit. This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t len;
        uint16_t version;
        uint32_t abbrev;
        uint8_t ptr_size;
        uint8_t cu_die;
        uint16_t cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t fn_die;
        char fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym) sym[2];
        struct DebugInfo di;
        uint8_t da[24];
        char str[80];
    };

    struct ElfImage *img;

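    /*
     * img_template below is a complete, self-contained ELF object held in
     * a single struct: ELF and program headers, seven section headers
     * (.text, .debug_info, .debug_abbrev, .debug_frame, .symtab, .strtab
     * plus the null section), one function symbol, and a minimal DWARF
     * compile unit describing code_gen_buffer.  Only the fields that do
     * not depend on runtime addresses are filled in here.
     */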
    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere. Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents. We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

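    /*
     * Everything that depends on the runtime location and size of the
     * code buffer (p_vaddr, sh_addr, the symbol value, the DWARF pc
     * ranges, and the copied .debug_frame FDE) is patched into a heap
     * copy of this template below before it is handed to the debugger.
     */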
    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature. Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif