/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions.  Currently they are used for the qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "exec/tlb-common.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */

typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
}
#endif

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
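
/*
 * Example (for illustration only; the real descriptions are built by the
 * callers below): extending a 16-bit signed value held in a 32-bit source
 * register into a 64-bit destination would be requested as
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SW, src);
 * which dispatches to tcg_out_ext16s() with a 64-bit destination type.
 */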

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
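
/*
 * For illustration (the actual entries are supplied by each backend's
 * tcg-target-con-set.h, so this particular one may or may not exist for
 * a given host): a backend line such as
 *     C_O1_I2(r, r, ri)
 * expands to the enumerator c_o1_i2_r_r_ri in the enum above, and to the
 * array entry
 *     { .args_ct_str = { "r", "r", "ri" } }
 * in constraint_sets[] above; the definitions that follow make
 * tcg_target_op_def() return that same enumerator, so the enum value
 * indexes its own string description.
 */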

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
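
/*
 * The typemask encoding, as a worked example rather than a new definition:
 * dh_typemask(t, n) places the 3-bit typecode of t at bit position n * 3,
 * with the return type at n == 0.  So a helper declared along the lines of
 * DEF_HELPER_FLAGS_2(foo, ..., i64, env, i32) (a hypothetical name, for
 * illustration only) carries dh_typecode_i64 in bits [2:0], the env
 * typecode in bits [5:3], and dh_typecode_i32 in bits [8:6].
 * init_ffi_layouts() below recovers the argument count from this as
 * DIV_ROUND_UP(32 - clz32(typemask >> 3), 3).
 */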

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t. */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
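
/*
 * Worked example (hypothetical host, for illustration only): on a 64-bit
 * target that uses TCG_CALL_ARG_NORMAL and has at least four argument
 * registers, a load helper taking (env, uint64_t addr, uint32_t oi,
 * uintptr_t ra) is laid out by init_call_layout() below as four one-word
 * arguments in slots 0..3, all of which satisfy arg_slot_reg_p().  Only
 * when arg_slot grows past ARRAY_SIZE(tcg_target_call_iarg_regs) do slots
 * spill to the stack, at the offsets computed by arg_slot_stk_ofs().
 */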

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
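
    /*
     * Worked example for the relocation below (hypothetical host, for
     * illustration only): with 6 argument registers, a call that used
     * 8 argument slots (2 of them on the stack) and copied one Int128
     * by reference (cum.ref_slot == 2) computes ref_base = 8 - 6 = 2,
     * which is already aligned for Int128; after ref_base += 6 the copy
     * occupies argument slots 8 and 9, which arg_slot_stk_ofs() maps to
     * the two stack words immediately after the stacked parameters.
     */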

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
    tcg_debug_assert(s->tlb_fast_offset < 0);
    tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
#endif
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
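
/*
 * Typical use of the function below (illustrative only; the register name,
 * array and CPUArchState field are hypothetical): a target's translator
 * exposes a guest register as a TCG global with
 *     cpu_regs[0] = tcg_global_mem_new_i32(cpu_env,
 *                                          offsetof(CPUArchState, regs[0]),
 *                                          "r0");
 * which reaches tcg_global_mem_new_internal() with base = cpu_env; since
 * env itself is a fixed-register global, no indirection is required.
 */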

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type. */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
1952 return TCG_TARGET_HAS_nand_i32; 1953 case INDEX_op_nor_i32: 1954 return TCG_TARGET_HAS_nor_i32; 1955 case INDEX_op_clz_i32: 1956 return TCG_TARGET_HAS_clz_i32; 1957 case INDEX_op_ctz_i32: 1958 return TCG_TARGET_HAS_ctz_i32; 1959 case INDEX_op_ctpop_i32: 1960 return TCG_TARGET_HAS_ctpop_i32; 1961 1962 case INDEX_op_brcond2_i32: 1963 case INDEX_op_setcond2_i32: 1964 return TCG_TARGET_REG_BITS == 32; 1965 1966 case INDEX_op_mov_i64: 1967 case INDEX_op_setcond_i64: 1968 case INDEX_op_brcond_i64: 1969 case INDEX_op_ld8u_i64: 1970 case INDEX_op_ld8s_i64: 1971 case INDEX_op_ld16u_i64: 1972 case INDEX_op_ld16s_i64: 1973 case INDEX_op_ld32u_i64: 1974 case INDEX_op_ld32s_i64: 1975 case INDEX_op_ld_i64: 1976 case INDEX_op_st8_i64: 1977 case INDEX_op_st16_i64: 1978 case INDEX_op_st32_i64: 1979 case INDEX_op_st_i64: 1980 case INDEX_op_add_i64: 1981 case INDEX_op_sub_i64: 1982 case INDEX_op_mul_i64: 1983 case INDEX_op_and_i64: 1984 case INDEX_op_or_i64: 1985 case INDEX_op_xor_i64: 1986 case INDEX_op_shl_i64: 1987 case INDEX_op_shr_i64: 1988 case INDEX_op_sar_i64: 1989 case INDEX_op_ext_i32_i64: 1990 case INDEX_op_extu_i32_i64: 1991 return TCG_TARGET_REG_BITS == 64; 1992 1993 case INDEX_op_movcond_i64: 1994 return TCG_TARGET_HAS_movcond_i64; 1995 case INDEX_op_div_i64: 1996 case INDEX_op_divu_i64: 1997 return TCG_TARGET_HAS_div_i64; 1998 case INDEX_op_rem_i64: 1999 case INDEX_op_remu_i64: 2000 return TCG_TARGET_HAS_rem_i64; 2001 case INDEX_op_div2_i64: 2002 case INDEX_op_divu2_i64: 2003 return TCG_TARGET_HAS_div2_i64; 2004 case INDEX_op_rotl_i64: 2005 case INDEX_op_rotr_i64: 2006 return TCG_TARGET_HAS_rot_i64; 2007 case INDEX_op_deposit_i64: 2008 return TCG_TARGET_HAS_deposit_i64; 2009 case INDEX_op_extract_i64: 2010 return TCG_TARGET_HAS_extract_i64; 2011 case INDEX_op_sextract_i64: 2012 return TCG_TARGET_HAS_sextract_i64; 2013 case INDEX_op_extract2_i64: 2014 return TCG_TARGET_HAS_extract2_i64; 2015 case INDEX_op_extrl_i64_i32: 2016 return TCG_TARGET_HAS_extrl_i64_i32; 2017 case INDEX_op_extrh_i64_i32: 2018 return TCG_TARGET_HAS_extrh_i64_i32; 2019 case INDEX_op_ext8s_i64: 2020 return TCG_TARGET_HAS_ext8s_i64; 2021 case INDEX_op_ext16s_i64: 2022 return TCG_TARGET_HAS_ext16s_i64; 2023 case INDEX_op_ext32s_i64: 2024 return TCG_TARGET_HAS_ext32s_i64; 2025 case INDEX_op_ext8u_i64: 2026 return TCG_TARGET_HAS_ext8u_i64; 2027 case INDEX_op_ext16u_i64: 2028 return TCG_TARGET_HAS_ext16u_i64; 2029 case INDEX_op_ext32u_i64: 2030 return TCG_TARGET_HAS_ext32u_i64; 2031 case INDEX_op_bswap16_i64: 2032 return TCG_TARGET_HAS_bswap16_i64; 2033 case INDEX_op_bswap32_i64: 2034 return TCG_TARGET_HAS_bswap32_i64; 2035 case INDEX_op_bswap64_i64: 2036 return TCG_TARGET_HAS_bswap64_i64; 2037 case INDEX_op_not_i64: 2038 return TCG_TARGET_HAS_not_i64; 2039 case INDEX_op_neg_i64: 2040 return TCG_TARGET_HAS_neg_i64; 2041 case INDEX_op_andc_i64: 2042 return TCG_TARGET_HAS_andc_i64; 2043 case INDEX_op_orc_i64: 2044 return TCG_TARGET_HAS_orc_i64; 2045 case INDEX_op_eqv_i64: 2046 return TCG_TARGET_HAS_eqv_i64; 2047 case INDEX_op_nand_i64: 2048 return TCG_TARGET_HAS_nand_i64; 2049 case INDEX_op_nor_i64: 2050 return TCG_TARGET_HAS_nor_i64; 2051 case INDEX_op_clz_i64: 2052 return TCG_TARGET_HAS_clz_i64; 2053 case INDEX_op_ctz_i64: 2054 return TCG_TARGET_HAS_ctz_i64; 2055 case INDEX_op_ctpop_i64: 2056 return TCG_TARGET_HAS_ctpop_i64; 2057 case INDEX_op_add2_i64: 2058 return TCG_TARGET_HAS_add2_i64; 2059 case INDEX_op_sub2_i64: 2060 return TCG_TARGET_HAS_sub2_i64; 2061 case INDEX_op_mulu2_i64: 2062 return TCG_TARGET_HAS_mulu2_i64; 
2063 case INDEX_op_muls2_i64: 2064 return TCG_TARGET_HAS_muls2_i64; 2065 case INDEX_op_muluh_i64: 2066 return TCG_TARGET_HAS_muluh_i64; 2067 case INDEX_op_mulsh_i64: 2068 return TCG_TARGET_HAS_mulsh_i64; 2069 2070 case INDEX_op_mov_vec: 2071 case INDEX_op_dup_vec: 2072 case INDEX_op_dupm_vec: 2073 case INDEX_op_ld_vec: 2074 case INDEX_op_st_vec: 2075 case INDEX_op_add_vec: 2076 case INDEX_op_sub_vec: 2077 case INDEX_op_and_vec: 2078 case INDEX_op_or_vec: 2079 case INDEX_op_xor_vec: 2080 case INDEX_op_cmp_vec: 2081 return have_vec; 2082 case INDEX_op_dup2_vec: 2083 return have_vec && TCG_TARGET_REG_BITS == 32; 2084 case INDEX_op_not_vec: 2085 return have_vec && TCG_TARGET_HAS_not_vec; 2086 case INDEX_op_neg_vec: 2087 return have_vec && TCG_TARGET_HAS_neg_vec; 2088 case INDEX_op_abs_vec: 2089 return have_vec && TCG_TARGET_HAS_abs_vec; 2090 case INDEX_op_andc_vec: 2091 return have_vec && TCG_TARGET_HAS_andc_vec; 2092 case INDEX_op_orc_vec: 2093 return have_vec && TCG_TARGET_HAS_orc_vec; 2094 case INDEX_op_nand_vec: 2095 return have_vec && TCG_TARGET_HAS_nand_vec; 2096 case INDEX_op_nor_vec: 2097 return have_vec && TCG_TARGET_HAS_nor_vec; 2098 case INDEX_op_eqv_vec: 2099 return have_vec && TCG_TARGET_HAS_eqv_vec; 2100 case INDEX_op_mul_vec: 2101 return have_vec && TCG_TARGET_HAS_mul_vec; 2102 case INDEX_op_shli_vec: 2103 case INDEX_op_shri_vec: 2104 case INDEX_op_sari_vec: 2105 return have_vec && TCG_TARGET_HAS_shi_vec; 2106 case INDEX_op_shls_vec: 2107 case INDEX_op_shrs_vec: 2108 case INDEX_op_sars_vec: 2109 return have_vec && TCG_TARGET_HAS_shs_vec; 2110 case INDEX_op_shlv_vec: 2111 case INDEX_op_shrv_vec: 2112 case INDEX_op_sarv_vec: 2113 return have_vec && TCG_TARGET_HAS_shv_vec; 2114 case INDEX_op_rotli_vec: 2115 return have_vec && TCG_TARGET_HAS_roti_vec; 2116 case INDEX_op_rotls_vec: 2117 return have_vec && TCG_TARGET_HAS_rots_vec; 2118 case INDEX_op_rotlv_vec: 2119 case INDEX_op_rotrv_vec: 2120 return have_vec && TCG_TARGET_HAS_rotv_vec; 2121 case INDEX_op_ssadd_vec: 2122 case INDEX_op_usadd_vec: 2123 case INDEX_op_sssub_vec: 2124 case INDEX_op_ussub_vec: 2125 return have_vec && TCG_TARGET_HAS_sat_vec; 2126 case INDEX_op_smin_vec: 2127 case INDEX_op_umin_vec: 2128 case INDEX_op_smax_vec: 2129 case INDEX_op_umax_vec: 2130 return have_vec && TCG_TARGET_HAS_minmax_vec; 2131 case INDEX_op_bitsel_vec: 2132 return have_vec && TCG_TARGET_HAS_bitsel_vec; 2133 case INDEX_op_cmpsel_vec: 2134 return have_vec && TCG_TARGET_HAS_cmpsel_vec; 2135 2136 default: 2137 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 2138 return true; 2139 } 2140 } 2141 2142 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); 2143 2144 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 2145 { 2146 const TCGHelperInfo *info; 2147 TCGv_i64 extend_free[MAX_CALL_IARGS]; 2148 int n_extend = 0; 2149 TCGOp *op; 2150 int i, n, pi = 0, total_args; 2151 2152 info = g_hash_table_lookup(helper_table, (gpointer)func); 2153 total_args = info->nr_out + info->nr_in + 2; 2154 op = tcg_op_alloc(INDEX_op_call, total_args); 2155 2156 #ifdef CONFIG_PLUGIN 2157 /* Flag helpers that may affect guest state */ 2158 if (tcg_ctx->plugin_insn && 2159 !(info->flags & TCG_CALL_PLUGIN) && 2160 !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) { 2161 tcg_ctx->plugin_insn->calls_helpers = true; 2162 } 2163 #endif 2164 2165 TCGOP_CALLO(op) = n = info->nr_out; 2166 switch (n) { 2167 case 0: 2168 tcg_debug_assert(ret == NULL); 2169 break; 2170 case 1: 2171 tcg_debug_assert(ret != NULL); 2172 op->args[pi++] = temp_arg(ret); 
2173 break; 2174 case 2: 2175 case 4: 2176 tcg_debug_assert(ret != NULL); 2177 tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); 2178 tcg_debug_assert(ret->temp_subindex == 0); 2179 for (i = 0; i < n; ++i) { 2180 op->args[pi++] = temp_arg(ret + i); 2181 } 2182 break; 2183 default: 2184 g_assert_not_reached(); 2185 } 2186 2187 TCGOP_CALLI(op) = n = info->nr_in; 2188 for (i = 0; i < n; i++) { 2189 const TCGCallArgumentLoc *loc = &info->in[i]; 2190 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 2191 2192 switch (loc->kind) { 2193 case TCG_CALL_ARG_NORMAL: 2194 case TCG_CALL_ARG_BY_REF: 2195 case TCG_CALL_ARG_BY_REF_N: 2196 op->args[pi++] = temp_arg(ts); 2197 break; 2198 2199 case TCG_CALL_ARG_EXTEND_U: 2200 case TCG_CALL_ARG_EXTEND_S: 2201 { 2202 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 2203 TCGv_i32 orig = temp_tcgv_i32(ts); 2204 2205 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 2206 tcg_gen_ext_i32_i64(temp, orig); 2207 } else { 2208 tcg_gen_extu_i32_i64(temp, orig); 2209 } 2210 op->args[pi++] = tcgv_i64_arg(temp); 2211 extend_free[n_extend++] = temp; 2212 } 2213 break; 2214 2215 default: 2216 g_assert_not_reached(); 2217 } 2218 } 2219 op->args[pi++] = (uintptr_t)func; 2220 op->args[pi++] = (uintptr_t)info; 2221 tcg_debug_assert(pi == total_args); 2222 2223 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2224 2225 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 2226 for (i = 0; i < n_extend; ++i) { 2227 tcg_temp_free_i64(extend_free[i]); 2228 } 2229 } 2230 2231 static void tcg_reg_alloc_start(TCGContext *s) 2232 { 2233 int i, n; 2234 2235 for (i = 0, n = s->nb_temps; i < n; i++) { 2236 TCGTemp *ts = &s->temps[i]; 2237 TCGTempVal val = TEMP_VAL_MEM; 2238 2239 switch (ts->kind) { 2240 case TEMP_CONST: 2241 val = TEMP_VAL_CONST; 2242 break; 2243 case TEMP_FIXED: 2244 val = TEMP_VAL_REG; 2245 break; 2246 case TEMP_GLOBAL: 2247 break; 2248 case TEMP_EBB: 2249 val = TEMP_VAL_DEAD; 2250 /* fall through */ 2251 case TEMP_TB: 2252 ts->mem_allocated = 0; 2253 break; 2254 default: 2255 g_assert_not_reached(); 2256 } 2257 ts->val_type = val; 2258 } 2259 2260 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2261 } 2262 2263 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2264 TCGTemp *ts) 2265 { 2266 int idx = temp_idx(ts); 2267 2268 switch (ts->kind) { 2269 case TEMP_FIXED: 2270 case TEMP_GLOBAL: 2271 pstrcpy(buf, buf_size, ts->name); 2272 break; 2273 case TEMP_TB: 2274 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2275 break; 2276 case TEMP_EBB: 2277 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2278 break; 2279 case TEMP_CONST: 2280 switch (ts->type) { 2281 case TCG_TYPE_I32: 2282 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2283 break; 2284 #if TCG_TARGET_REG_BITS > 32 2285 case TCG_TYPE_I64: 2286 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2287 break; 2288 #endif 2289 case TCG_TYPE_V64: 2290 case TCG_TYPE_V128: 2291 case TCG_TYPE_V256: 2292 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2293 64 << (ts->type - TCG_TYPE_V64), ts->val); 2294 break; 2295 default: 2296 g_assert_not_reached(); 2297 } 2298 break; 2299 } 2300 return buf; 2301 } 2302 2303 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2304 int buf_size, TCGArg arg) 2305 { 2306 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2307 } 2308 2309 static const char * const cond_name[] = 2310 { 2311 [TCG_COND_NEVER] = "never", 2312 [TCG_COND_ALWAYS] = "always", 2313 [TCG_COND_EQ] = "eq", 2314 [TCG_COND_NE] = "ne", 2315 [TCG_COND_LT] = "lt", 2316 [TCG_COND_GE] = 
"ge", 2317 [TCG_COND_LE] = "le", 2318 [TCG_COND_GT] = "gt", 2319 [TCG_COND_LTU] = "ltu", 2320 [TCG_COND_GEU] = "geu", 2321 [TCG_COND_LEU] = "leu", 2322 [TCG_COND_GTU] = "gtu" 2323 }; 2324 2325 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] = 2326 { 2327 [MO_UB] = "ub", 2328 [MO_SB] = "sb", 2329 [MO_LEUW] = "leuw", 2330 [MO_LESW] = "lesw", 2331 [MO_LEUL] = "leul", 2332 [MO_LESL] = "lesl", 2333 [MO_LEUQ] = "leq", 2334 [MO_BEUW] = "beuw", 2335 [MO_BESW] = "besw", 2336 [MO_BEUL] = "beul", 2337 [MO_BESL] = "besl", 2338 [MO_BEUQ] = "beq", 2339 [MO_128 + MO_BE] = "beo", 2340 [MO_128 + MO_LE] = "leo", 2341 }; 2342 2343 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2344 [MO_UNALN >> MO_ASHIFT] = "un+", 2345 [MO_ALIGN >> MO_ASHIFT] = "al+", 2346 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2347 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2348 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2349 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2350 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2351 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2352 }; 2353 2354 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = { 2355 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "", 2356 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+", 2357 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+", 2358 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+", 2359 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+", 2360 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+", 2361 }; 2362 2363 static const char bswap_flag_name[][6] = { 2364 [TCG_BSWAP_IZ] = "iz", 2365 [TCG_BSWAP_OZ] = "oz", 2366 [TCG_BSWAP_OS] = "os", 2367 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2368 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2369 }; 2370 2371 static inline bool tcg_regset_single(TCGRegSet d) 2372 { 2373 return (d & (d - 1)) == 0; 2374 } 2375 2376 static inline TCGReg tcg_regset_first(TCGRegSet d) 2377 { 2378 if (TCG_TARGET_NB_REGS <= 32) { 2379 return ctz32(d); 2380 } else { 2381 return ctz64(d); 2382 } 2383 } 2384 2385 /* Return only the number of characters output -- no error return. */ 2386 #define ne_fprintf(...) \ 2387 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2388 2389 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2390 { 2391 char buf[128]; 2392 TCGOp *op; 2393 2394 QTAILQ_FOREACH(op, &s->ops, link) { 2395 int i, k, nb_oargs, nb_iargs, nb_cargs; 2396 const TCGOpDef *def; 2397 TCGOpcode c; 2398 int col = 0; 2399 2400 c = op->opc; 2401 def = &tcg_op_defs[c]; 2402 2403 if (c == INDEX_op_insn_start) { 2404 nb_oargs = 0; 2405 col += ne_fprintf(f, "\n ----"); 2406 2407 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2408 col += ne_fprintf(f, " %016" PRIx64, 2409 tcg_get_insn_start_param(op, i)); 2410 } 2411 } else if (c == INDEX_op_call) { 2412 const TCGHelperInfo *info = tcg_call_info(op); 2413 void *func = tcg_call_func(op); 2414 2415 /* variable number of arguments */ 2416 nb_oargs = TCGOP_CALLO(op); 2417 nb_iargs = TCGOP_CALLI(op); 2418 nb_cargs = def->nb_cargs; 2419 2420 col += ne_fprintf(f, " %s ", def->name); 2421 2422 /* 2423 * Print the function name from TCGHelperInfo, if available. 2424 * Note that plugins have a template function for the info, 2425 * but the actual function pointer comes from the plugin. 
2426 */ 2427 if (func == info->func) { 2428 col += ne_fprintf(f, "%s", info->name); 2429 } else { 2430 col += ne_fprintf(f, "plugin(%p)", func); 2431 } 2432 2433 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2434 for (i = 0; i < nb_oargs; i++) { 2435 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2436 op->args[i])); 2437 } 2438 for (i = 0; i < nb_iargs; i++) { 2439 TCGArg arg = op->args[nb_oargs + i]; 2440 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2441 col += ne_fprintf(f, ",%s", t); 2442 } 2443 } else { 2444 col += ne_fprintf(f, " %s ", def->name); 2445 2446 nb_oargs = def->nb_oargs; 2447 nb_iargs = def->nb_iargs; 2448 nb_cargs = def->nb_cargs; 2449 2450 if (def->flags & TCG_OPF_VECTOR) { 2451 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2452 8 << TCGOP_VECE(op)); 2453 } 2454 2455 k = 0; 2456 for (i = 0; i < nb_oargs; i++) { 2457 const char *sep = k ? "," : ""; 2458 col += ne_fprintf(f, "%s%s", sep, 2459 tcg_get_arg_str(s, buf, sizeof(buf), 2460 op->args[k++])); 2461 } 2462 for (i = 0; i < nb_iargs; i++) { 2463 const char *sep = k ? "," : ""; 2464 col += ne_fprintf(f, "%s%s", sep, 2465 tcg_get_arg_str(s, buf, sizeof(buf), 2466 op->args[k++])); 2467 } 2468 switch (c) { 2469 case INDEX_op_brcond_i32: 2470 case INDEX_op_setcond_i32: 2471 case INDEX_op_movcond_i32: 2472 case INDEX_op_brcond2_i32: 2473 case INDEX_op_setcond2_i32: 2474 case INDEX_op_brcond_i64: 2475 case INDEX_op_setcond_i64: 2476 case INDEX_op_movcond_i64: 2477 case INDEX_op_cmp_vec: 2478 case INDEX_op_cmpsel_vec: 2479 if (op->args[k] < ARRAY_SIZE(cond_name) 2480 && cond_name[op->args[k]]) { 2481 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2482 } else { 2483 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2484 } 2485 i = 1; 2486 break; 2487 case INDEX_op_qemu_ld_a32_i32: 2488 case INDEX_op_qemu_ld_a64_i32: 2489 case INDEX_op_qemu_st_a32_i32: 2490 case INDEX_op_qemu_st_a64_i32: 2491 case INDEX_op_qemu_st8_a32_i32: 2492 case INDEX_op_qemu_st8_a64_i32: 2493 case INDEX_op_qemu_ld_a32_i64: 2494 case INDEX_op_qemu_ld_a64_i64: 2495 case INDEX_op_qemu_st_a32_i64: 2496 case INDEX_op_qemu_st_a64_i64: 2497 case INDEX_op_qemu_ld_a32_i128: 2498 case INDEX_op_qemu_ld_a64_i128: 2499 case INDEX_op_qemu_st_a32_i128: 2500 case INDEX_op_qemu_st_a64_i128: 2501 { 2502 const char *s_al, *s_op, *s_at; 2503 MemOpIdx oi = op->args[k++]; 2504 MemOp op = get_memop(oi); 2505 unsigned ix = get_mmuidx(oi); 2506 2507 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2508 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2509 s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT]; 2510 op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK); 2511 2512 /* If all fields are accounted for, print symbolically. 
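               For example, an aligned little-endian 32-bit access at mmu
               index 1 is printed as ",al+leul,1"; if any MemOp bits remain
               unaccounted for, the raw value is printed in hex instead.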
*/ 2513 if (!op && s_al && s_op && s_at) { 2514 col += ne_fprintf(f, ",%s%s%s,%u", 2515 s_at, s_al, s_op, ix); 2516 } else { 2517 op = get_memop(oi); 2518 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2519 } 2520 i = 1; 2521 } 2522 break; 2523 case INDEX_op_bswap16_i32: 2524 case INDEX_op_bswap16_i64: 2525 case INDEX_op_bswap32_i32: 2526 case INDEX_op_bswap32_i64: 2527 case INDEX_op_bswap64_i64: 2528 { 2529 TCGArg flags = op->args[k]; 2530 const char *name = NULL; 2531 2532 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2533 name = bswap_flag_name[flags]; 2534 } 2535 if (name) { 2536 col += ne_fprintf(f, ",%s", name); 2537 } else { 2538 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2539 } 2540 i = k = 1; 2541 } 2542 break; 2543 default: 2544 i = 0; 2545 break; 2546 } 2547 switch (c) { 2548 case INDEX_op_set_label: 2549 case INDEX_op_br: 2550 case INDEX_op_brcond_i32: 2551 case INDEX_op_brcond_i64: 2552 case INDEX_op_brcond2_i32: 2553 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2554 arg_label(op->args[k])->id); 2555 i++, k++; 2556 break; 2557 case INDEX_op_mb: 2558 { 2559 TCGBar membar = op->args[k]; 2560 const char *b_op, *m_op; 2561 2562 switch (membar & TCG_BAR_SC) { 2563 case 0: 2564 b_op = "none"; 2565 break; 2566 case TCG_BAR_LDAQ: 2567 b_op = "acq"; 2568 break; 2569 case TCG_BAR_STRL: 2570 b_op = "rel"; 2571 break; 2572 case TCG_BAR_SC: 2573 b_op = "seq"; 2574 break; 2575 default: 2576 g_assert_not_reached(); 2577 } 2578 2579 switch (membar & TCG_MO_ALL) { 2580 case 0: 2581 m_op = "none"; 2582 break; 2583 case TCG_MO_LD_LD: 2584 m_op = "rr"; 2585 break; 2586 case TCG_MO_LD_ST: 2587 m_op = "rw"; 2588 break; 2589 case TCG_MO_ST_LD: 2590 m_op = "wr"; 2591 break; 2592 case TCG_MO_ST_ST: 2593 m_op = "ww"; 2594 break; 2595 case TCG_MO_LD_LD | TCG_MO_LD_ST: 2596 m_op = "rr+rw"; 2597 break; 2598 case TCG_MO_LD_LD | TCG_MO_ST_LD: 2599 m_op = "rr+wr"; 2600 break; 2601 case TCG_MO_LD_LD | TCG_MO_ST_ST: 2602 m_op = "rr+ww"; 2603 break; 2604 case TCG_MO_LD_ST | TCG_MO_ST_LD: 2605 m_op = "rw+wr"; 2606 break; 2607 case TCG_MO_LD_ST | TCG_MO_ST_ST: 2608 m_op = "rw+ww"; 2609 break; 2610 case TCG_MO_ST_LD | TCG_MO_ST_ST: 2611 m_op = "wr+ww"; 2612 break; 2613 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 2614 m_op = "rr+rw+wr"; 2615 break; 2616 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 2617 m_op = "rr+rw+ww"; 2618 break; 2619 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 2620 m_op = "rr+wr+ww"; 2621 break; 2622 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 2623 m_op = "rw+wr+ww"; 2624 break; 2625 case TCG_MO_ALL: 2626 m_op = "all"; 2627 break; 2628 default: 2629 g_assert_not_reached(); 2630 } 2631 2632 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 2633 i++, k++; 2634 } 2635 break; 2636 default: 2637 break; 2638 } 2639 for (; i < nb_cargs; i++, k++) { 2640 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2641 op->args[k]); 2642 } 2643 } 2644 2645 if (have_prefs || op->life) { 2646 for (; col < 40; ++col) { 2647 putc(' ', f); 2648 } 2649 } 2650 2651 if (op->life) { 2652 unsigned life = op->life; 2653 2654 if (life & (SYNC_ARG * 3)) { 2655 ne_fprintf(f, " sync:"); 2656 for (i = 0; i < 2; ++i) { 2657 if (life & (SYNC_ARG << i)) { 2658 ne_fprintf(f, " %d", i); 2659 } 2660 } 2661 } 2662 life /= DEAD_ARG; 2663 if (life) { 2664 ne_fprintf(f, " dead:"); 2665 for (i = 0; life; ++i, life >>= 1) { 2666 if (life & 1) { 2667 ne_fprintf(f, " %d", i); 2668 } 2669 } 2670 } 2671 } 2672 2673 if (have_prefs) { 2674 for (i = 0; i < nb_oargs; ++i) { 2675 TCGRegSet set = output_pref(op, i); 2676 2677 if (i == 0) { 2678 ne_fprintf(f, " pref="); 2679 } else { 2680 ne_fprintf(f, ","); 2681 } 2682 if (set == 0) { 2683 ne_fprintf(f, "none"); 2684 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2685 ne_fprintf(f, "all"); 2686 #ifdef CONFIG_DEBUG_TCG 2687 } else if (tcg_regset_single(set)) { 2688 TCGReg reg = tcg_regset_first(set); 2689 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2690 #endif 2691 } else if (TCG_TARGET_NB_REGS <= 32) { 2692 ne_fprintf(f, "0x%x", (uint32_t)set); 2693 } else { 2694 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2695 } 2696 } 2697 } 2698 2699 putc('\n', f); 2700 } 2701 } 2702 2703 /* we give more priority to constraints with less registers */ 2704 static int get_constraint_priority(const TCGOpDef *def, int k) 2705 { 2706 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2707 int n = ctpop64(arg_ct->regs); 2708 2709 /* 2710 * Sort constraints of a single register first, which includes output 2711 * aliases (which must exactly match the input already allocated). 2712 */ 2713 if (n == 1 || arg_ct->oalias) { 2714 return INT_MAX; 2715 } 2716 2717 /* 2718 * Sort register pairs next, first then second immediately after. 2719 * Arbitrarily sort multiple pairs by the index of the first reg; 2720 * there shouldn't be many pairs. 2721 */ 2722 switch (arg_ct->pair) { 2723 case 1: 2724 case 3: 2725 return (k + 1) * 2; 2726 case 2: 2727 return (arg_ct->pair_index + 1) * 2 - 1; 2728 } 2729 2730 /* Finally, sort by decreasing register count. */ 2731 assert(n > 1); 2732 return -n; 2733 } 2734 2735 /* sort from highest priority to lowest */ 2736 static void sort_constraints(TCGOpDef *def, int start, int n) 2737 { 2738 int i, j; 2739 TCGArgConstraint *a = def->args_ct; 2740 2741 for (i = 0; i < n; i++) { 2742 a[start + i].sort_index = start + i; 2743 } 2744 if (n <= 1) { 2745 return; 2746 } 2747 for (i = 0; i < n - 1; i++) { 2748 for (j = i + 1; j < n; j++) { 2749 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2750 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2751 if (p1 < p2) { 2752 int tmp = a[start + i].sort_index; 2753 a[start + i].sort_index = a[start + j].sort_index; 2754 a[start + j].sort_index = tmp; 2755 } 2756 } 2757 } 2758 } 2759 2760 static void process_op_defs(TCGContext *s) 2761 { 2762 TCGOpcode op; 2763 2764 for (op = 0; op < NB_OPS; op++) { 2765 TCGOpDef *def = &tcg_op_defs[op]; 2766 const TCGTargetOpDef *tdefs; 2767 bool saw_alias_pair = false; 2768 int i, o, i2, o2, nb_args; 2769 2770 if (def->flags & TCG_OPF_NOT_PRESENT) { 2771 continue; 2772 } 2773 2774 nb_args = def->nb_iargs + def->nb_oargs; 2775 if (nb_args == 0) { 2776 continue; 2777 } 2778 2779 /* 2780 * Macro magic should make it impossible, but double-check that 2781 * the array index is in range. 
Since the signness of an enum 2782 * is implementation defined, force the result to unsigned. 2783 */ 2784 unsigned con_set = tcg_target_op_def(op); 2785 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2786 tdefs = &constraint_sets[con_set]; 2787 2788 for (i = 0; i < nb_args; i++) { 2789 const char *ct_str = tdefs->args_ct_str[i]; 2790 bool input_p = i >= def->nb_oargs; 2791 2792 /* Incomplete TCGTargetOpDef entry. */ 2793 tcg_debug_assert(ct_str != NULL); 2794 2795 switch (*ct_str) { 2796 case '0' ... '9': 2797 o = *ct_str - '0'; 2798 tcg_debug_assert(input_p); 2799 tcg_debug_assert(o < def->nb_oargs); 2800 tcg_debug_assert(def->args_ct[o].regs != 0); 2801 tcg_debug_assert(!def->args_ct[o].oalias); 2802 def->args_ct[i] = def->args_ct[o]; 2803 /* The output sets oalias. */ 2804 def->args_ct[o].oalias = 1; 2805 def->args_ct[o].alias_index = i; 2806 /* The input sets ialias. */ 2807 def->args_ct[i].ialias = 1; 2808 def->args_ct[i].alias_index = o; 2809 if (def->args_ct[i].pair) { 2810 saw_alias_pair = true; 2811 } 2812 tcg_debug_assert(ct_str[1] == '\0'); 2813 continue; 2814 2815 case '&': 2816 tcg_debug_assert(!input_p); 2817 def->args_ct[i].newreg = true; 2818 ct_str++; 2819 break; 2820 2821 case 'p': /* plus */ 2822 /* Allocate to the register after the previous. */ 2823 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2824 o = i - 1; 2825 tcg_debug_assert(!def->args_ct[o].pair); 2826 tcg_debug_assert(!def->args_ct[o].ct); 2827 def->args_ct[i] = (TCGArgConstraint){ 2828 .pair = 2, 2829 .pair_index = o, 2830 .regs = def->args_ct[o].regs << 1, 2831 }; 2832 def->args_ct[o].pair = 1; 2833 def->args_ct[o].pair_index = i; 2834 tcg_debug_assert(ct_str[1] == '\0'); 2835 continue; 2836 2837 case 'm': /* minus */ 2838 /* Allocate to the register before the previous. */ 2839 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2840 o = i - 1; 2841 tcg_debug_assert(!def->args_ct[o].pair); 2842 tcg_debug_assert(!def->args_ct[o].ct); 2843 def->args_ct[i] = (TCGArgConstraint){ 2844 .pair = 1, 2845 .pair_index = o, 2846 .regs = def->args_ct[o].regs >> 1, 2847 }; 2848 def->args_ct[o].pair = 2; 2849 def->args_ct[o].pair_index = i; 2850 tcg_debug_assert(ct_str[1] == '\0'); 2851 continue; 2852 } 2853 2854 do { 2855 switch (*ct_str) { 2856 case 'i': 2857 def->args_ct[i].ct |= TCG_CT_CONST; 2858 break; 2859 2860 /* Include all of the target-specific constraints. */ 2861 2862 #undef CONST 2863 #define CONST(CASE, MASK) \ 2864 case CASE: def->args_ct[i].ct |= MASK; break; 2865 #define REGS(CASE, MASK) \ 2866 case CASE: def->args_ct[i].regs |= MASK; break; 2867 2868 #include "tcg-target-con-str.h" 2869 2870 #undef REGS 2871 #undef CONST 2872 default: 2873 case '0' ... '9': 2874 case '&': 2875 case 'p': 2876 case 'm': 2877 /* Typo in TCGTargetOpDef constraint. */ 2878 g_assert_not_reached(); 2879 } 2880 } while (*++ct_str != '\0'); 2881 } 2882 2883 /* TCGTargetOpDef entry with too much information? */ 2884 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2885 2886 /* 2887 * Fix up output pairs that are aliased with inputs. 2888 * When we created the alias, we copied pair from the output. 2889 * There are three cases: 2890 * (1a) Pairs of inputs alias pairs of outputs. 2891 * (1b) One input aliases the first of a pair of outputs. 2892 * (2) One input aliases the second of a pair of outputs. 2893 * 2894 * Case 1a is handled by making sure that the pair_index'es are 2895 * properly updated so that they appear the same as a pair of inputs. 
2896 * 2897 * Case 1b is handled by setting the pair_index of the input to 2898 * itself, simply so it doesn't point to an unrelated argument. 2899 * Since we don't encounter the "second" during the input allocation 2900 * phase, nothing happens with the second half of the input pair. 2901 * 2902 * Case 2 is handled by setting the second input to pair=3, the 2903 * first output to pair=3, and the pair_index'es to match. 2904 */ 2905 if (saw_alias_pair) { 2906 for (i = def->nb_oargs; i < nb_args; i++) { 2907 /* 2908 * Since [0-9pm] must be alone in the constraint string, 2909 * the only way they can both be set is if the pair comes 2910 * from the output alias. 2911 */ 2912 if (!def->args_ct[i].ialias) { 2913 continue; 2914 } 2915 switch (def->args_ct[i].pair) { 2916 case 0: 2917 break; 2918 case 1: 2919 o = def->args_ct[i].alias_index; 2920 o2 = def->args_ct[o].pair_index; 2921 tcg_debug_assert(def->args_ct[o].pair == 1); 2922 tcg_debug_assert(def->args_ct[o2].pair == 2); 2923 if (def->args_ct[o2].oalias) { 2924 /* Case 1a */ 2925 i2 = def->args_ct[o2].alias_index; 2926 tcg_debug_assert(def->args_ct[i2].pair == 2); 2927 def->args_ct[i2].pair_index = i; 2928 def->args_ct[i].pair_index = i2; 2929 } else { 2930 /* Case 1b */ 2931 def->args_ct[i].pair_index = i; 2932 } 2933 break; 2934 case 2: 2935 o = def->args_ct[i].alias_index; 2936 o2 = def->args_ct[o].pair_index; 2937 tcg_debug_assert(def->args_ct[o].pair == 2); 2938 tcg_debug_assert(def->args_ct[o2].pair == 1); 2939 if (def->args_ct[o2].oalias) { 2940 /* Case 1a */ 2941 i2 = def->args_ct[o2].alias_index; 2942 tcg_debug_assert(def->args_ct[i2].pair == 1); 2943 def->args_ct[i2].pair_index = i; 2944 def->args_ct[i].pair_index = i2; 2945 } else { 2946 /* Case 2 */ 2947 def->args_ct[i].pair = 3; 2948 def->args_ct[o2].pair = 3; 2949 def->args_ct[i].pair_index = o2; 2950 def->args_ct[o2].pair_index = i; 2951 } 2952 break; 2953 default: 2954 g_assert_not_reached(); 2955 } 2956 } 2957 } 2958 2959 /* sort the constraints (XXX: this is just an heuristic) */ 2960 sort_constraints(def, 0, def->nb_oargs); 2961 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2962 } 2963 } 2964 2965 static void remove_label_use(TCGOp *op, int idx) 2966 { 2967 TCGLabel *label = arg_label(op->args[idx]); 2968 TCGLabelUse *use; 2969 2970 QSIMPLEQ_FOREACH(use, &label->branches, next) { 2971 if (use->op == op) { 2972 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 2973 return; 2974 } 2975 } 2976 g_assert_not_reached(); 2977 } 2978 2979 void tcg_op_remove(TCGContext *s, TCGOp *op) 2980 { 2981 switch (op->opc) { 2982 case INDEX_op_br: 2983 remove_label_use(op, 0); 2984 break; 2985 case INDEX_op_brcond_i32: 2986 case INDEX_op_brcond_i64: 2987 remove_label_use(op, 3); 2988 break; 2989 case INDEX_op_brcond2_i32: 2990 remove_label_use(op, 5); 2991 break; 2992 default: 2993 break; 2994 } 2995 2996 QTAILQ_REMOVE(&s->ops, op, link); 2997 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2998 s->nb_ops--; 2999 3000 #ifdef CONFIG_PROFILER 3001 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 3002 #endif 3003 } 3004 3005 void tcg_remove_ops_after(TCGOp *op) 3006 { 3007 TCGContext *s = tcg_ctx; 3008 3009 while (true) { 3010 TCGOp *last = tcg_last_op(); 3011 if (last == op) { 3012 return; 3013 } 3014 tcg_op_remove(s, last); 3015 } 3016 } 3017 3018 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 3019 { 3020 TCGContext *s = tcg_ctx; 3021 TCGOp *op = NULL; 3022 3023 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 3024 QTAILQ_FOREACH(op, &s->free_ops, link) { 
3025 if (nargs <= op->nargs) { 3026 QTAILQ_REMOVE(&s->free_ops, op, link); 3027 nargs = op->nargs; 3028 goto found; 3029 } 3030 } 3031 } 3032 3033 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 3034 nargs = MAX(4, nargs); 3035 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 3036 3037 found: 3038 memset(op, 0, offsetof(TCGOp, link)); 3039 op->opc = opc; 3040 op->nargs = nargs; 3041 3042 /* Check for bitfield overflow. */ 3043 tcg_debug_assert(op->nargs == nargs); 3044 3045 s->nb_ops++; 3046 return op; 3047 } 3048 3049 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 3050 { 3051 TCGOp *op = tcg_op_alloc(opc, nargs); 3052 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 3053 return op; 3054 } 3055 3056 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 3057 TCGOpcode opc, unsigned nargs) 3058 { 3059 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3060 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 3061 return new_op; 3062 } 3063 3064 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 3065 TCGOpcode opc, unsigned nargs) 3066 { 3067 TCGOp *new_op = tcg_op_alloc(opc, nargs); 3068 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 3069 return new_op; 3070 } 3071 3072 static void move_label_uses(TCGLabel *to, TCGLabel *from) 3073 { 3074 TCGLabelUse *u; 3075 3076 QSIMPLEQ_FOREACH(u, &from->branches, next) { 3077 TCGOp *op = u->op; 3078 switch (op->opc) { 3079 case INDEX_op_br: 3080 op->args[0] = label_arg(to); 3081 break; 3082 case INDEX_op_brcond_i32: 3083 case INDEX_op_brcond_i64: 3084 op->args[3] = label_arg(to); 3085 break; 3086 case INDEX_op_brcond2_i32: 3087 op->args[5] = label_arg(to); 3088 break; 3089 default: 3090 g_assert_not_reached(); 3091 } 3092 } 3093 3094 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 3095 } 3096 3097 /* Reachable analysis : remove unreachable code. */ 3098 static void __attribute__((noinline)) 3099 reachable_code_pass(TCGContext *s) 3100 { 3101 TCGOp *op, *op_next, *op_prev; 3102 bool dead = false; 3103 3104 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3105 bool remove = dead; 3106 TCGLabel *label; 3107 3108 switch (op->opc) { 3109 case INDEX_op_set_label: 3110 label = arg_label(op->args[0]); 3111 3112 /* 3113 * Note that the first op in the TB is always a load, 3114 * so there is always something before a label. 3115 */ 3116 op_prev = QTAILQ_PREV(op, link); 3117 3118 /* 3119 * If we find two sequential labels, move all branches to 3120 * reference the second label and remove the first label. 3121 * Do this before branch to next optimization, so that the 3122 * middle label is out of the way. 3123 */ 3124 if (op_prev->opc == INDEX_op_set_label) { 3125 move_label_uses(label, arg_label(op_prev->args[0])); 3126 tcg_op_remove(s, op_prev); 3127 op_prev = QTAILQ_PREV(op, link); 3128 } 3129 3130 /* 3131 * Optimization can fold conditional branches to unconditional. 3132 * If we find a label which is preceded by an unconditional 3133 * branch to next, remove the branch. We couldn't do this when 3134 * processing the branch because any dead code between the branch 3135 * and label had not yet been removed. 3136 */ 3137 if (op_prev->opc == INDEX_op_br && 3138 label == arg_label(op_prev->args[0])) { 3139 tcg_op_remove(s, op_prev); 3140 /* Fall through means insns become live again. */ 3141 dead = false; 3142 } 3143 3144 if (QSIMPLEQ_EMPTY(&label->branches)) { 3145 /* 3146 * While there is an occasional backward branch, virtually 3147 * all branches generated by the translators are forward. 
3148 * Which means that generally we will have already removed 3149 * all references to the label that will be, and there is 3150 * little to be gained by iterating. 3151 */ 3152 remove = true; 3153 } else { 3154 /* Once we see a label, insns become live again. */ 3155 dead = false; 3156 remove = false; 3157 } 3158 break; 3159 3160 case INDEX_op_br: 3161 case INDEX_op_exit_tb: 3162 case INDEX_op_goto_ptr: 3163 /* Unconditional branches; everything following is dead. */ 3164 dead = true; 3165 break; 3166 3167 case INDEX_op_call: 3168 /* Notice noreturn helper calls, raising exceptions. */ 3169 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 3170 dead = true; 3171 } 3172 break; 3173 3174 case INDEX_op_insn_start: 3175 /* Never remove -- we need to keep these for unwind. */ 3176 remove = false; 3177 break; 3178 3179 default: 3180 break; 3181 } 3182 3183 if (remove) { 3184 tcg_op_remove(s, op); 3185 } 3186 } 3187 } 3188 3189 #define TS_DEAD 1 3190 #define TS_MEM 2 3191 3192 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 3193 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 3194 3195 /* For liveness_pass_1, the register preferences for a given temp. */ 3196 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 3197 { 3198 return ts->state_ptr; 3199 } 3200 3201 /* For liveness_pass_1, reset the preferences for a given temp to the 3202 * maximal regset for its type. 3203 */ 3204 static inline void la_reset_pref(TCGTemp *ts) 3205 { 3206 *la_temp_pref(ts) 3207 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 3208 } 3209 3210 /* liveness analysis: end of function: all temps are dead, and globals 3211 should be in memory. */ 3212 static void la_func_end(TCGContext *s, int ng, int nt) 3213 { 3214 int i; 3215 3216 for (i = 0; i < ng; ++i) { 3217 s->temps[i].state = TS_DEAD | TS_MEM; 3218 la_reset_pref(&s->temps[i]); 3219 } 3220 for (i = ng; i < nt; ++i) { 3221 s->temps[i].state = TS_DEAD; 3222 la_reset_pref(&s->temps[i]); 3223 } 3224 } 3225 3226 /* liveness analysis: end of basic block: all temps are dead, globals 3227 and local temps should be in memory. */ 3228 static void la_bb_end(TCGContext *s, int ng, int nt) 3229 { 3230 int i; 3231 3232 for (i = 0; i < nt; ++i) { 3233 TCGTemp *ts = &s->temps[i]; 3234 int state; 3235 3236 switch (ts->kind) { 3237 case TEMP_FIXED: 3238 case TEMP_GLOBAL: 3239 case TEMP_TB: 3240 state = TS_DEAD | TS_MEM; 3241 break; 3242 case TEMP_EBB: 3243 case TEMP_CONST: 3244 state = TS_DEAD; 3245 break; 3246 default: 3247 g_assert_not_reached(); 3248 } 3249 ts->state = state; 3250 la_reset_pref(ts); 3251 } 3252 } 3253 3254 /* liveness analysis: sync globals back to memory. */ 3255 static void la_global_sync(TCGContext *s, int ng) 3256 { 3257 int i; 3258 3259 for (i = 0; i < ng; ++i) { 3260 int state = s->temps[i].state; 3261 s->temps[i].state = state | TS_MEM; 3262 if (state == TS_DEAD) { 3263 /* If the global was previously dead, reset prefs. */ 3264 la_reset_pref(&s->temps[i]); 3265 } 3266 } 3267 } 3268 3269 /* 3270 * liveness analysis: conditional branch: all temps are dead unless 3271 * explicitly live-across-conditional-branch, globals and local temps 3272 * should be synced. 
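 * This is the variant used for opcodes flagged TCG_OPF_COND_BRANCH: unlike
 * la_bb_end(), TEMP_TB temps that are still in use stay live here; they are
 * only forced to have an up-to-date copy in memory.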
3273 */ 3274 static void la_bb_sync(TCGContext *s, int ng, int nt) 3275 { 3276 la_global_sync(s, ng); 3277 3278 for (int i = ng; i < nt; ++i) { 3279 TCGTemp *ts = &s->temps[i]; 3280 int state; 3281 3282 switch (ts->kind) { 3283 case TEMP_TB: 3284 state = ts->state; 3285 ts->state = state | TS_MEM; 3286 if (state != TS_DEAD) { 3287 continue; 3288 } 3289 break; 3290 case TEMP_EBB: 3291 case TEMP_CONST: 3292 continue; 3293 default: 3294 g_assert_not_reached(); 3295 } 3296 la_reset_pref(&s->temps[i]); 3297 } 3298 } 3299 3300 /* liveness analysis: sync globals back to memory and kill. */ 3301 static void la_global_kill(TCGContext *s, int ng) 3302 { 3303 int i; 3304 3305 for (i = 0; i < ng; i++) { 3306 s->temps[i].state = TS_DEAD | TS_MEM; 3307 la_reset_pref(&s->temps[i]); 3308 } 3309 } 3310 3311 /* liveness analysis: note live globals crossing calls. */ 3312 static void la_cross_call(TCGContext *s, int nt) 3313 { 3314 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3315 int i; 3316 3317 for (i = 0; i < nt; i++) { 3318 TCGTemp *ts = &s->temps[i]; 3319 if (!(ts->state & TS_DEAD)) { 3320 TCGRegSet *pset = la_temp_pref(ts); 3321 TCGRegSet set = *pset; 3322 3323 set &= mask; 3324 /* If the combination is not possible, restart. */ 3325 if (set == 0) { 3326 set = tcg_target_available_regs[ts->type] & mask; 3327 } 3328 *pset = set; 3329 } 3330 } 3331 } 3332 3333 /* 3334 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3335 * to TEMP_EBB, if possible. 3336 */ 3337 static void __attribute__((noinline)) 3338 liveness_pass_0(TCGContext *s) 3339 { 3340 void * const multiple_ebb = (void *)(uintptr_t)-1; 3341 int nb_temps = s->nb_temps; 3342 TCGOp *op, *ebb; 3343 3344 for (int i = s->nb_globals; i < nb_temps; ++i) { 3345 s->temps[i].state_ptr = NULL; 3346 } 3347 3348 /* 3349 * Represent each EBB by the op at which it begins. In the case of 3350 * the first EBB, this is the first op, otherwise it is a label. 3351 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3352 * within a single EBB, else MULTIPLE_EBB. 3353 */ 3354 ebb = QTAILQ_FIRST(&s->ops); 3355 QTAILQ_FOREACH(op, &s->ops, link) { 3356 const TCGOpDef *def; 3357 int nb_oargs, nb_iargs; 3358 3359 switch (op->opc) { 3360 case INDEX_op_set_label: 3361 ebb = op; 3362 continue; 3363 case INDEX_op_discard: 3364 continue; 3365 case INDEX_op_call: 3366 nb_oargs = TCGOP_CALLO(op); 3367 nb_iargs = TCGOP_CALLI(op); 3368 break; 3369 default: 3370 def = &tcg_op_defs[op->opc]; 3371 nb_oargs = def->nb_oargs; 3372 nb_iargs = def->nb_iargs; 3373 break; 3374 } 3375 3376 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3377 TCGTemp *ts = arg_temp(op->args[i]); 3378 3379 if (ts->kind != TEMP_TB) { 3380 continue; 3381 } 3382 if (ts->state_ptr == NULL) { 3383 ts->state_ptr = ebb; 3384 } else if (ts->state_ptr != ebb) { 3385 ts->state_ptr = multiple_ebb; 3386 } 3387 } 3388 } 3389 3390 /* 3391 * For TEMP_TB that turned out not to be used beyond one EBB, 3392 * reduce the liveness to TEMP_EBB. 3393 */ 3394 for (int i = s->nb_globals; i < nb_temps; ++i) { 3395 TCGTemp *ts = &s->temps[i]; 3396 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3397 ts->kind = TEMP_EBB; 3398 } 3399 } 3400 } 3401 3402 /* Liveness analysis : update the opc_arg_life array to tell if a 3403 given input arguments is dead. Instructions updating dead 3404 temporaries are removed. 
*/ 3405 static void __attribute__((noinline)) 3406 liveness_pass_1(TCGContext *s) 3407 { 3408 int nb_globals = s->nb_globals; 3409 int nb_temps = s->nb_temps; 3410 TCGOp *op, *op_prev; 3411 TCGRegSet *prefs; 3412 int i; 3413 3414 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3415 for (i = 0; i < nb_temps; ++i) { 3416 s->temps[i].state_ptr = prefs + i; 3417 } 3418 3419 /* ??? Should be redundant with the exit_tb that ends the TB. */ 3420 la_func_end(s, nb_globals, nb_temps); 3421 3422 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3423 int nb_iargs, nb_oargs; 3424 TCGOpcode opc_new, opc_new2; 3425 bool have_opc_new2; 3426 TCGLifeData arg_life = 0; 3427 TCGTemp *ts; 3428 TCGOpcode opc = op->opc; 3429 const TCGOpDef *def = &tcg_op_defs[opc]; 3430 3431 switch (opc) { 3432 case INDEX_op_call: 3433 { 3434 const TCGHelperInfo *info = tcg_call_info(op); 3435 int call_flags = tcg_call_flags(op); 3436 3437 nb_oargs = TCGOP_CALLO(op); 3438 nb_iargs = TCGOP_CALLI(op); 3439 3440 /* pure functions can be removed if their result is unused */ 3441 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3442 for (i = 0; i < nb_oargs; i++) { 3443 ts = arg_temp(op->args[i]); 3444 if (ts->state != TS_DEAD) { 3445 goto do_not_remove_call; 3446 } 3447 } 3448 goto do_remove; 3449 } 3450 do_not_remove_call: 3451 3452 /* Output args are dead. */ 3453 for (i = 0; i < nb_oargs; i++) { 3454 ts = arg_temp(op->args[i]); 3455 if (ts->state & TS_DEAD) { 3456 arg_life |= DEAD_ARG << i; 3457 } 3458 if (ts->state & TS_MEM) { 3459 arg_life |= SYNC_ARG << i; 3460 } 3461 ts->state = TS_DEAD; 3462 la_reset_pref(ts); 3463 } 3464 3465 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3466 memset(op->output_pref, 0, sizeof(op->output_pref)); 3467 3468 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3469 TCG_CALL_NO_READ_GLOBALS))) { 3470 la_global_kill(s, nb_globals); 3471 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3472 la_global_sync(s, nb_globals); 3473 } 3474 3475 /* Record arguments that die in this helper. */ 3476 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3477 ts = arg_temp(op->args[i]); 3478 if (ts->state & TS_DEAD) { 3479 arg_life |= DEAD_ARG << i; 3480 } 3481 } 3482 3483 /* For all live registers, remove call-clobbered prefs. */ 3484 la_cross_call(s, nb_temps); 3485 3486 /* 3487 * Input arguments are live for preceding opcodes. 3488 * 3489 * For those arguments that die, and will be allocated in 3490 * registers, clear the register set for that arg, to be 3491 * filled in below. For args that will be on the stack, 3492 * reset to any available reg. Process arguments in reverse 3493 * order so that if a temp is used more than once, the stack 3494 * reset to max happens before the register reset to 0. 3495 */ 3496 for (i = nb_iargs - 1; i >= 0; i--) { 3497 const TCGCallArgumentLoc *loc = &info->in[i]; 3498 ts = arg_temp(op->args[nb_oargs + i]); 3499 3500 if (ts->state & TS_DEAD) { 3501 switch (loc->kind) { 3502 case TCG_CALL_ARG_NORMAL: 3503 case TCG_CALL_ARG_EXTEND_U: 3504 case TCG_CALL_ARG_EXTEND_S: 3505 if (arg_slot_reg_p(loc->arg_slot)) { 3506 *la_temp_pref(ts) = 0; 3507 break; 3508 } 3509 /* fall through */ 3510 default: 3511 *la_temp_pref(ts) = 3512 tcg_target_available_regs[ts->type]; 3513 break; 3514 } 3515 ts->state &= ~TS_DEAD; 3516 } 3517 } 3518 3519 /* 3520 * For each input argument, add its input register to prefs. 3521 * If a temp is used once, this produces a single set bit; 3522 * if a temp is used multiple times, this produces a set. 
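             * E.g. a temp passed as both the first and the second integer
             * argument would collect tcg_target_call_iarg_regs[0] and [1]
             * in its preference set.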
3523 */ 3524 for (i = 0; i < nb_iargs; i++) { 3525 const TCGCallArgumentLoc *loc = &info->in[i]; 3526 ts = arg_temp(op->args[nb_oargs + i]); 3527 3528 switch (loc->kind) { 3529 case TCG_CALL_ARG_NORMAL: 3530 case TCG_CALL_ARG_EXTEND_U: 3531 case TCG_CALL_ARG_EXTEND_S: 3532 if (arg_slot_reg_p(loc->arg_slot)) { 3533 tcg_regset_set_reg(*la_temp_pref(ts), 3534 tcg_target_call_iarg_regs[loc->arg_slot]); 3535 } 3536 break; 3537 default: 3538 break; 3539 } 3540 } 3541 } 3542 break; 3543 case INDEX_op_insn_start: 3544 break; 3545 case INDEX_op_discard: 3546 /* mark the temporary as dead */ 3547 ts = arg_temp(op->args[0]); 3548 ts->state = TS_DEAD; 3549 la_reset_pref(ts); 3550 break; 3551 3552 case INDEX_op_add2_i32: 3553 opc_new = INDEX_op_add_i32; 3554 goto do_addsub2; 3555 case INDEX_op_sub2_i32: 3556 opc_new = INDEX_op_sub_i32; 3557 goto do_addsub2; 3558 case INDEX_op_add2_i64: 3559 opc_new = INDEX_op_add_i64; 3560 goto do_addsub2; 3561 case INDEX_op_sub2_i64: 3562 opc_new = INDEX_op_sub_i64; 3563 do_addsub2: 3564 nb_iargs = 4; 3565 nb_oargs = 2; 3566 /* Test if the high part of the operation is dead, but not 3567 the low part. The result can be optimized to a simple 3568 add or sub. This happens often for x86_64 guest when the 3569 cpu mode is set to 32 bit. */ 3570 if (arg_temp(op->args[1])->state == TS_DEAD) { 3571 if (arg_temp(op->args[0])->state == TS_DEAD) { 3572 goto do_remove; 3573 } 3574 /* Replace the opcode and adjust the args in place, 3575 leaving 3 unused args at the end. */ 3576 op->opc = opc = opc_new; 3577 op->args[1] = op->args[2]; 3578 op->args[2] = op->args[4]; 3579 /* Fall through and mark the single-word operation live. */ 3580 nb_iargs = 2; 3581 nb_oargs = 1; 3582 } 3583 goto do_not_remove; 3584 3585 case INDEX_op_mulu2_i32: 3586 opc_new = INDEX_op_mul_i32; 3587 opc_new2 = INDEX_op_muluh_i32; 3588 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3589 goto do_mul2; 3590 case INDEX_op_muls2_i32: 3591 opc_new = INDEX_op_mul_i32; 3592 opc_new2 = INDEX_op_mulsh_i32; 3593 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3594 goto do_mul2; 3595 case INDEX_op_mulu2_i64: 3596 opc_new = INDEX_op_mul_i64; 3597 opc_new2 = INDEX_op_muluh_i64; 3598 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3599 goto do_mul2; 3600 case INDEX_op_muls2_i64: 3601 opc_new = INDEX_op_mul_i64; 3602 opc_new2 = INDEX_op_mulsh_i64; 3603 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3604 goto do_mul2; 3605 do_mul2: 3606 nb_iargs = 2; 3607 nb_oargs = 2; 3608 if (arg_temp(op->args[1])->state == TS_DEAD) { 3609 if (arg_temp(op->args[0])->state == TS_DEAD) { 3610 /* Both parts of the operation are dead. */ 3611 goto do_remove; 3612 } 3613 /* The high part of the operation is dead; generate the low. */ 3614 op->opc = opc = opc_new; 3615 op->args[1] = op->args[2]; 3616 op->args[2] = op->args[3]; 3617 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3618 /* The low part of the operation is dead; generate the high. */ 3619 op->opc = opc = opc_new2; 3620 op->args[0] = op->args[1]; 3621 op->args[1] = op->args[2]; 3622 op->args[2] = op->args[3]; 3623 } else { 3624 goto do_not_remove; 3625 } 3626 /* Mark the single-word operation live. */ 3627 nb_oargs = 1; 3628 goto do_not_remove; 3629 3630 default: 3631 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3632 nb_iargs = def->nb_iargs; 3633 nb_oargs = def->nb_oargs; 3634 3635 /* Test if the operation can be removed because all 3636 its outputs are dead. 
We assume that nb_oargs == 0 3637 implies side effects */ 3638 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3639 for (i = 0; i < nb_oargs; i++) { 3640 if (arg_temp(op->args[i])->state != TS_DEAD) { 3641 goto do_not_remove; 3642 } 3643 } 3644 goto do_remove; 3645 } 3646 goto do_not_remove; 3647 3648 do_remove: 3649 tcg_op_remove(s, op); 3650 break; 3651 3652 do_not_remove: 3653 for (i = 0; i < nb_oargs; i++) { 3654 ts = arg_temp(op->args[i]); 3655 3656 /* Remember the preference of the uses that followed. */ 3657 if (i < ARRAY_SIZE(op->output_pref)) { 3658 op->output_pref[i] = *la_temp_pref(ts); 3659 } 3660 3661 /* Output args are dead. */ 3662 if (ts->state & TS_DEAD) { 3663 arg_life |= DEAD_ARG << i; 3664 } 3665 if (ts->state & TS_MEM) { 3666 arg_life |= SYNC_ARG << i; 3667 } 3668 ts->state = TS_DEAD; 3669 la_reset_pref(ts); 3670 } 3671 3672 /* If end of basic block, update. */ 3673 if (def->flags & TCG_OPF_BB_EXIT) { 3674 la_func_end(s, nb_globals, nb_temps); 3675 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3676 la_bb_sync(s, nb_globals, nb_temps); 3677 } else if (def->flags & TCG_OPF_BB_END) { 3678 la_bb_end(s, nb_globals, nb_temps); 3679 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3680 la_global_sync(s, nb_globals); 3681 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3682 la_cross_call(s, nb_temps); 3683 } 3684 } 3685 3686 /* Record arguments that die in this opcode. */ 3687 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3688 ts = arg_temp(op->args[i]); 3689 if (ts->state & TS_DEAD) { 3690 arg_life |= DEAD_ARG << i; 3691 } 3692 } 3693 3694 /* Input arguments are live for preceding opcodes. */ 3695 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3696 ts = arg_temp(op->args[i]); 3697 if (ts->state & TS_DEAD) { 3698 /* For operands that were dead, initially allow 3699 all regs for the type. */ 3700 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3701 ts->state &= ~TS_DEAD; 3702 } 3703 } 3704 3705 /* Incorporate constraints for this operand. */ 3706 switch (opc) { 3707 case INDEX_op_mov_i32: 3708 case INDEX_op_mov_i64: 3709 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3710 have proper constraints. That said, special case 3711 moves to propagate preferences backward. */ 3712 if (IS_DEAD_ARG(1)) { 3713 *la_temp_pref(arg_temp(op->args[0])) 3714 = *la_temp_pref(arg_temp(op->args[1])); 3715 } 3716 break; 3717 3718 default: 3719 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3720 const TCGArgConstraint *ct = &def->args_ct[i]; 3721 TCGRegSet set, *pset; 3722 3723 ts = arg_temp(op->args[i]); 3724 pset = la_temp_pref(ts); 3725 set = *pset; 3726 3727 set &= ct->regs; 3728 if (ct->ialias) { 3729 set &= output_pref(op, ct->alias_index); 3730 } 3731 /* If the combination is not possible, restart. */ 3732 if (set == 0) { 3733 set = ct->regs; 3734 } 3735 *pset = set; 3736 } 3737 break; 3738 } 3739 break; 3740 } 3741 op->life = arg_life; 3742 } 3743 } 3744 3745 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3746 static bool __attribute__((noinline)) 3747 liveness_pass_2(TCGContext *s) 3748 { 3749 int nb_globals = s->nb_globals; 3750 int nb_temps, i; 3751 bool changes = false; 3752 TCGOp *op, *op_next; 3753 3754 /* Create a temporary for each indirect global. 
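      Such a global cannot be addressed directly off a fixed register, so it
      is shadowed by a TEMP_EBB temp here; explicit ld/st ops are inserted to
      load it before uses and store it after writes, and the opcode arguments
      are rewritten to refer to the direct temp.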
*/ 3755 for (i = 0; i < nb_globals; ++i) { 3756 TCGTemp *its = &s->temps[i]; 3757 if (its->indirect_reg) { 3758 TCGTemp *dts = tcg_temp_alloc(s); 3759 dts->type = its->type; 3760 dts->base_type = its->base_type; 3761 dts->temp_subindex = its->temp_subindex; 3762 dts->kind = TEMP_EBB; 3763 its->state_ptr = dts; 3764 } else { 3765 its->state_ptr = NULL; 3766 } 3767 /* All globals begin dead. */ 3768 its->state = TS_DEAD; 3769 } 3770 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3771 TCGTemp *its = &s->temps[i]; 3772 its->state_ptr = NULL; 3773 its->state = TS_DEAD; 3774 } 3775 3776 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3777 TCGOpcode opc = op->opc; 3778 const TCGOpDef *def = &tcg_op_defs[opc]; 3779 TCGLifeData arg_life = op->life; 3780 int nb_iargs, nb_oargs, call_flags; 3781 TCGTemp *arg_ts, *dir_ts; 3782 3783 if (opc == INDEX_op_call) { 3784 nb_oargs = TCGOP_CALLO(op); 3785 nb_iargs = TCGOP_CALLI(op); 3786 call_flags = tcg_call_flags(op); 3787 } else { 3788 nb_iargs = def->nb_iargs; 3789 nb_oargs = def->nb_oargs; 3790 3791 /* Set flags similar to how calls require. */ 3792 if (def->flags & TCG_OPF_COND_BRANCH) { 3793 /* Like reading globals: sync_globals */ 3794 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3795 } else if (def->flags & TCG_OPF_BB_END) { 3796 /* Like writing globals: save_globals */ 3797 call_flags = 0; 3798 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3799 /* Like reading globals: sync_globals */ 3800 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3801 } else { 3802 /* No effect on globals. */ 3803 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3804 TCG_CALL_NO_WRITE_GLOBALS); 3805 } 3806 } 3807 3808 /* Make sure that input arguments are available. */ 3809 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3810 arg_ts = arg_temp(op->args[i]); 3811 dir_ts = arg_ts->state_ptr; 3812 if (dir_ts && arg_ts->state == TS_DEAD) { 3813 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3814 ? INDEX_op_ld_i32 3815 : INDEX_op_ld_i64); 3816 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3817 3818 lop->args[0] = temp_arg(dir_ts); 3819 lop->args[1] = temp_arg(arg_ts->mem_base); 3820 lop->args[2] = arg_ts->mem_offset; 3821 3822 /* Loaded, but synced with memory. */ 3823 arg_ts->state = TS_MEM; 3824 } 3825 } 3826 3827 /* Perform input replacement, and mark inputs that became dead. 3828 No action is required except keeping temp_state up to date 3829 so that we reload when needed. */ 3830 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3831 arg_ts = arg_temp(op->args[i]); 3832 dir_ts = arg_ts->state_ptr; 3833 if (dir_ts) { 3834 op->args[i] = temp_arg(dir_ts); 3835 changes = true; 3836 if (IS_DEAD_ARG(i)) { 3837 arg_ts->state = TS_DEAD; 3838 } 3839 } 3840 } 3841 3842 /* Liveness analysis should ensure that the following are 3843 all correct, for call sites and basic block end points. */ 3844 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3845 /* Nothing to do */ 3846 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3847 for (i = 0; i < nb_globals; ++i) { 3848 /* Liveness should see that globals are synced back, 3849 that is, either TS_DEAD or TS_MEM. */ 3850 arg_ts = &s->temps[i]; 3851 tcg_debug_assert(arg_ts->state_ptr == 0 3852 || arg_ts->state != 0); 3853 } 3854 } else { 3855 for (i = 0; i < nb_globals; ++i) { 3856 /* Liveness should see that globals are saved back, 3857 that is, TS_DEAD, waiting to be reloaded. */ 3858 arg_ts = &s->temps[i]; 3859 tcg_debug_assert(arg_ts->state_ptr == 0 3860 || arg_ts->state == TS_DEAD); 3861 } 3862 } 3863 3864 /* Outputs become available. 
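               The mov_i32/i64 case below is special: when the output needs
               a sync and the mov's destination is otherwise dead, the store
               is emitted directly from the mov's input and the mov itself
               is removed.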
*/ 3865 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3866 arg_ts = arg_temp(op->args[0]); 3867 dir_ts = arg_ts->state_ptr; 3868 if (dir_ts) { 3869 op->args[0] = temp_arg(dir_ts); 3870 changes = true; 3871 3872 /* The output is now live and modified. */ 3873 arg_ts->state = 0; 3874 3875 if (NEED_SYNC_ARG(0)) { 3876 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3877 ? INDEX_op_st_i32 3878 : INDEX_op_st_i64); 3879 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3880 TCGTemp *out_ts = dir_ts; 3881 3882 if (IS_DEAD_ARG(0)) { 3883 out_ts = arg_temp(op->args[1]); 3884 arg_ts->state = TS_DEAD; 3885 tcg_op_remove(s, op); 3886 } else { 3887 arg_ts->state = TS_MEM; 3888 } 3889 3890 sop->args[0] = temp_arg(out_ts); 3891 sop->args[1] = temp_arg(arg_ts->mem_base); 3892 sop->args[2] = arg_ts->mem_offset; 3893 } else { 3894 tcg_debug_assert(!IS_DEAD_ARG(0)); 3895 } 3896 } 3897 } else { 3898 for (i = 0; i < nb_oargs; i++) { 3899 arg_ts = arg_temp(op->args[i]); 3900 dir_ts = arg_ts->state_ptr; 3901 if (!dir_ts) { 3902 continue; 3903 } 3904 op->args[i] = temp_arg(dir_ts); 3905 changes = true; 3906 3907 /* The output is now live and modified. */ 3908 arg_ts->state = 0; 3909 3910 /* Sync outputs upon their last write. */ 3911 if (NEED_SYNC_ARG(i)) { 3912 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3913 ? INDEX_op_st_i32 3914 : INDEX_op_st_i64); 3915 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3916 3917 sop->args[0] = temp_arg(dir_ts); 3918 sop->args[1] = temp_arg(arg_ts->mem_base); 3919 sop->args[2] = arg_ts->mem_offset; 3920 3921 arg_ts->state = TS_MEM; 3922 } 3923 /* Drop outputs that are dead. */ 3924 if (IS_DEAD_ARG(i)) { 3925 arg_ts->state = TS_DEAD; 3926 } 3927 } 3928 } 3929 } 3930 3931 return changes; 3932 } 3933 3934 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3935 { 3936 intptr_t off; 3937 int size, align; 3938 3939 /* When allocating an object, look at the full type. */ 3940 size = tcg_type_size(ts->base_type); 3941 switch (ts->base_type) { 3942 case TCG_TYPE_I32: 3943 align = 4; 3944 break; 3945 case TCG_TYPE_I64: 3946 case TCG_TYPE_V64: 3947 align = 8; 3948 break; 3949 case TCG_TYPE_I128: 3950 case TCG_TYPE_V128: 3951 case TCG_TYPE_V256: 3952 /* 3953 * Note that we do not require aligned storage for V256, 3954 * and that we provide alignment for I128 to match V128, 3955 * even if that's above what the host ABI requires. 3956 */ 3957 align = 16; 3958 break; 3959 default: 3960 g_assert_not_reached(); 3961 } 3962 3963 /* 3964 * Assume the stack is sufficiently aligned. 3965 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3966 * and do not require 16 byte vector alignment. This seems slightly 3967 * easier than fully parameterizing the above switch statement. 3968 */ 3969 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3970 off = ROUND_UP(s->current_frame_offset, align); 3971 3972 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3973 if (off + size > s->frame_end) { 3974 tcg_raise_tb_overflow(s); 3975 } 3976 s->current_frame_offset = off + size; 3977 #if defined(__sparc__) 3978 off += TCG_TARGET_STACK_BIAS; 3979 #endif 3980 3981 /* If the object was subdivided, assign memory to all the parts. */ 3982 if (ts->base_type != ts->type) { 3983 int part_size = tcg_type_size(ts->type); 3984 int part_count = size / part_size; 3985 3986 /* 3987 * Each part is allocated sequentially in tcg_temp_new_internal. 3988 * Jump back to the first part by subtracting the current index. 
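     * For example, a TCG_TYPE_I128 temp on a 64-bit host is represented as
     * two TCG_TYPE_I64 parts; both parts get mem_base/mem_offset assigned
     * here, at off and off + 8 respectively.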
3989 */ 3990 ts -= ts->temp_subindex; 3991 for (int i = 0; i < part_count; ++i) { 3992 ts[i].mem_offset = off + i * part_size; 3993 ts[i].mem_base = s->frame_temp; 3994 ts[i].mem_allocated = 1; 3995 } 3996 } else { 3997 ts->mem_offset = off; 3998 ts->mem_base = s->frame_temp; 3999 ts->mem_allocated = 1; 4000 } 4001 } 4002 4003 /* Assign @reg to @ts, and update reg_to_temp[]. */ 4004 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 4005 { 4006 if (ts->val_type == TEMP_VAL_REG) { 4007 TCGReg old = ts->reg; 4008 tcg_debug_assert(s->reg_to_temp[old] == ts); 4009 if (old == reg) { 4010 return; 4011 } 4012 s->reg_to_temp[old] = NULL; 4013 } 4014 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4015 s->reg_to_temp[reg] = ts; 4016 ts->val_type = TEMP_VAL_REG; 4017 ts->reg = reg; 4018 } 4019 4020 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 4021 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 4022 { 4023 tcg_debug_assert(type != TEMP_VAL_REG); 4024 if (ts->val_type == TEMP_VAL_REG) { 4025 TCGReg reg = ts->reg; 4026 tcg_debug_assert(s->reg_to_temp[reg] == ts); 4027 s->reg_to_temp[reg] = NULL; 4028 } 4029 ts->val_type = type; 4030 } 4031 4032 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 4033 4034 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 4035 mark it free; otherwise mark it dead. */ 4036 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 4037 { 4038 TCGTempVal new_type; 4039 4040 switch (ts->kind) { 4041 case TEMP_FIXED: 4042 return; 4043 case TEMP_GLOBAL: 4044 case TEMP_TB: 4045 new_type = TEMP_VAL_MEM; 4046 break; 4047 case TEMP_EBB: 4048 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 4049 break; 4050 case TEMP_CONST: 4051 new_type = TEMP_VAL_CONST; 4052 break; 4053 default: 4054 g_assert_not_reached(); 4055 } 4056 set_temp_val_nonreg(s, ts, new_type); 4057 } 4058 4059 /* Mark a temporary as dead. */ 4060 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 4061 { 4062 temp_free_or_dead(s, ts, 1); 4063 } 4064 4065 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 4066 registers needs to be allocated to store a constant. If 'free_or_dead' 4067 is non-zero, subsequently release the temporary; if it is positive, the 4068 temp is dead; if it is negative, the temp is free. */ 4069 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 4070 TCGRegSet preferred_regs, int free_or_dead) 4071 { 4072 if (!temp_readonly(ts) && !ts->mem_coherent) { 4073 if (!ts->mem_allocated) { 4074 temp_allocate_frame(s, ts); 4075 } 4076 switch (ts->val_type) { 4077 case TEMP_VAL_CONST: 4078 /* If we're going to free the temp immediately, then we won't 4079 require it later in a register, so attempt to store the 4080 constant to memory directly. 
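(tcg_out_sti may refuse constants that the backend cannot encode in a
store-immediate; when it returns false we fall through to temp_load below
and store the value from a register instead.)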
*/ 4081 if (free_or_dead 4082 && tcg_out_sti(s, ts->type, ts->val, 4083 ts->mem_base->reg, ts->mem_offset)) { 4084 break; 4085 } 4086 temp_load(s, ts, tcg_target_available_regs[ts->type], 4087 allocated_regs, preferred_regs); 4088 /* fallthrough */ 4089 4090 case TEMP_VAL_REG: 4091 tcg_out_st(s, ts->type, ts->reg, 4092 ts->mem_base->reg, ts->mem_offset); 4093 break; 4094 4095 case TEMP_VAL_MEM: 4096 break; 4097 4098 case TEMP_VAL_DEAD: 4099 default: 4100 g_assert_not_reached(); 4101 } 4102 ts->mem_coherent = 1; 4103 } 4104 if (free_or_dead) { 4105 temp_free_or_dead(s, ts, free_or_dead); 4106 } 4107 } 4108 4109 /* free register 'reg' by spilling the corresponding temporary if necessary */ 4110 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 4111 { 4112 TCGTemp *ts = s->reg_to_temp[reg]; 4113 if (ts != NULL) { 4114 temp_sync(s, ts, allocated_regs, 0, -1); 4115 } 4116 } 4117 4118 /** 4119 * tcg_reg_alloc: 4120 * @required_regs: Set of registers in which we must allocate. 4121 * @allocated_regs: Set of registers which must be avoided. 4122 * @preferred_regs: Set of registers we should prefer. 4123 * @rev: True if we search the registers in "indirect" order. 4124 * 4125 * The allocated register must be in @required_regs & ~@allocated_regs, 4126 * but if we can put it in @preferred_regs we may save a move later. 4127 */ 4128 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 4129 TCGRegSet allocated_regs, 4130 TCGRegSet preferred_regs, bool rev) 4131 { 4132 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4133 TCGRegSet reg_ct[2]; 4134 const int *order; 4135 4136 reg_ct[1] = required_regs & ~allocated_regs; 4137 tcg_debug_assert(reg_ct[1] != 0); 4138 reg_ct[0] = reg_ct[1] & preferred_regs; 4139 4140 /* Skip the preferred_regs option if it cannot be satisfied, 4141 or if the preference made no difference. */ 4142 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4143 4144 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4145 4146 /* Try free registers, preferences first. */ 4147 for (j = f; j < 2; j++) { 4148 TCGRegSet set = reg_ct[j]; 4149 4150 if (tcg_regset_single(set)) { 4151 /* One register in the set. */ 4152 TCGReg reg = tcg_regset_first(set); 4153 if (s->reg_to_temp[reg] == NULL) { 4154 return reg; 4155 } 4156 } else { 4157 for (i = 0; i < n; i++) { 4158 TCGReg reg = order[i]; 4159 if (s->reg_to_temp[reg] == NULL && 4160 tcg_regset_test_reg(set, reg)) { 4161 return reg; 4162 } 4163 } 4164 } 4165 } 4166 4167 /* We must spill something. */ 4168 for (j = f; j < 2; j++) { 4169 TCGRegSet set = reg_ct[j]; 4170 4171 if (tcg_regset_single(set)) { 4172 /* One register in the set. */ 4173 TCGReg reg = tcg_regset_first(set); 4174 tcg_reg_free(s, reg, allocated_regs); 4175 return reg; 4176 } else { 4177 for (i = 0; i < n; i++) { 4178 TCGReg reg = order[i]; 4179 if (tcg_regset_test_reg(set, reg)) { 4180 tcg_reg_free(s, reg, allocated_regs); 4181 return reg; 4182 } 4183 } 4184 } 4185 } 4186 4187 g_assert_not_reached(); 4188 } 4189 4190 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 4191 TCGRegSet allocated_regs, 4192 TCGRegSet preferred_regs, bool rev) 4193 { 4194 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 4195 TCGRegSet reg_ct[2]; 4196 const int *order; 4197 4198 /* Ensure that if I is not in allocated_regs, I+1 is not either. 
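e.g. with allocated_regs = 0x20 (reg 5 in use), allocated_regs >> 1 also
covers reg 4, so neither reg 4 nor reg 5 survives into reg_ct[1]; any reg R
that remains therefore has both R and R + 1 outside allocated_regs.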
*/ 4199 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 4200 tcg_debug_assert(reg_ct[1] != 0); 4201 reg_ct[0] = reg_ct[1] & preferred_regs; 4202 4203 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 4204 4205 /* 4206 * Skip the preferred_regs option if it cannot be satisfied, 4207 * or if the preference made no difference. 4208 */ 4209 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 4210 4211 /* 4212 * Minimize the number of flushes by looking for 2 free registers first, 4213 * then a single flush, then two flushes. 4214 */ 4215 for (fmin = 2; fmin >= 0; fmin--) { 4216 for (j = k; j < 2; j++) { 4217 TCGRegSet set = reg_ct[j]; 4218 4219 for (i = 0; i < n; i++) { 4220 TCGReg reg = order[i]; 4221 4222 if (tcg_regset_test_reg(set, reg)) { 4223 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 4224 if (f >= fmin) { 4225 tcg_reg_free(s, reg, allocated_regs); 4226 tcg_reg_free(s, reg + 1, allocated_regs); 4227 return reg; 4228 } 4229 } 4230 } 4231 } 4232 } 4233 g_assert_not_reached(); 4234 } 4235 4236 /* Make sure the temporary is in a register. If needed, allocate the register 4237 from DESIRED while avoiding ALLOCATED. */ 4238 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 4239 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 4240 { 4241 TCGReg reg; 4242 4243 switch (ts->val_type) { 4244 case TEMP_VAL_REG: 4245 return; 4246 case TEMP_VAL_CONST: 4247 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4248 preferred_regs, ts->indirect_base); 4249 if (ts->type <= TCG_TYPE_I64) { 4250 tcg_out_movi(s, ts->type, reg, ts->val); 4251 } else { 4252 uint64_t val = ts->val; 4253 MemOp vece = MO_64; 4254 4255 /* 4256 * Find the minimal vector element that matches the constant. 4257 * The targets will, in general, have to do this search anyway, 4258 * do this generically. 4259 */ 4260 if (val == dup_const(MO_8, val)) { 4261 vece = MO_8; 4262 } else if (val == dup_const(MO_16, val)) { 4263 vece = MO_16; 4264 } else if (val == dup_const(MO_32, val)) { 4265 vece = MO_32; 4266 } 4267 4268 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4269 } 4270 ts->mem_coherent = 0; 4271 break; 4272 case TEMP_VAL_MEM: 4273 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4274 preferred_regs, ts->indirect_base); 4275 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4276 ts->mem_coherent = 1; 4277 break; 4278 case TEMP_VAL_DEAD: 4279 default: 4280 g_assert_not_reached(); 4281 } 4282 set_temp_val_reg(s, ts, reg); 4283 } 4284 4285 /* Save a temporary to memory. 'allocated_regs' is used in case a 4286 temporary registers needs to be allocated to store a constant. */ 4287 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4288 { 4289 /* The liveness analysis already ensures that globals are back 4290 in memory. Keep an tcg_debug_assert for safety. */ 4291 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4292 } 4293 4294 /* save globals to their canonical location and assume they can be 4295 modified be the following code. 'allocated_regs' is used in case a 4296 temporary registers needs to be allocated to store a constant. */ 4297 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4298 { 4299 int i, n; 4300 4301 for (i = 0, n = s->nb_globals; i < n; i++) { 4302 temp_save(s, &s->temps[i], allocated_regs); 4303 } 4304 } 4305 4306 /* sync globals to their canonical location and assume they can be 4307 read by the following code. 
'allocated_regs' is used in case a 4308 temporary registers needs to be allocated to store a constant. */ 4309 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4310 { 4311 int i, n; 4312 4313 for (i = 0, n = s->nb_globals; i < n; i++) { 4314 TCGTemp *ts = &s->temps[i]; 4315 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4316 || ts->kind == TEMP_FIXED 4317 || ts->mem_coherent); 4318 } 4319 } 4320 4321 /* at the end of a basic block, we assume all temporaries are dead and 4322 all globals are stored at their canonical location. */ 4323 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4324 { 4325 int i; 4326 4327 for (i = s->nb_globals; i < s->nb_temps; i++) { 4328 TCGTemp *ts = &s->temps[i]; 4329 4330 switch (ts->kind) { 4331 case TEMP_TB: 4332 temp_save(s, ts, allocated_regs); 4333 break; 4334 case TEMP_EBB: 4335 /* The liveness analysis already ensures that temps are dead. 4336 Keep an tcg_debug_assert for safety. */ 4337 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4338 break; 4339 case TEMP_CONST: 4340 /* Similarly, we should have freed any allocated register. */ 4341 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4342 break; 4343 default: 4344 g_assert_not_reached(); 4345 } 4346 } 4347 4348 save_globals(s, allocated_regs); 4349 } 4350 4351 /* 4352 * At a conditional branch, we assume all temporaries are dead unless 4353 * explicitly live-across-conditional-branch; all globals and local 4354 * temps are synced to their location. 4355 */ 4356 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4357 { 4358 sync_globals(s, allocated_regs); 4359 4360 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4361 TCGTemp *ts = &s->temps[i]; 4362 /* 4363 * The liveness analysis already ensures that temps are dead. 4364 * Keep tcg_debug_asserts for safety. 4365 */ 4366 switch (ts->kind) { 4367 case TEMP_TB: 4368 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4369 break; 4370 case TEMP_EBB: 4371 case TEMP_CONST: 4372 break; 4373 default: 4374 g_assert_not_reached(); 4375 } 4376 } 4377 } 4378 4379 /* 4380 * Specialized code generation for INDEX_op_mov_* with a constant. 4381 */ 4382 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4383 tcg_target_ulong val, TCGLifeData arg_life, 4384 TCGRegSet preferred_regs) 4385 { 4386 /* ENV should not be modified. */ 4387 tcg_debug_assert(!temp_readonly(ots)); 4388 4389 /* The movi is not explicitly generated here. */ 4390 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4391 ots->val = val; 4392 ots->mem_coherent = 0; 4393 if (NEED_SYNC_ARG(0)) { 4394 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4395 } else if (IS_DEAD_ARG(0)) { 4396 temp_dead(s, ots); 4397 } 4398 } 4399 4400 /* 4401 * Specialized code generation for INDEX_op_mov_*. 4402 */ 4403 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4404 { 4405 const TCGLifeData arg_life = op->life; 4406 TCGRegSet allocated_regs, preferred_regs; 4407 TCGTemp *ts, *ots; 4408 TCGType otype, itype; 4409 TCGReg oreg, ireg; 4410 4411 allocated_regs = s->reserved_regs; 4412 preferred_regs = output_pref(op, 0); 4413 ots = arg_temp(op->args[0]); 4414 ts = arg_temp(op->args[1]); 4415 4416 /* ENV should not be modified. */ 4417 tcg_debug_assert(!temp_readonly(ots)); 4418 4419 /* Note that otype != itype for no-op truncation. 
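e.g. on a 64-bit host a truncation from i64 to i32 may be expressed as a
plain mov whose input temp is still TCG_TYPE_I64 while the output temp is
TCG_TYPE_I32.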
*/ 4420 otype = ots->type; 4421 itype = ts->type; 4422 4423 if (ts->val_type == TEMP_VAL_CONST) { 4424 /* propagate constant or generate sti */ 4425 tcg_target_ulong val = ts->val; 4426 if (IS_DEAD_ARG(1)) { 4427 temp_dead(s, ts); 4428 } 4429 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4430 return; 4431 } 4432 4433 /* If the source value is in memory we're going to be forced 4434 to have it in a register in order to perform the copy. Copy 4435 the SOURCE value into its own register first, that way we 4436 don't have to reload SOURCE the next time it is used. */ 4437 if (ts->val_type == TEMP_VAL_MEM) { 4438 temp_load(s, ts, tcg_target_available_regs[itype], 4439 allocated_regs, preferred_regs); 4440 } 4441 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4442 ireg = ts->reg; 4443 4444 if (IS_DEAD_ARG(0)) { 4445 /* mov to a non-saved dead register makes no sense (even with 4446 liveness analysis disabled). */ 4447 tcg_debug_assert(NEED_SYNC_ARG(0)); 4448 if (!ots->mem_allocated) { 4449 temp_allocate_frame(s, ots); 4450 } 4451 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4452 if (IS_DEAD_ARG(1)) { 4453 temp_dead(s, ts); 4454 } 4455 temp_dead(s, ots); 4456 return; 4457 } 4458 4459 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4460 /* 4461 * The mov can be suppressed. Kill input first, so that it 4462 * is unlinked from reg_to_temp, then set the output to the 4463 * reg that we saved from the input. 4464 */ 4465 temp_dead(s, ts); 4466 oreg = ireg; 4467 } else { 4468 if (ots->val_type == TEMP_VAL_REG) { 4469 oreg = ots->reg; 4470 } else { 4471 /* Make sure to not spill the input register during allocation. */ 4472 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4473 allocated_regs | ((TCGRegSet)1 << ireg), 4474 preferred_regs, ots->indirect_base); 4475 } 4476 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4477 /* 4478 * Cross register class move not supported. 4479 * Store the source register into the destination slot 4480 * and leave the destination temp as TEMP_VAL_MEM. 4481 */ 4482 assert(!temp_readonly(ots)); 4483 if (!ts->mem_allocated) { 4484 temp_allocate_frame(s, ots); 4485 } 4486 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4487 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4488 ots->mem_coherent = 1; 4489 return; 4490 } 4491 } 4492 set_temp_val_reg(s, ots, oreg); 4493 ots->mem_coherent = 0; 4494 4495 if (NEED_SYNC_ARG(0)) { 4496 temp_sync(s, ots, allocated_regs, 0, 0); 4497 } 4498 } 4499 4500 /* 4501 * Specialized code generation for INDEX_op_dup_vec. 4502 */ 4503 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4504 { 4505 const TCGLifeData arg_life = op->life; 4506 TCGRegSet dup_out_regs, dup_in_regs; 4507 TCGTemp *its, *ots; 4508 TCGType itype, vtype; 4509 unsigned vece; 4510 int lowpart_ofs; 4511 bool ok; 4512 4513 ots = arg_temp(op->args[0]); 4514 its = arg_temp(op->args[1]); 4515 4516 /* ENV should not be modified. */ 4517 tcg_debug_assert(!temp_readonly(ots)); 4518 4519 itype = its->type; 4520 vece = TCGOP_VECE(op); 4521 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4522 4523 if (its->val_type == TEMP_VAL_CONST) { 4524 /* Propagate constant via movi -> dupi. 
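That is, only record the constant on the output temp here; if a register
is required later, temp_load will materialize it with tcg_out_dupi_vec,
using the smallest element size that replicates the value.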
*/ 4525 tcg_target_ulong val = its->val; 4526 if (IS_DEAD_ARG(1)) { 4527 temp_dead(s, its); 4528 } 4529 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4530 return; 4531 } 4532 4533 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4534 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 4535 4536 /* Allocate the output register now. */ 4537 if (ots->val_type != TEMP_VAL_REG) { 4538 TCGRegSet allocated_regs = s->reserved_regs; 4539 TCGReg oreg; 4540 4541 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4542 /* Make sure to not spill the input register. */ 4543 tcg_regset_set_reg(allocated_regs, its->reg); 4544 } 4545 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4546 output_pref(op, 0), ots->indirect_base); 4547 set_temp_val_reg(s, ots, oreg); 4548 } 4549 4550 switch (its->val_type) { 4551 case TEMP_VAL_REG: 4552 /* 4553 * The dup constriaints must be broad, covering all possible VECE. 4554 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 4555 * to fail, indicating that extra moves are required for that case. 4556 */ 4557 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4558 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4559 goto done; 4560 } 4561 /* Try again from memory or a vector input register. */ 4562 } 4563 if (!its->mem_coherent) { 4564 /* 4565 * The input register is not synced, and so an extra store 4566 * would be required to use memory. Attempt an integer-vector 4567 * register move first. We do not have a TCGRegSet for this. 4568 */ 4569 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4570 break; 4571 } 4572 /* Sync the temp back to its slot and load from there. */ 4573 temp_sync(s, its, s->reserved_regs, 0, 0); 4574 } 4575 /* fall through */ 4576 4577 case TEMP_VAL_MEM: 4578 lowpart_ofs = 0; 4579 if (HOST_BIG_ENDIAN) { 4580 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 4581 } 4582 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 4583 its->mem_offset + lowpart_ofs)) { 4584 goto done; 4585 } 4586 /* Load the input into the destination vector register. */ 4587 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 4588 break; 4589 4590 default: 4591 g_assert_not_reached(); 4592 } 4593 4594 /* We now have a vector input register, so dup must succeed. 
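(whichever path was taken above, the input value has already been placed
in ots->reg itself, so this is a dup of a vector register into itself).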
*/ 4595 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4596 tcg_debug_assert(ok); 4597 4598 done: 4599 ots->mem_coherent = 0; 4600 if (IS_DEAD_ARG(1)) { 4601 temp_dead(s, its); 4602 } 4603 if (NEED_SYNC_ARG(0)) { 4604 temp_sync(s, ots, s->reserved_regs, 0, 0); 4605 } 4606 if (IS_DEAD_ARG(0)) { 4607 temp_dead(s, ots); 4608 } 4609 } 4610 4611 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4612 { 4613 const TCGLifeData arg_life = op->life; 4614 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4615 TCGRegSet i_allocated_regs; 4616 TCGRegSet o_allocated_regs; 4617 int i, k, nb_iargs, nb_oargs; 4618 TCGReg reg; 4619 TCGArg arg; 4620 const TCGArgConstraint *arg_ct; 4621 TCGTemp *ts; 4622 TCGArg new_args[TCG_MAX_OP_ARGS]; 4623 int const_args[TCG_MAX_OP_ARGS]; 4624 4625 nb_oargs = def->nb_oargs; 4626 nb_iargs = def->nb_iargs; 4627 4628 /* copy constants */ 4629 memcpy(new_args + nb_oargs + nb_iargs, 4630 op->args + nb_oargs + nb_iargs, 4631 sizeof(TCGArg) * def->nb_cargs); 4632 4633 i_allocated_regs = s->reserved_regs; 4634 o_allocated_regs = s->reserved_regs; 4635 4636 /* satisfy input constraints */ 4637 for (k = 0; k < nb_iargs; k++) { 4638 TCGRegSet i_preferred_regs, i_required_regs; 4639 bool allocate_new_reg, copyto_new_reg; 4640 TCGTemp *ts2; 4641 int i1, i2; 4642 4643 i = def->args_ct[nb_oargs + k].sort_index; 4644 arg = op->args[i]; 4645 arg_ct = &def->args_ct[i]; 4646 ts = arg_temp(arg); 4647 4648 if (ts->val_type == TEMP_VAL_CONST 4649 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4650 /* constant is OK for instruction */ 4651 const_args[i] = 1; 4652 new_args[i] = ts->val; 4653 continue; 4654 } 4655 4656 reg = ts->reg; 4657 i_preferred_regs = 0; 4658 i_required_regs = arg_ct->regs; 4659 allocate_new_reg = false; 4660 copyto_new_reg = false; 4661 4662 switch (arg_ct->pair) { 4663 case 0: /* not paired */ 4664 if (arg_ct->ialias) { 4665 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4666 4667 /* 4668 * If the input is readonly, then it cannot also be an 4669 * output and aliased to itself. If the input is not 4670 * dead after the instruction, we must allocate a new 4671 * register and move it. 4672 */ 4673 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4674 allocate_new_reg = true; 4675 } else if (ts->val_type == TEMP_VAL_REG) { 4676 /* 4677 * Check if the current register has already been 4678 * allocated for another input. 4679 */ 4680 allocate_new_reg = 4681 tcg_regset_test_reg(i_allocated_regs, reg); 4682 } 4683 } 4684 if (!allocate_new_reg) { 4685 temp_load(s, ts, i_required_regs, i_allocated_regs, 4686 i_preferred_regs); 4687 reg = ts->reg; 4688 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4689 } 4690 if (allocate_new_reg) { 4691 /* 4692 * Allocate a new register matching the constraint 4693 * and move the temporary register into it. 4694 */ 4695 temp_load(s, ts, tcg_target_available_regs[ts->type], 4696 i_allocated_regs, 0); 4697 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4698 i_preferred_regs, ts->indirect_base); 4699 copyto_new_reg = true; 4700 } 4701 break; 4702 4703 case 1: 4704 /* First of an input pair; if i1 == i2, the second is an output. */ 4705 i1 = i; 4706 i2 = arg_ct->pair_index; 4707 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4708 4709 /* 4710 * It is easier to default to allocating a new pair 4711 * and to identify a few cases where it's not required. 
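* The checks below keep the input's existing registers only when, roughly,
* the aliased input is dead and not read-only, the low register satisfies
* the constraint, neither half of the pair has been claimed by another
* input, and the second half is either free or already holds the paired
* input.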
4712 */ 4713 if (arg_ct->ialias) { 4714 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4715 if (IS_DEAD_ARG(i1) && 4716 IS_DEAD_ARG(i2) && 4717 !temp_readonly(ts) && 4718 ts->val_type == TEMP_VAL_REG && 4719 ts->reg < TCG_TARGET_NB_REGS - 1 && 4720 tcg_regset_test_reg(i_required_regs, reg) && 4721 !tcg_regset_test_reg(i_allocated_regs, reg) && 4722 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4723 (ts2 4724 ? ts2->val_type == TEMP_VAL_REG && 4725 ts2->reg == reg + 1 && 4726 !temp_readonly(ts2) 4727 : s->reg_to_temp[reg + 1] == NULL)) { 4728 break; 4729 } 4730 } else { 4731 /* Without aliasing, the pair must also be an input. */ 4732 tcg_debug_assert(ts2); 4733 if (ts->val_type == TEMP_VAL_REG && 4734 ts2->val_type == TEMP_VAL_REG && 4735 ts2->reg == reg + 1 && 4736 tcg_regset_test_reg(i_required_regs, reg)) { 4737 break; 4738 } 4739 } 4740 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4741 0, ts->indirect_base); 4742 goto do_pair; 4743 4744 case 2: /* pair second */ 4745 reg = new_args[arg_ct->pair_index] + 1; 4746 goto do_pair; 4747 4748 case 3: /* ialias with second output, no first input */ 4749 tcg_debug_assert(arg_ct->ialias); 4750 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4751 4752 if (IS_DEAD_ARG(i) && 4753 !temp_readonly(ts) && 4754 ts->val_type == TEMP_VAL_REG && 4755 reg > 0 && 4756 s->reg_to_temp[reg - 1] == NULL && 4757 tcg_regset_test_reg(i_required_regs, reg) && 4758 !tcg_regset_test_reg(i_allocated_regs, reg) && 4759 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4760 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4761 break; 4762 } 4763 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4764 i_allocated_regs, 0, 4765 ts->indirect_base); 4766 tcg_regset_set_reg(i_allocated_regs, reg); 4767 reg += 1; 4768 goto do_pair; 4769 4770 do_pair: 4771 /* 4772 * If an aliased input is not dead after the instruction, 4773 * we must allocate a new register and move it. 4774 */ 4775 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4776 TCGRegSet t_allocated_regs = i_allocated_regs; 4777 4778 /* 4779 * Because of the alias, and the continued life, make sure 4780 * that the temp is somewhere *other* than the reg pair, 4781 * and we get a copy in reg. 4782 */ 4783 tcg_regset_set_reg(t_allocated_regs, reg); 4784 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4785 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4786 /* If ts was already in reg, copy it somewhere else. */ 4787 TCGReg nr; 4788 bool ok; 4789 4790 tcg_debug_assert(ts->kind != TEMP_FIXED); 4791 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4792 t_allocated_regs, 0, ts->indirect_base); 4793 ok = tcg_out_mov(s, ts->type, nr, reg); 4794 tcg_debug_assert(ok); 4795 4796 set_temp_val_reg(s, ts, nr); 4797 } else { 4798 temp_load(s, ts, tcg_target_available_regs[ts->type], 4799 t_allocated_regs, 0); 4800 copyto_new_reg = true; 4801 } 4802 } else { 4803 /* Preferably allocate to reg, otherwise copy. */ 4804 i_required_regs = (TCGRegSet)1 << reg; 4805 temp_load(s, ts, i_required_regs, i_allocated_regs, 4806 i_preferred_regs); 4807 copyto_new_reg = ts->reg != reg; 4808 } 4809 break; 4810 4811 default: 4812 g_assert_not_reached(); 4813 } 4814 4815 if (copyto_new_reg) { 4816 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4817 /* 4818 * Cross register class move not supported. Sync the 4819 * temp back to its slot and load from there. 
4820 */ 4821 temp_sync(s, ts, i_allocated_regs, 0, 0); 4822 tcg_out_ld(s, ts->type, reg, 4823 ts->mem_base->reg, ts->mem_offset); 4824 } 4825 } 4826 new_args[i] = reg; 4827 const_args[i] = 0; 4828 tcg_regset_set_reg(i_allocated_regs, reg); 4829 } 4830 4831 /* mark dead temporaries and free the associated registers */ 4832 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4833 if (IS_DEAD_ARG(i)) { 4834 temp_dead(s, arg_temp(op->args[i])); 4835 } 4836 } 4837 4838 if (def->flags & TCG_OPF_COND_BRANCH) { 4839 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4840 } else if (def->flags & TCG_OPF_BB_END) { 4841 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4842 } else { 4843 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4844 /* XXX: permit generic clobber register list ? */ 4845 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4846 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4847 tcg_reg_free(s, i, i_allocated_regs); 4848 } 4849 } 4850 } 4851 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4852 /* sync globals if the op has side effects and might trigger 4853 an exception. */ 4854 sync_globals(s, i_allocated_regs); 4855 } 4856 4857 /* satisfy the output constraints */ 4858 for(k = 0; k < nb_oargs; k++) { 4859 i = def->args_ct[k].sort_index; 4860 arg = op->args[i]; 4861 arg_ct = &def->args_ct[i]; 4862 ts = arg_temp(arg); 4863 4864 /* ENV should not be modified. */ 4865 tcg_debug_assert(!temp_readonly(ts)); 4866 4867 switch (arg_ct->pair) { 4868 case 0: /* not paired */ 4869 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4870 reg = new_args[arg_ct->alias_index]; 4871 } else if (arg_ct->newreg) { 4872 reg = tcg_reg_alloc(s, arg_ct->regs, 4873 i_allocated_regs | o_allocated_regs, 4874 output_pref(op, k), ts->indirect_base); 4875 } else { 4876 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4877 output_pref(op, k), ts->indirect_base); 4878 } 4879 break; 4880 4881 case 1: /* first of pair */ 4882 tcg_debug_assert(!arg_ct->newreg); 4883 if (arg_ct->oalias) { 4884 reg = new_args[arg_ct->alias_index]; 4885 break; 4886 } 4887 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 4888 output_pref(op, k), ts->indirect_base); 4889 break; 4890 4891 case 2: /* second of pair */ 4892 tcg_debug_assert(!arg_ct->newreg); 4893 if (arg_ct->oalias) { 4894 reg = new_args[arg_ct->alias_index]; 4895 } else { 4896 reg = new_args[arg_ct->pair_index] + 1; 4897 } 4898 break; 4899 4900 case 3: /* first of pair, aliasing with a second input */ 4901 tcg_debug_assert(!arg_ct->newreg); 4902 reg = new_args[arg_ct->pair_index] - 1; 4903 break; 4904 4905 default: 4906 g_assert_not_reached(); 4907 } 4908 tcg_regset_set_reg(o_allocated_regs, reg); 4909 set_temp_val_reg(s, ts, reg); 4910 ts->mem_coherent = 0; 4911 new_args[i] = reg; 4912 } 4913 } 4914 4915 /* emit instruction */ 4916 switch (op->opc) { 4917 case INDEX_op_ext8s_i32: 4918 tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4919 break; 4920 case INDEX_op_ext8s_i64: 4921 tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4922 break; 4923 case INDEX_op_ext8u_i32: 4924 case INDEX_op_ext8u_i64: 4925 tcg_out_ext8u(s, new_args[0], new_args[1]); 4926 break; 4927 case INDEX_op_ext16s_i32: 4928 tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4929 break; 4930 case INDEX_op_ext16s_i64: 4931 tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4932 break; 4933 case INDEX_op_ext16u_i32: 4934 case INDEX_op_ext16u_i64: 4935 tcg_out_ext16u(s, new_args[0], new_args[1]); 4936 break; 4937 case INDEX_op_ext32s_i64: 4938 tcg_out_ext32s(s, 
new_args[0], new_args[1]); 4939 break; 4940 case INDEX_op_ext32u_i64: 4941 tcg_out_ext32u(s, new_args[0], new_args[1]); 4942 break; 4943 case INDEX_op_ext_i32_i64: 4944 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 4945 break; 4946 case INDEX_op_extu_i32_i64: 4947 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 4948 break; 4949 case INDEX_op_extrl_i64_i32: 4950 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 4951 break; 4952 default: 4953 if (def->flags & TCG_OPF_VECTOR) { 4954 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4955 new_args, const_args); 4956 } else { 4957 tcg_out_op(s, op->opc, new_args, const_args); 4958 } 4959 break; 4960 } 4961 4962 /* move the outputs in the correct register if needed */ 4963 for(i = 0; i < nb_oargs; i++) { 4964 ts = arg_temp(op->args[i]); 4965 4966 /* ENV should not be modified. */ 4967 tcg_debug_assert(!temp_readonly(ts)); 4968 4969 if (NEED_SYNC_ARG(i)) { 4970 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4971 } else if (IS_DEAD_ARG(i)) { 4972 temp_dead(s, ts); 4973 } 4974 } 4975 } 4976 4977 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4978 { 4979 const TCGLifeData arg_life = op->life; 4980 TCGTemp *ots, *itsl, *itsh; 4981 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4982 4983 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4984 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4985 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4986 4987 ots = arg_temp(op->args[0]); 4988 itsl = arg_temp(op->args[1]); 4989 itsh = arg_temp(op->args[2]); 4990 4991 /* ENV should not be modified. */ 4992 tcg_debug_assert(!temp_readonly(ots)); 4993 4994 /* Allocate the output register now. */ 4995 if (ots->val_type != TEMP_VAL_REG) { 4996 TCGRegSet allocated_regs = s->reserved_regs; 4997 TCGRegSet dup_out_regs = 4998 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4999 TCGReg oreg; 5000 5001 /* Make sure to not spill the input registers. */ 5002 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 5003 tcg_regset_set_reg(allocated_regs, itsl->reg); 5004 } 5005 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 5006 tcg_regset_set_reg(allocated_regs, itsh->reg); 5007 } 5008 5009 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 5010 output_pref(op, 0), ots->indirect_base); 5011 set_temp_val_reg(s, ots, oreg); 5012 } 5013 5014 /* Promote dup2 of immediates to dupi_vec. */ 5015 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 5016 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 5017 MemOp vece = MO_64; 5018 5019 if (val == dup_const(MO_8, val)) { 5020 vece = MO_8; 5021 } else if (val == dup_const(MO_16, val)) { 5022 vece = MO_16; 5023 } else if (val == dup_const(MO_32, val)) { 5024 vece = MO_32; 5025 } 5026 5027 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 5028 goto done; 5029 } 5030 5031 /* If the two inputs form one 64-bit value, try dupm_vec. */ 5032 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 5033 itsh->temp_subindex == !HOST_BIG_ENDIAN && 5034 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 5035 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 5036 5037 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 5038 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 5039 5040 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 5041 its->mem_base->reg, its->mem_offset)) { 5042 goto done; 5043 } 5044 } 5045 5046 /* Fall back to generic expansion. 
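i.e. return false so that the caller (tcg_gen_code) drops through to
tcg_reg_alloc_op and expands dup2_vec via the normal constraint path.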
*/ 5047 return false; 5048 5049 done: 5050 ots->mem_coherent = 0; 5051 if (IS_DEAD_ARG(1)) { 5052 temp_dead(s, itsl); 5053 } 5054 if (IS_DEAD_ARG(2)) { 5055 temp_dead(s, itsh); 5056 } 5057 if (NEED_SYNC_ARG(0)) { 5058 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 5059 } else if (IS_DEAD_ARG(0)) { 5060 temp_dead(s, ots); 5061 } 5062 return true; 5063 } 5064 5065 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 5066 TCGRegSet allocated_regs) 5067 { 5068 if (ts->val_type == TEMP_VAL_REG) { 5069 if (ts->reg != reg) { 5070 tcg_reg_free(s, reg, allocated_regs); 5071 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 5072 /* 5073 * Cross register class move not supported. Sync the 5074 * temp back to its slot and load from there. 5075 */ 5076 temp_sync(s, ts, allocated_regs, 0, 0); 5077 tcg_out_ld(s, ts->type, reg, 5078 ts->mem_base->reg, ts->mem_offset); 5079 } 5080 } 5081 } else { 5082 TCGRegSet arg_set = 0; 5083 5084 tcg_reg_free(s, reg, allocated_regs); 5085 tcg_regset_set_reg(arg_set, reg); 5086 temp_load(s, ts, arg_set, allocated_regs, 0); 5087 } 5088 } 5089 5090 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 5091 TCGRegSet allocated_regs) 5092 { 5093 /* 5094 * When the destination is on the stack, load up the temp and store. 5095 * If there are many call-saved registers, the temp might live to 5096 * see another use; otherwise it'll be discarded. 5097 */ 5098 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 5099 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 5100 arg_slot_stk_ofs(arg_slot)); 5101 } 5102 5103 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 5104 TCGTemp *ts, TCGRegSet *allocated_regs) 5105 { 5106 if (arg_slot_reg_p(l->arg_slot)) { 5107 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 5108 load_arg_reg(s, reg, ts, *allocated_regs); 5109 tcg_regset_set_reg(*allocated_regs, reg); 5110 } else { 5111 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 5112 } 5113 } 5114 5115 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 5116 intptr_t ref_off, TCGRegSet *allocated_regs) 5117 { 5118 TCGReg reg; 5119 5120 if (arg_slot_reg_p(arg_slot)) { 5121 reg = tcg_target_call_iarg_regs[arg_slot]; 5122 tcg_reg_free(s, reg, *allocated_regs); 5123 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5124 tcg_regset_set_reg(*allocated_regs, reg); 5125 } else { 5126 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 5127 *allocated_regs, 0, false); 5128 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 5129 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 5130 arg_slot_stk_ofs(arg_slot)); 5131 } 5132 } 5133 5134 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 5135 { 5136 const int nb_oargs = TCGOP_CALLO(op); 5137 const int nb_iargs = TCGOP_CALLI(op); 5138 const TCGLifeData arg_life = op->life; 5139 const TCGHelperInfo *info = tcg_call_info(op); 5140 TCGRegSet allocated_regs = s->reserved_regs; 5141 int i; 5142 5143 /* 5144 * Move inputs into place in reverse order, 5145 * so that we place stacked arguments first. 
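* Stacked arguments may need to stage their values through arbitrary
* scratch registers, so handle them while the fixed argument registers
* have not yet been claimed; compare the similar ordering in
* tcg_out_helper_load_slots.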
5146 */ 5147 for (i = nb_iargs - 1; i >= 0; --i) { 5148 const TCGCallArgumentLoc *loc = &info->in[i]; 5149 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 5150 5151 switch (loc->kind) { 5152 case TCG_CALL_ARG_NORMAL: 5153 case TCG_CALL_ARG_EXTEND_U: 5154 case TCG_CALL_ARG_EXTEND_S: 5155 load_arg_normal(s, loc, ts, &allocated_regs); 5156 break; 5157 case TCG_CALL_ARG_BY_REF: 5158 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5159 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 5160 arg_slot_stk_ofs(loc->ref_slot), 5161 &allocated_regs); 5162 break; 5163 case TCG_CALL_ARG_BY_REF_N: 5164 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 5165 break; 5166 default: 5167 g_assert_not_reached(); 5168 } 5169 } 5170 5171 /* Mark dead temporaries and free the associated registers. */ 5172 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 5173 if (IS_DEAD_ARG(i)) { 5174 temp_dead(s, arg_temp(op->args[i])); 5175 } 5176 } 5177 5178 /* Clobber call registers. */ 5179 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 5180 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 5181 tcg_reg_free(s, i, allocated_regs); 5182 } 5183 } 5184 5185 /* 5186 * Save globals if they might be written by the helper, 5187 * sync them if they might be read. 5188 */ 5189 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 5190 /* Nothing to do */ 5191 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 5192 sync_globals(s, allocated_regs); 5193 } else { 5194 save_globals(s, allocated_regs); 5195 } 5196 5197 /* 5198 * If the ABI passes a pointer to the returned struct as the first 5199 * argument, load that now. Pass a pointer to the output home slot. 5200 */ 5201 if (info->out_kind == TCG_CALL_RET_BY_REF) { 5202 TCGTemp *ts = arg_temp(op->args[0]); 5203 5204 if (!ts->mem_allocated) { 5205 temp_allocate_frame(s, ts); 5206 } 5207 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 5208 } 5209 5210 tcg_out_call(s, tcg_call_func(op), info); 5211 5212 /* Assign output registers and emit moves if needed. */ 5213 switch (info->out_kind) { 5214 case TCG_CALL_RET_NORMAL: 5215 for (i = 0; i < nb_oargs; i++) { 5216 TCGTemp *ts = arg_temp(op->args[i]); 5217 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 5218 5219 /* ENV should not be modified. */ 5220 tcg_debug_assert(!temp_readonly(ts)); 5221 5222 set_temp_val_reg(s, ts, reg); 5223 ts->mem_coherent = 0; 5224 } 5225 break; 5226 5227 case TCG_CALL_RET_BY_VEC: 5228 { 5229 TCGTemp *ts = arg_temp(op->args[0]); 5230 5231 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 5232 tcg_debug_assert(ts->temp_subindex == 0); 5233 if (!ts->mem_allocated) { 5234 temp_allocate_frame(s, ts); 5235 } 5236 tcg_out_st(s, TCG_TYPE_V128, 5237 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5238 ts->mem_base->reg, ts->mem_offset); 5239 } 5240 /* fall through to mark all parts in memory */ 5241 5242 case TCG_CALL_RET_BY_REF: 5243 /* The callee has performed a write through the reference. */ 5244 for (i = 0; i < nb_oargs; i++) { 5245 TCGTemp *ts = arg_temp(op->args[i]); 5246 ts->val_type = TEMP_VAL_MEM; 5247 } 5248 break; 5249 5250 default: 5251 g_assert_not_reached(); 5252 } 5253 5254 /* Flush or discard output registers as needed. 
*/ 5255 for (i = 0; i < nb_oargs; i++) { 5256 TCGTemp *ts = arg_temp(op->args[i]); 5257 if (NEED_SYNC_ARG(i)) { 5258 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5259 } else if (IS_DEAD_ARG(i)) { 5260 temp_dead(s, ts); 5261 } 5262 } 5263 } 5264 5265 /** 5266 * atom_and_align_for_opc: 5267 * @s: tcg context 5268 * @opc: memory operation code 5269 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations 5270 * @allow_two_ops: true if we are prepared to issue two operations 5271 * 5272 * Return the alignment and atomicity to use for the inline fast path 5273 * for the given memory operation. The alignment may be larger than 5274 * that specified in @opc, and the correct alignment will be diagnosed 5275 * by the slow path helper. 5276 * 5277 * If @allow_two_ops, the host is prepared to test for 2x alignment, 5278 * and issue two loads or stores for subalignment. 5279 */ 5280 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, 5281 MemOp host_atom, bool allow_two_ops) 5282 { 5283 MemOp align = get_alignment_bits(opc); 5284 MemOp size = opc & MO_SIZE; 5285 MemOp half = size ? size - 1 : 0; 5286 MemOp atmax; 5287 MemOp atom; 5288 5289 /* When serialized, no further atomicity required. */ 5290 if (s->gen_tb->cflags & CF_PARALLEL) { 5291 atom = opc & MO_ATOM_MASK; 5292 } else { 5293 atom = MO_ATOM_NONE; 5294 } 5295 5296 switch (atom) { 5297 case MO_ATOM_NONE: 5298 /* The operation requires no specific atomicity. */ 5299 atmax = MO_8; 5300 break; 5301 5302 case MO_ATOM_IFALIGN: 5303 atmax = size; 5304 break; 5305 5306 case MO_ATOM_IFALIGN_PAIR: 5307 atmax = half; 5308 break; 5309 5310 case MO_ATOM_WITHIN16: 5311 atmax = size; 5312 if (size == MO_128) { 5313 /* Misalignment implies !within16, and therefore no atomicity. */ 5314 } else if (host_atom != MO_ATOM_WITHIN16) { 5315 /* The host does not implement within16, so require alignment. */ 5316 align = MAX(align, size); 5317 } 5318 break; 5319 5320 case MO_ATOM_WITHIN16_PAIR: 5321 atmax = size; 5322 /* 5323 * Misalignment implies !within16, and therefore half atomicity. 5324 * Any host prepared for two operations can implement this with 5325 * half alignment. 5326 */ 5327 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) { 5328 align = MAX(align, half); 5329 } 5330 break; 5331 5332 case MO_ATOM_SUBALIGN: 5333 atmax = size; 5334 if (host_atom != MO_ATOM_SUBALIGN) { 5335 /* If unaligned but not odd, there are subobjects up to half. */ 5336 if (allow_two_ops) { 5337 align = MAX(align, half); 5338 } else { 5339 align = MAX(align, size); 5340 } 5341 } 5342 break; 5343 5344 default: 5345 g_assert_not_reached(); 5346 } 5347 5348 return (TCGAtomAlign){ .atom = atmax, .align = align }; 5349 } 5350 5351 /* 5352 * Similarly for qemu_ld/st slow path helpers. 5353 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously, 5354 * using only the provided backend tcg_out_* functions. 5355 */ 5356 5357 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot) 5358 { 5359 int ofs = arg_slot_stk_ofs(slot); 5360 5361 /* 5362 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not 5363 * require extension to uint64_t, adjust the address for uint32_t. 
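* e.g. on a big-endian 64-bit host a 32-bit value lives in the
* high-addressed half of its 8-byte slot, hence the + 4 below.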
5364 */ 5365 if (HOST_BIG_ENDIAN && 5366 TCG_TARGET_REG_BITS == 64 && 5367 type == TCG_TYPE_I32) { 5368 ofs += 4; 5369 } 5370 return ofs; 5371 } 5372 5373 static void tcg_out_helper_load_slots(TCGContext *s, 5374 unsigned nmov, TCGMovExtend *mov, 5375 const TCGLdstHelperParam *parm) 5376 { 5377 unsigned i; 5378 TCGReg dst3; 5379 5380 /* 5381 * Start from the end, storing to the stack first. 5382 * This frees those registers, so we need not consider overlap. 5383 */ 5384 for (i = nmov; i-- > 0; ) { 5385 unsigned slot = mov[i].dst; 5386 5387 if (arg_slot_reg_p(slot)) { 5388 goto found_reg; 5389 } 5390 5391 TCGReg src = mov[i].src; 5392 TCGType dst_type = mov[i].dst_type; 5393 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5394 5395 /* The argument is going onto the stack; extend into scratch. */ 5396 if ((mov[i].src_ext & MO_SIZE) != dst_mo) { 5397 tcg_debug_assert(parm->ntmp != 0); 5398 mov[i].dst = src = parm->tmp[0]; 5399 tcg_out_movext1(s, &mov[i]); 5400 } 5401 5402 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK, 5403 tcg_out_helper_stk_ofs(dst_type, slot)); 5404 } 5405 return; 5406 5407 found_reg: 5408 /* 5409 * The remaining arguments are in registers. 5410 * Convert slot numbers to argument registers. 5411 */ 5412 nmov = i + 1; 5413 for (i = 0; i < nmov; ++i) { 5414 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst]; 5415 } 5416 5417 switch (nmov) { 5418 case 4: 5419 /* The backend must have provided enough temps for the worst case. */ 5420 tcg_debug_assert(parm->ntmp >= 2); 5421 5422 dst3 = mov[3].dst; 5423 for (unsigned j = 0; j < 3; ++j) { 5424 if (dst3 == mov[j].src) { 5425 /* 5426 * Conflict. Copy the source to a temporary, perform the 5427 * remaining moves, then the extension from our scratch 5428 * on the way out. 5429 */ 5430 TCGReg scratch = parm->tmp[1]; 5431 5432 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src); 5433 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]); 5434 tcg_out_movext1_new_src(s, &mov[3], scratch); 5435 break; 5436 } 5437 } 5438 5439 /* No conflicts: perform this move and continue. */ 5440 tcg_out_movext1(s, &mov[3]); 5441 /* fall through */ 5442 5443 case 3: 5444 tcg_out_movext3(s, mov, mov + 1, mov + 2, 5445 parm->ntmp ? parm->tmp[0] : -1); 5446 break; 5447 case 2: 5448 tcg_out_movext2(s, mov, mov + 1, 5449 parm->ntmp ? parm->tmp[0] : -1); 5450 break; 5451 case 1: 5452 tcg_out_movext1(s, mov); 5453 break; 5454 default: 5455 g_assert_not_reached(); 5456 } 5457 } 5458 5459 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot, 5460 TCGType type, tcg_target_long imm, 5461 const TCGLdstHelperParam *parm) 5462 { 5463 if (arg_slot_reg_p(slot)) { 5464 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm); 5465 } else { 5466 int ofs = tcg_out_helper_stk_ofs(type, slot); 5467 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) { 5468 tcg_debug_assert(parm->ntmp != 0); 5469 tcg_out_movi(s, type, parm->tmp[0], imm); 5470 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs); 5471 } 5472 } 5473 } 5474 5475 static void tcg_out_helper_load_common_args(TCGContext *s, 5476 const TCGLabelQemuLdst *ldst, 5477 const TCGLdstHelperParam *parm, 5478 const TCGHelperInfo *info, 5479 unsigned next_arg) 5480 { 5481 TCGMovExtend ptr_mov = { 5482 .dst_type = TCG_TYPE_PTR, 5483 .src_type = TCG_TYPE_PTR, 5484 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64 5485 }; 5486 const TCGCallArgumentLoc *loc = &info->in[0]; 5487 TCGType type; 5488 unsigned slot; 5489 tcg_target_ulong imm; 5490 5491 /* 5492 * Handle env, which is always first. 
5493 */ 5494 ptr_mov.dst = loc->arg_slot; 5495 ptr_mov.src = TCG_AREG0; 5496 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 5497 5498 /* 5499 * Handle oi. 5500 */ 5501 imm = ldst->oi; 5502 loc = &info->in[next_arg]; 5503 type = TCG_TYPE_I32; 5504 switch (loc->kind) { 5505 case TCG_CALL_ARG_NORMAL: 5506 break; 5507 case TCG_CALL_ARG_EXTEND_U: 5508 case TCG_CALL_ARG_EXTEND_S: 5509 /* No extension required for MemOpIdx. */ 5510 tcg_debug_assert(imm <= INT32_MAX); 5511 type = TCG_TYPE_REG; 5512 break; 5513 default: 5514 g_assert_not_reached(); 5515 } 5516 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm); 5517 next_arg++; 5518 5519 /* 5520 * Handle ra. 5521 */ 5522 loc = &info->in[next_arg]; 5523 slot = loc->arg_slot; 5524 if (parm->ra_gen) { 5525 int arg_reg = -1; 5526 TCGReg ra_reg; 5527 5528 if (arg_slot_reg_p(slot)) { 5529 arg_reg = tcg_target_call_iarg_regs[slot]; 5530 } 5531 ra_reg = parm->ra_gen(s, ldst, arg_reg); 5532 5533 ptr_mov.dst = slot; 5534 ptr_mov.src = ra_reg; 5535 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm); 5536 } else { 5537 imm = (uintptr_t)ldst->raddr; 5538 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm); 5539 } 5540 } 5541 5542 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov, 5543 const TCGCallArgumentLoc *loc, 5544 TCGType dst_type, TCGType src_type, 5545 TCGReg lo, TCGReg hi) 5546 { 5547 MemOp reg_mo; 5548 5549 if (dst_type <= TCG_TYPE_REG) { 5550 MemOp src_ext; 5551 5552 switch (loc->kind) { 5553 case TCG_CALL_ARG_NORMAL: 5554 src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64; 5555 break; 5556 case TCG_CALL_ARG_EXTEND_U: 5557 dst_type = TCG_TYPE_REG; 5558 src_ext = MO_UL; 5559 break; 5560 case TCG_CALL_ARG_EXTEND_S: 5561 dst_type = TCG_TYPE_REG; 5562 src_ext = MO_SL; 5563 break; 5564 default: 5565 g_assert_not_reached(); 5566 } 5567 5568 mov[0].dst = loc->arg_slot; 5569 mov[0].dst_type = dst_type; 5570 mov[0].src = lo; 5571 mov[0].src_type = src_type; 5572 mov[0].src_ext = src_ext; 5573 return 1; 5574 } 5575 5576 if (TCG_TARGET_REG_BITS == 32) { 5577 assert(dst_type == TCG_TYPE_I64); 5578 reg_mo = MO_32; 5579 } else { 5580 assert(dst_type == TCG_TYPE_I128); 5581 reg_mo = MO_64; 5582 } 5583 5584 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot; 5585 mov[0].src = lo; 5586 mov[0].dst_type = TCG_TYPE_REG; 5587 mov[0].src_type = TCG_TYPE_REG; 5588 mov[0].src_ext = reg_mo; 5589 5590 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot; 5591 mov[1].src = hi; 5592 mov[1].dst_type = TCG_TYPE_REG; 5593 mov[1].src_type = TCG_TYPE_REG; 5594 mov[1].src_ext = reg_mo; 5595 5596 return 2; 5597 } 5598 5599 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 5600 const TCGLdstHelperParam *parm) 5601 { 5602 const TCGHelperInfo *info; 5603 const TCGCallArgumentLoc *loc; 5604 TCGMovExtend mov[2]; 5605 unsigned next_arg, nmov; 5606 MemOp mop = get_memop(ldst->oi); 5607 5608 switch (mop & MO_SIZE) { 5609 case MO_8: 5610 case MO_16: 5611 case MO_32: 5612 info = &info_helper_ld32_mmu; 5613 break; 5614 case MO_64: 5615 info = &info_helper_ld64_mmu; 5616 break; 5617 case MO_128: 5618 info = &info_helper_ld128_mmu; 5619 break; 5620 default: 5621 g_assert_not_reached(); 5622 } 5623 5624 /* Defer env argument. */ 5625 next_arg = 1; 5626 5627 loc = &info->in[next_arg]; 5628 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 5629 /* 5630 * 32-bit host with 32-bit guest: zero-extend the guest address 5631 * to 64-bits for the helper by storing the low part, then 5632 * load a zero for the high part. 
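* The 64-bit address argument occupies two consecutive 32-bit slots;
* which of the two receives the zero depends on HOST_BIG_ENDIAN, as
* reflected in the loc[] indexing below.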
5633 */ 5634 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 5635 TCG_TYPE_I32, TCG_TYPE_I32, 5636 ldst->addrlo_reg, -1); 5637 tcg_out_helper_load_slots(s, 1, mov, parm); 5638 5639 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, 5640 TCG_TYPE_I32, 0, parm); 5641 next_arg += 2; 5642 } else { 5643 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 5644 ldst->addrlo_reg, ldst->addrhi_reg); 5645 tcg_out_helper_load_slots(s, nmov, mov, parm); 5646 next_arg += nmov; 5647 } 5648 5649 switch (info->out_kind) { 5650 case TCG_CALL_RET_NORMAL: 5651 case TCG_CALL_RET_BY_VEC: 5652 break; 5653 case TCG_CALL_RET_BY_REF: 5654 /* 5655 * The return reference is in the first argument slot. 5656 * We need memory in which to return: re-use the top of stack. 5657 */ 5658 { 5659 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 5660 5661 if (arg_slot_reg_p(0)) { 5662 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0], 5663 TCG_REG_CALL_STACK, ofs_slot0); 5664 } else { 5665 tcg_debug_assert(parm->ntmp != 0); 5666 tcg_out_addi_ptr(s, parm->tmp[0], 5667 TCG_REG_CALL_STACK, ofs_slot0); 5668 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 5669 TCG_REG_CALL_STACK, ofs_slot0); 5670 } 5671 } 5672 break; 5673 default: 5674 g_assert_not_reached(); 5675 } 5676 5677 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 5678 } 5679 5680 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, 5681 bool load_sign, 5682 const TCGLdstHelperParam *parm) 5683 { 5684 MemOp mop = get_memop(ldst->oi); 5685 TCGMovExtend mov[2]; 5686 int ofs_slot0; 5687 5688 switch (ldst->type) { 5689 case TCG_TYPE_I64: 5690 if (TCG_TARGET_REG_BITS == 32) { 5691 break; 5692 } 5693 /* fall through */ 5694 5695 case TCG_TYPE_I32: 5696 mov[0].dst = ldst->datalo_reg; 5697 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0); 5698 mov[0].dst_type = ldst->type; 5699 mov[0].src_type = TCG_TYPE_REG; 5700 5701 /* 5702 * If load_sign, then we allowed the helper to perform the 5703 * appropriate sign extension to tcg_target_ulong, and all 5704 * we need now is a plain move. 5705 * 5706 * If they do not, then we expect the relevant extension 5707 * instruction to be no more expensive than a move, and 5708 * we thus save the icache etc by only using one of two 5709 * helper functions. 
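* e.g. a signed byte load serviced by the unsigned helper is finished
* here by setting src_ext to MO_SB, so that tcg_out_movext1 emits the
* equivalent of ext8s on the return register.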
5710 */ 5711 if (load_sign || !(mop & MO_SIGN)) { 5712 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) { 5713 mov[0].src_ext = MO_32; 5714 } else { 5715 mov[0].src_ext = MO_64; 5716 } 5717 } else { 5718 mov[0].src_ext = mop & MO_SSIZE; 5719 } 5720 tcg_out_movext1(s, mov); 5721 return; 5722 5723 case TCG_TYPE_I128: 5724 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 5725 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET; 5726 switch (TCG_TARGET_CALL_RET_I128) { 5727 case TCG_CALL_RET_NORMAL: 5728 break; 5729 case TCG_CALL_RET_BY_VEC: 5730 tcg_out_st(s, TCG_TYPE_V128, 5731 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 5732 TCG_REG_CALL_STACK, ofs_slot0); 5733 /* fall through */ 5734 case TCG_CALL_RET_BY_REF: 5735 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg, 5736 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN); 5737 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg, 5738 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN); 5739 return; 5740 default: 5741 g_assert_not_reached(); 5742 } 5743 break; 5744 5745 default: 5746 g_assert_not_reached(); 5747 } 5748 5749 mov[0].dst = ldst->datalo_reg; 5750 mov[0].src = 5751 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); 5752 mov[0].dst_type = TCG_TYPE_REG; 5753 mov[0].src_type = TCG_TYPE_REG; 5754 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 5755 5756 mov[1].dst = ldst->datahi_reg; 5757 mov[1].src = 5758 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN); 5759 mov[1].dst_type = TCG_TYPE_REG; 5760 mov[1].src_type = TCG_TYPE_REG; 5761 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; 5762 5763 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1); 5764 } 5765 5766 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, 5767 const TCGLdstHelperParam *parm) 5768 { 5769 const TCGHelperInfo *info; 5770 const TCGCallArgumentLoc *loc; 5771 TCGMovExtend mov[4]; 5772 TCGType data_type; 5773 unsigned next_arg, nmov, n; 5774 MemOp mop = get_memop(ldst->oi); 5775 5776 switch (mop & MO_SIZE) { 5777 case MO_8: 5778 case MO_16: 5779 case MO_32: 5780 info = &info_helper_st32_mmu; 5781 data_type = TCG_TYPE_I32; 5782 break; 5783 case MO_64: 5784 info = &info_helper_st64_mmu; 5785 data_type = TCG_TYPE_I64; 5786 break; 5787 case MO_128: 5788 info = &info_helper_st128_mmu; 5789 data_type = TCG_TYPE_I128; 5790 break; 5791 default: 5792 g_assert_not_reached(); 5793 } 5794 5795 /* Defer env argument. */ 5796 next_arg = 1; 5797 nmov = 0; 5798 5799 /* Handle addr argument. */ 5800 loc = &info->in[next_arg]; 5801 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 5802 /* 5803 * 32-bit host with 32-bit guest: zero-extend the guest address 5804 * to 64-bits for the helper by storing the low part. Later, 5805 * after we have processed the register inputs, we will load a 5806 * zero for the high part. 5807 */ 5808 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, 5809 TCG_TYPE_I32, TCG_TYPE_I32, 5810 ldst->addrlo_reg, -1); 5811 next_arg += 2; 5812 nmov += 1; 5813 } else { 5814 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, 5815 ldst->addrlo_reg, ldst->addrhi_reg); 5816 next_arg += n; 5817 nmov += n; 5818 } 5819 5820 /* Handle data argument. 
*/ 5821 loc = &info->in[next_arg]; 5822 switch (loc->kind) { 5823 case TCG_CALL_ARG_NORMAL: 5824 case TCG_CALL_ARG_EXTEND_U: 5825 case TCG_CALL_ARG_EXTEND_S: 5826 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type, 5827 ldst->datalo_reg, ldst->datahi_reg); 5828 next_arg += n; 5829 nmov += n; 5830 tcg_out_helper_load_slots(s, nmov, mov, parm); 5831 break; 5832 5833 case TCG_CALL_ARG_BY_REF: 5834 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 5835 tcg_debug_assert(data_type == TCG_TYPE_I128); 5836 tcg_out_st(s, TCG_TYPE_I64, 5837 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg, 5838 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot)); 5839 tcg_out_st(s, TCG_TYPE_I64, 5840 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg, 5841 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot)); 5842 5843 tcg_out_helper_load_slots(s, nmov, mov, parm); 5844 5845 if (arg_slot_reg_p(loc->arg_slot)) { 5846 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot], 5847 TCG_REG_CALL_STACK, 5848 arg_slot_stk_ofs(loc->ref_slot)); 5849 } else { 5850 tcg_debug_assert(parm->ntmp != 0); 5851 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK, 5852 arg_slot_stk_ofs(loc->ref_slot)); 5853 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0], 5854 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot)); 5855 } 5856 next_arg += 2; 5857 break; 5858 5859 default: 5860 g_assert_not_reached(); 5861 } 5862 5863 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { 5864 /* Zero extend the address by loading a zero for the high part. */ 5865 loc = &info->in[1 + !HOST_BIG_ENDIAN]; 5866 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); 5867 } 5868 5869 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg); 5870 } 5871 5872 #ifdef CONFIG_PROFILER 5873 5874 /* avoid copy/paste errors */ 5875 #define PROF_ADD(to, from, field) \ 5876 do { \ 5877 (to)->field += qatomic_read(&((from)->field)); \ 5878 } while (0) 5879 5880 #define PROF_MAX(to, from, field) \ 5881 do { \ 5882 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 5883 if (val__ > (to)->field) { \ 5884 (to)->field = val__; \ 5885 } \ 5886 } while (0) 5887 5888 /* Pass in a zero'ed @prof */ 5889 static inline 5890 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 5891 { 5892 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 5893 unsigned int i; 5894 5895 for (i = 0; i < n_ctxs; i++) { 5896 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 5897 const TCGProfile *orig = &s->prof; 5898 5899 if (counters) { 5900 PROF_ADD(prof, orig, cpu_exec_time); 5901 PROF_ADD(prof, orig, tb_count1); 5902 PROF_ADD(prof, orig, tb_count); 5903 PROF_ADD(prof, orig, op_count); 5904 PROF_MAX(prof, orig, op_count_max); 5905 PROF_ADD(prof, orig, temp_count); 5906 PROF_MAX(prof, orig, temp_count_max); 5907 PROF_ADD(prof, orig, del_op_count); 5908 PROF_ADD(prof, orig, code_in_len); 5909 PROF_ADD(prof, orig, code_out_len); 5910 PROF_ADD(prof, orig, search_out_len); 5911 PROF_ADD(prof, orig, interm_time); 5912 PROF_ADD(prof, orig, code_time); 5913 PROF_ADD(prof, orig, la_time); 5914 PROF_ADD(prof, orig, opt_time); 5915 PROF_ADD(prof, orig, restore_count); 5916 PROF_ADD(prof, orig, restore_time); 5917 } 5918 if (table) { 5919 int i; 5920 5921 for (i = 0; i < NB_OPS; i++) { 5922 PROF_ADD(prof, orig, table_op_count[i]); 5923 } 5924 } 5925 } 5926 } 5927 5928 #undef PROF_ADD 5929 #undef PROF_MAX 5930 5931 static void tcg_profile_snapshot_counters(TCGProfile *prof) 5932 { 5933 tcg_profile_snapshot(prof, true, false); 5934 } 5935 
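/* Snapshot only the per-opcode table counts; see tcg_profile_snapshot. */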
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

    tcg_optimize(s);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                s->gen_insn_data[num_insns][i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
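        /* (gen_insn_end_off[] stores 16-bit offsets, hence the limit below.) */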
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count,
                           s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, " gen_code time %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, " avg cycles %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t len;
        uint16_t version;
        uint32_t abbrev;
        uint8_t ptr_size;
        uint8_t cu_die;
        uint16_t cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t fn_die;
        char fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym) sym[2];
        struct DebugInfo di;
        uint8_t da[24];
        char str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

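    /*
     * The code below patches the template with the real code_gen_buffer
     * address and size, appends the caller's .debug_frame contents, and
     * then publishes the image through the descriptor above; GDB places a
     * breakpoint on __jit_debug_register_code() and reads
     * __jit_debug_descriptor when it fires.
     */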
    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* fwrite returns the number of items written, 1 on success. */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif