/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA ELFDATA2MSB
#else
# define ELF_DATA ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2];  /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

typedef struct TCGMovExtend {
    TCGReg dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend a pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void __attribute__((unused))
tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

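/*
 * Illustrative note (added; not part of the original source): a single
 * line such as
 *     C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h expands twice through the macro sets above --
 * once into the enumerator c_o1_i2_r_r_ri of TCGConstraintSetIndex, and
 * once into the matching array entry
 *     { .args_ct_str = { "r", "r", "ri" } }
 * of constraint_sets[].  The third expansion below then lets
 * tcg_target_op_def() simply return the enumerator that names the
 * constraint set for an opcode.
 */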
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

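    /*
     * Note (added for clarity): the struct assignment above also copied
     * mem_base pointers that still point into tcg_init_ctx.temps[], so
     * they must be rebased into this context's own temps[] array.
     */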
    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

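/*
 * Illustrative note (added; not part of the original source): a helper's
 * typemask packs one 3-bit dh_typecode_* value per position -- the return
 * type in bits [2:0] and argument j (0-based) in bits starting at 3*(j+1).
 * That is why init_ffi_layouts() below counts the significant bits of
 * (typemask >> 3), rounded up to whole 3-bit fields, to get the argument
 * count, and decodes argument j with extract32(typemask, (j + 1) * 3, 3).
 */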
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t. */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

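    /*
     * Worked example (added; not part of the original source): on a
     * 64-bit host n == 2, so an Int128 by-reference argument produces
     * two TCGCallArgumentLoc entries -- subindex 0 consumes one regular
     * argument slot for the pointer, while subindex 1 only reserves the
     * second word of the stack copy through "ref_slot".
     */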
    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type. */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

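/*
 * Illustrative note (added; not part of the original source): only
 * TEMP_EBB temporaries are recycled through the free_temps bitmaps --
 * tcg_temp_new_internal() reuses a free slot of the right type when one
 * exists, and tcg_temp_free_internal() below returns it to the bitmap.
 * TEMP_TB and TEMP_CONST temporaries are never recycled this way; their
 * frees are silently ignored.
 */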
/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

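/*
 * Illustrative note (added; not part of the original source): constants
 * created above are interned per (type, value) in s->const_table[], so
 * repeated requests for the same value return the same TEMP_CONST temp,
 * and such temps are never freed.  For vectors, tcg_constant_vec() first
 * replicates the element through dup_const(), e.g. a MO_8 element of
 * 0xff becomes the 64-bit pattern 0xffffffffffffffff before interning.
 */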
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
0; i < n; i++) { 1954 const TCGCallArgumentLoc *loc = &info->in[i]; 1955 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; 1956 1957 switch (loc->kind) { 1958 case TCG_CALL_ARG_NORMAL: 1959 case TCG_CALL_ARG_BY_REF: 1960 case TCG_CALL_ARG_BY_REF_N: 1961 op->args[pi++] = temp_arg(ts); 1962 break; 1963 1964 case TCG_CALL_ARG_EXTEND_U: 1965 case TCG_CALL_ARG_EXTEND_S: 1966 { 1967 TCGv_i64 temp = tcg_temp_ebb_new_i64(); 1968 TCGv_i32 orig = temp_tcgv_i32(ts); 1969 1970 if (loc->kind == TCG_CALL_ARG_EXTEND_S) { 1971 tcg_gen_ext_i32_i64(temp, orig); 1972 } else { 1973 tcg_gen_extu_i32_i64(temp, orig); 1974 } 1975 op->args[pi++] = tcgv_i64_arg(temp); 1976 extend_free[n_extend++] = temp; 1977 } 1978 break; 1979 1980 default: 1981 g_assert_not_reached(); 1982 } 1983 } 1984 op->args[pi++] = (uintptr_t)func; 1985 op->args[pi++] = (uintptr_t)info; 1986 tcg_debug_assert(pi == total_args); 1987 1988 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 1989 1990 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); 1991 for (i = 0; i < n_extend; ++i) { 1992 tcg_temp_free_i64(extend_free[i]); 1993 } 1994 } 1995 1996 static void tcg_reg_alloc_start(TCGContext *s) 1997 { 1998 int i, n; 1999 2000 for (i = 0, n = s->nb_temps; i < n; i++) { 2001 TCGTemp *ts = &s->temps[i]; 2002 TCGTempVal val = TEMP_VAL_MEM; 2003 2004 switch (ts->kind) { 2005 case TEMP_CONST: 2006 val = TEMP_VAL_CONST; 2007 break; 2008 case TEMP_FIXED: 2009 val = TEMP_VAL_REG; 2010 break; 2011 case TEMP_GLOBAL: 2012 break; 2013 case TEMP_EBB: 2014 val = TEMP_VAL_DEAD; 2015 /* fall through */ 2016 case TEMP_TB: 2017 ts->mem_allocated = 0; 2018 break; 2019 default: 2020 g_assert_not_reached(); 2021 } 2022 ts->val_type = val; 2023 } 2024 2025 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2026 } 2027 2028 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2029 TCGTemp *ts) 2030 { 2031 int idx = temp_idx(ts); 2032 2033 switch (ts->kind) { 2034 case TEMP_FIXED: 2035 case TEMP_GLOBAL: 2036 pstrcpy(buf, buf_size, ts->name); 2037 break; 2038 case TEMP_TB: 2039 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2040 break; 2041 case TEMP_EBB: 2042 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2043 break; 2044 case TEMP_CONST: 2045 switch (ts->type) { 2046 case TCG_TYPE_I32: 2047 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2048 break; 2049 #if TCG_TARGET_REG_BITS > 32 2050 case TCG_TYPE_I64: 2051 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2052 break; 2053 #endif 2054 case TCG_TYPE_V64: 2055 case TCG_TYPE_V128: 2056 case TCG_TYPE_V256: 2057 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2058 64 << (ts->type - TCG_TYPE_V64), ts->val); 2059 break; 2060 default: 2061 g_assert_not_reached(); 2062 } 2063 break; 2064 } 2065 return buf; 2066 } 2067 2068 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2069 int buf_size, TCGArg arg) 2070 { 2071 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2072 } 2073 2074 static const char * const cond_name[] = 2075 { 2076 [TCG_COND_NEVER] = "never", 2077 [TCG_COND_ALWAYS] = "always", 2078 [TCG_COND_EQ] = "eq", 2079 [TCG_COND_NE] = "ne", 2080 [TCG_COND_LT] = "lt", 2081 [TCG_COND_GE] = "ge", 2082 [TCG_COND_LE] = "le", 2083 [TCG_COND_GT] = "gt", 2084 [TCG_COND_LTU] = "ltu", 2085 [TCG_COND_GEU] = "geu", 2086 [TCG_COND_LEU] = "leu", 2087 [TCG_COND_GTU] = "gtu" 2088 }; 2089 2090 static const char * const ldst_name[] = 2091 { 2092 [MO_UB] = "ub", 2093 [MO_SB] = "sb", 2094 [MO_LEUW] = "leuw", 2095 [MO_LESW] = "lesw", 2096 [MO_LEUL] = "leul", 2097 [MO_LESL] = "lesl", 2098 
[MO_LEUQ] = "leq", 2099 [MO_BEUW] = "beuw", 2100 [MO_BESW] = "besw", 2101 [MO_BEUL] = "beul", 2102 [MO_BESL] = "besl", 2103 [MO_BEUQ] = "beq", 2104 }; 2105 2106 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2107 #ifdef TARGET_ALIGNED_ONLY 2108 [MO_UNALN >> MO_ASHIFT] = "un+", 2109 [MO_ALIGN >> MO_ASHIFT] = "", 2110 #else 2111 [MO_UNALN >> MO_ASHIFT] = "", 2112 [MO_ALIGN >> MO_ASHIFT] = "al+", 2113 #endif 2114 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2115 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2116 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2117 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2118 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2119 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2120 }; 2121 2122 static const char bswap_flag_name[][6] = { 2123 [TCG_BSWAP_IZ] = "iz", 2124 [TCG_BSWAP_OZ] = "oz", 2125 [TCG_BSWAP_OS] = "os", 2126 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2127 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2128 }; 2129 2130 static inline bool tcg_regset_single(TCGRegSet d) 2131 { 2132 return (d & (d - 1)) == 0; 2133 } 2134 2135 static inline TCGReg tcg_regset_first(TCGRegSet d) 2136 { 2137 if (TCG_TARGET_NB_REGS <= 32) { 2138 return ctz32(d); 2139 } else { 2140 return ctz64(d); 2141 } 2142 } 2143 2144 /* Return only the number of characters output -- no error return. */ 2145 #define ne_fprintf(...) \ 2146 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2147 2148 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2149 { 2150 char buf[128]; 2151 TCGOp *op; 2152 2153 QTAILQ_FOREACH(op, &s->ops, link) { 2154 int i, k, nb_oargs, nb_iargs, nb_cargs; 2155 const TCGOpDef *def; 2156 TCGOpcode c; 2157 int col = 0; 2158 2159 c = op->opc; 2160 def = &tcg_op_defs[c]; 2161 2162 if (c == INDEX_op_insn_start) { 2163 nb_oargs = 0; 2164 col += ne_fprintf(f, "\n ----"); 2165 2166 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2167 target_ulong a; 2168 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2169 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 2170 #else 2171 a = op->args[i]; 2172 #endif 2173 col += ne_fprintf(f, " " TARGET_FMT_lx, a); 2174 } 2175 } else if (c == INDEX_op_call) { 2176 const TCGHelperInfo *info = tcg_call_info(op); 2177 void *func = tcg_call_func(op); 2178 2179 /* variable number of arguments */ 2180 nb_oargs = TCGOP_CALLO(op); 2181 nb_iargs = TCGOP_CALLI(op); 2182 nb_cargs = def->nb_cargs; 2183 2184 col += ne_fprintf(f, " %s ", def->name); 2185 2186 /* 2187 * Print the function name from TCGHelperInfo, if available. 2188 * Note that plugins have a template function for the info, 2189 * but the actual function pointer comes from the plugin. 
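     * When the pointers differ, the call is assumed to have come from a
     * plugin, so only the raw function pointer can be printed.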
2190 */ 2191 if (func == info->func) { 2192 col += ne_fprintf(f, "%s", info->name); 2193 } else { 2194 col += ne_fprintf(f, "plugin(%p)", func); 2195 } 2196 2197 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2198 for (i = 0; i < nb_oargs; i++) { 2199 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2200 op->args[i])); 2201 } 2202 for (i = 0; i < nb_iargs; i++) { 2203 TCGArg arg = op->args[nb_oargs + i]; 2204 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2205 col += ne_fprintf(f, ",%s", t); 2206 } 2207 } else { 2208 col += ne_fprintf(f, " %s ", def->name); 2209 2210 nb_oargs = def->nb_oargs; 2211 nb_iargs = def->nb_iargs; 2212 nb_cargs = def->nb_cargs; 2213 2214 if (def->flags & TCG_OPF_VECTOR) { 2215 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2216 8 << TCGOP_VECE(op)); 2217 } 2218 2219 k = 0; 2220 for (i = 0; i < nb_oargs; i++) { 2221 const char *sep = k ? "," : ""; 2222 col += ne_fprintf(f, "%s%s", sep, 2223 tcg_get_arg_str(s, buf, sizeof(buf), 2224 op->args[k++])); 2225 } 2226 for (i = 0; i < nb_iargs; i++) { 2227 const char *sep = k ? "," : ""; 2228 col += ne_fprintf(f, "%s%s", sep, 2229 tcg_get_arg_str(s, buf, sizeof(buf), 2230 op->args[k++])); 2231 } 2232 switch (c) { 2233 case INDEX_op_brcond_i32: 2234 case INDEX_op_setcond_i32: 2235 case INDEX_op_movcond_i32: 2236 case INDEX_op_brcond2_i32: 2237 case INDEX_op_setcond2_i32: 2238 case INDEX_op_brcond_i64: 2239 case INDEX_op_setcond_i64: 2240 case INDEX_op_movcond_i64: 2241 case INDEX_op_cmp_vec: 2242 case INDEX_op_cmpsel_vec: 2243 if (op->args[k] < ARRAY_SIZE(cond_name) 2244 && cond_name[op->args[k]]) { 2245 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2246 } else { 2247 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2248 } 2249 i = 1; 2250 break; 2251 case INDEX_op_qemu_ld_i32: 2252 case INDEX_op_qemu_st_i32: 2253 case INDEX_op_qemu_st8_i32: 2254 case INDEX_op_qemu_ld_i64: 2255 case INDEX_op_qemu_st_i64: 2256 { 2257 MemOpIdx oi = op->args[k++]; 2258 MemOp op = get_memop(oi); 2259 unsigned ix = get_mmuidx(oi); 2260 2261 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2262 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2263 } else { 2264 const char *s_al, *s_op; 2265 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2266 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2267 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 2268 } 2269 i = 1; 2270 } 2271 break; 2272 case INDEX_op_bswap16_i32: 2273 case INDEX_op_bswap16_i64: 2274 case INDEX_op_bswap32_i32: 2275 case INDEX_op_bswap32_i64: 2276 case INDEX_op_bswap64_i64: 2277 { 2278 TCGArg flags = op->args[k]; 2279 const char *name = NULL; 2280 2281 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2282 name = bswap_flag_name[flags]; 2283 } 2284 if (name) { 2285 col += ne_fprintf(f, ",%s", name); 2286 } else { 2287 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2288 } 2289 i = k = 1; 2290 } 2291 break; 2292 default: 2293 i = 0; 2294 break; 2295 } 2296 switch (c) { 2297 case INDEX_op_set_label: 2298 case INDEX_op_br: 2299 case INDEX_op_brcond_i32: 2300 case INDEX_op_brcond_i64: 2301 case INDEX_op_brcond2_i32: 2302 col += ne_fprintf(f, "%s$L%d", k ? 
"," : "", 2303 arg_label(op->args[k])->id); 2304 i++, k++; 2305 break; 2306 case INDEX_op_mb: 2307 { 2308 TCGBar membar = op->args[k]; 2309 const char *b_op, *m_op; 2310 2311 switch (membar & TCG_BAR_SC) { 2312 case 0: 2313 b_op = "none"; 2314 break; 2315 case TCG_BAR_LDAQ: 2316 b_op = "acq"; 2317 break; 2318 case TCG_BAR_STRL: 2319 b_op = "rel"; 2320 break; 2321 case TCG_BAR_SC: 2322 b_op = "seq"; 2323 break; 2324 default: 2325 g_assert_not_reached(); 2326 } 2327 2328 switch (membar & TCG_MO_ALL) { 2329 case 0: 2330 m_op = "none"; 2331 break; 2332 case TCG_MO_LD_LD: 2333 m_op = "rr"; 2334 break; 2335 case TCG_MO_LD_ST: 2336 m_op = "rw"; 2337 break; 2338 case TCG_MO_ST_LD: 2339 m_op = "wr"; 2340 break; 2341 case TCG_MO_ST_ST: 2342 m_op = "ww"; 2343 break; 2344 case TCG_MO_LD_LD | TCG_MO_LD_ST: 2345 m_op = "rr+rw"; 2346 break; 2347 case TCG_MO_LD_LD | TCG_MO_ST_LD: 2348 m_op = "rr+wr"; 2349 break; 2350 case TCG_MO_LD_LD | TCG_MO_ST_ST: 2351 m_op = "rr+ww"; 2352 break; 2353 case TCG_MO_LD_ST | TCG_MO_ST_LD: 2354 m_op = "rw+wr"; 2355 break; 2356 case TCG_MO_LD_ST | TCG_MO_ST_ST: 2357 m_op = "rw+ww"; 2358 break; 2359 case TCG_MO_ST_LD | TCG_MO_ST_ST: 2360 m_op = "wr+ww"; 2361 break; 2362 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 2363 m_op = "rr+rw+wr"; 2364 break; 2365 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 2366 m_op = "rr+rw+ww"; 2367 break; 2368 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 2369 m_op = "rr+wr+ww"; 2370 break; 2371 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 2372 m_op = "rw+wr+ww"; 2373 break; 2374 case TCG_MO_ALL: 2375 m_op = "all"; 2376 break; 2377 default: 2378 g_assert_not_reached(); 2379 } 2380 2381 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 2382 i++, k++; 2383 } 2384 break; 2385 default: 2386 break; 2387 } 2388 for (; i < nb_cargs; i++, k++) { 2389 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2390 op->args[k]); 2391 } 2392 } 2393 2394 if (have_prefs || op->life) { 2395 for (; col < 40; ++col) { 2396 putc(' ', f); 2397 } 2398 } 2399 2400 if (op->life) { 2401 unsigned life = op->life; 2402 2403 if (life & (SYNC_ARG * 3)) { 2404 ne_fprintf(f, " sync:"); 2405 for (i = 0; i < 2; ++i) { 2406 if (life & (SYNC_ARG << i)) { 2407 ne_fprintf(f, " %d", i); 2408 } 2409 } 2410 } 2411 life /= DEAD_ARG; 2412 if (life) { 2413 ne_fprintf(f, " dead:"); 2414 for (i = 0; life; ++i, life >>= 1) { 2415 if (life & 1) { 2416 ne_fprintf(f, " %d", i); 2417 } 2418 } 2419 } 2420 } 2421 2422 if (have_prefs) { 2423 for (i = 0; i < nb_oargs; ++i) { 2424 TCGRegSet set = output_pref(op, i); 2425 2426 if (i == 0) { 2427 ne_fprintf(f, " pref="); 2428 } else { 2429 ne_fprintf(f, ","); 2430 } 2431 if (set == 0) { 2432 ne_fprintf(f, "none"); 2433 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2434 ne_fprintf(f, "all"); 2435 #ifdef CONFIG_DEBUG_TCG 2436 } else if (tcg_regset_single(set)) { 2437 TCGReg reg = tcg_regset_first(set); 2438 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2439 #endif 2440 } else if (TCG_TARGET_NB_REGS <= 32) { 2441 ne_fprintf(f, "0x%x", (uint32_t)set); 2442 } else { 2443 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2444 } 2445 } 2446 } 2447 2448 putc('\n', f); 2449 } 2450 } 2451 2452 /* we give more priority to constraints with less registers */ 2453 static int get_constraint_priority(const TCGOpDef *def, int k) 2454 { 2455 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2456 int n = ctpop64(arg_ct->regs); 2457 2458 /* 2459 * Sort constraints of a single register first, which includes output 2460 * aliases (which must exactly match the input already allocated). 2461 */ 2462 if (n == 1 || arg_ct->oalias) { 2463 return INT_MAX; 2464 } 2465 2466 /* 2467 * Sort register pairs next, first then second immediately after. 2468 * Arbitrarily sort multiple pairs by the index of the first reg; 2469 * there shouldn't be many pairs. 2470 */ 2471 switch (arg_ct->pair) { 2472 case 1: 2473 case 3: 2474 return (k + 1) * 2; 2475 case 2: 2476 return (arg_ct->pair_index + 1) * 2 - 1; 2477 } 2478 2479 /* Finally, sort by decreasing register count. */ 2480 assert(n > 1); 2481 return -n; 2482 } 2483 2484 /* sort from highest priority to lowest */ 2485 static void sort_constraints(TCGOpDef *def, int start, int n) 2486 { 2487 int i, j; 2488 TCGArgConstraint *a = def->args_ct; 2489 2490 for (i = 0; i < n; i++) { 2491 a[start + i].sort_index = start + i; 2492 } 2493 if (n <= 1) { 2494 return; 2495 } 2496 for (i = 0; i < n - 1; i++) { 2497 for (j = i + 1; j < n; j++) { 2498 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2499 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2500 if (p1 < p2) { 2501 int tmp = a[start + i].sort_index; 2502 a[start + i].sort_index = a[start + j].sort_index; 2503 a[start + j].sort_index = tmp; 2504 } 2505 } 2506 } 2507 } 2508 2509 static void process_op_defs(TCGContext *s) 2510 { 2511 TCGOpcode op; 2512 2513 for (op = 0; op < NB_OPS; op++) { 2514 TCGOpDef *def = &tcg_op_defs[op]; 2515 const TCGTargetOpDef *tdefs; 2516 bool saw_alias_pair = false; 2517 int i, o, i2, o2, nb_args; 2518 2519 if (def->flags & TCG_OPF_NOT_PRESENT) { 2520 continue; 2521 } 2522 2523 nb_args = def->nb_iargs + def->nb_oargs; 2524 if (nb_args == 0) { 2525 continue; 2526 } 2527 2528 /* 2529 * Macro magic should make it impossible, but double-check that 2530 * the array index is in range. 
Since the signness of an enum 2531 * is implementation defined, force the result to unsigned. 2532 */ 2533 unsigned con_set = tcg_target_op_def(op); 2534 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2535 tdefs = &constraint_sets[con_set]; 2536 2537 for (i = 0; i < nb_args; i++) { 2538 const char *ct_str = tdefs->args_ct_str[i]; 2539 bool input_p = i >= def->nb_oargs; 2540 2541 /* Incomplete TCGTargetOpDef entry. */ 2542 tcg_debug_assert(ct_str != NULL); 2543 2544 switch (*ct_str) { 2545 case '0' ... '9': 2546 o = *ct_str - '0'; 2547 tcg_debug_assert(input_p); 2548 tcg_debug_assert(o < def->nb_oargs); 2549 tcg_debug_assert(def->args_ct[o].regs != 0); 2550 tcg_debug_assert(!def->args_ct[o].oalias); 2551 def->args_ct[i] = def->args_ct[o]; 2552 /* The output sets oalias. */ 2553 def->args_ct[o].oalias = 1; 2554 def->args_ct[o].alias_index = i; 2555 /* The input sets ialias. */ 2556 def->args_ct[i].ialias = 1; 2557 def->args_ct[i].alias_index = o; 2558 if (def->args_ct[i].pair) { 2559 saw_alias_pair = true; 2560 } 2561 tcg_debug_assert(ct_str[1] == '\0'); 2562 continue; 2563 2564 case '&': 2565 tcg_debug_assert(!input_p); 2566 def->args_ct[i].newreg = true; 2567 ct_str++; 2568 break; 2569 2570 case 'p': /* plus */ 2571 /* Allocate to the register after the previous. */ 2572 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2573 o = i - 1; 2574 tcg_debug_assert(!def->args_ct[o].pair); 2575 tcg_debug_assert(!def->args_ct[o].ct); 2576 def->args_ct[i] = (TCGArgConstraint){ 2577 .pair = 2, 2578 .pair_index = o, 2579 .regs = def->args_ct[o].regs << 1, 2580 }; 2581 def->args_ct[o].pair = 1; 2582 def->args_ct[o].pair_index = i; 2583 tcg_debug_assert(ct_str[1] == '\0'); 2584 continue; 2585 2586 case 'm': /* minus */ 2587 /* Allocate to the register before the previous. */ 2588 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2589 o = i - 1; 2590 tcg_debug_assert(!def->args_ct[o].pair); 2591 tcg_debug_assert(!def->args_ct[o].ct); 2592 def->args_ct[i] = (TCGArgConstraint){ 2593 .pair = 1, 2594 .pair_index = o, 2595 .regs = def->args_ct[o].regs >> 1, 2596 }; 2597 def->args_ct[o].pair = 2; 2598 def->args_ct[o].pair_index = i; 2599 tcg_debug_assert(ct_str[1] == '\0'); 2600 continue; 2601 } 2602 2603 do { 2604 switch (*ct_str) { 2605 case 'i': 2606 def->args_ct[i].ct |= TCG_CT_CONST; 2607 break; 2608 2609 /* Include all of the target-specific constraints. */ 2610 2611 #undef CONST 2612 #define CONST(CASE, MASK) \ 2613 case CASE: def->args_ct[i].ct |= MASK; break; 2614 #define REGS(CASE, MASK) \ 2615 case CASE: def->args_ct[i].regs |= MASK; break; 2616 2617 #include "tcg-target-con-str.h" 2618 2619 #undef REGS 2620 #undef CONST 2621 default: 2622 case '0' ... '9': 2623 case '&': 2624 case 'p': 2625 case 'm': 2626 /* Typo in TCGTargetOpDef constraint. */ 2627 g_assert_not_reached(); 2628 } 2629 } while (*++ct_str != '\0'); 2630 } 2631 2632 /* TCGTargetOpDef entry with too much information? */ 2633 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2634 2635 /* 2636 * Fix up output pairs that are aliased with inputs. 2637 * When we created the alias, we copied pair from the output. 2638 * There are three cases: 2639 * (1a) Pairs of inputs alias pairs of outputs. 2640 * (1b) One input aliases the first of a pair of outputs. 2641 * (2) One input aliases the second of a pair of outputs. 2642 * 2643 * Case 1a is handled by making sure that the pair_index'es are 2644 * properly updated so that they appear the same as a pair of inputs. 
2645 * 2646 * Case 1b is handled by setting the pair_index of the input to 2647 * itself, simply so it doesn't point to an unrelated argument. 2648 * Since we don't encounter the "second" during the input allocation 2649 * phase, nothing happens with the second half of the input pair. 2650 * 2651 * Case 2 is handled by setting the second input to pair=3, the 2652 * first output to pair=3, and the pair_index'es to match. 2653 */ 2654 if (saw_alias_pair) { 2655 for (i = def->nb_oargs; i < nb_args; i++) { 2656 /* 2657 * Since [0-9pm] must be alone in the constraint string, 2658 * the only way they can both be set is if the pair comes 2659 * from the output alias. 2660 */ 2661 if (!def->args_ct[i].ialias) { 2662 continue; 2663 } 2664 switch (def->args_ct[i].pair) { 2665 case 0: 2666 break; 2667 case 1: 2668 o = def->args_ct[i].alias_index; 2669 o2 = def->args_ct[o].pair_index; 2670 tcg_debug_assert(def->args_ct[o].pair == 1); 2671 tcg_debug_assert(def->args_ct[o2].pair == 2); 2672 if (def->args_ct[o2].oalias) { 2673 /* Case 1a */ 2674 i2 = def->args_ct[o2].alias_index; 2675 tcg_debug_assert(def->args_ct[i2].pair == 2); 2676 def->args_ct[i2].pair_index = i; 2677 def->args_ct[i].pair_index = i2; 2678 } else { 2679 /* Case 1b */ 2680 def->args_ct[i].pair_index = i; 2681 } 2682 break; 2683 case 2: 2684 o = def->args_ct[i].alias_index; 2685 o2 = def->args_ct[o].pair_index; 2686 tcg_debug_assert(def->args_ct[o].pair == 2); 2687 tcg_debug_assert(def->args_ct[o2].pair == 1); 2688 if (def->args_ct[o2].oalias) { 2689 /* Case 1a */ 2690 i2 = def->args_ct[o2].alias_index; 2691 tcg_debug_assert(def->args_ct[i2].pair == 1); 2692 def->args_ct[i2].pair_index = i; 2693 def->args_ct[i].pair_index = i2; 2694 } else { 2695 /* Case 2 */ 2696 def->args_ct[i].pair = 3; 2697 def->args_ct[o2].pair = 3; 2698 def->args_ct[i].pair_index = o2; 2699 def->args_ct[o2].pair_index = i; 2700 } 2701 break; 2702 default: 2703 g_assert_not_reached(); 2704 } 2705 } 2706 } 2707 2708 /* sort the constraints (XXX: this is just an heuristic) */ 2709 sort_constraints(def, 0, def->nb_oargs); 2710 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2711 } 2712 } 2713 2714 static void remove_label_use(TCGOp *op, int idx) 2715 { 2716 TCGLabel *label = arg_label(op->args[idx]); 2717 TCGLabelUse *use; 2718 2719 QSIMPLEQ_FOREACH(use, &label->branches, next) { 2720 if (use->op == op) { 2721 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 2722 return; 2723 } 2724 } 2725 g_assert_not_reached(); 2726 } 2727 2728 void tcg_op_remove(TCGContext *s, TCGOp *op) 2729 { 2730 switch (op->opc) { 2731 case INDEX_op_br: 2732 remove_label_use(op, 0); 2733 break; 2734 case INDEX_op_brcond_i32: 2735 case INDEX_op_brcond_i64: 2736 remove_label_use(op, 3); 2737 break; 2738 case INDEX_op_brcond2_i32: 2739 remove_label_use(op, 5); 2740 break; 2741 default: 2742 break; 2743 } 2744 2745 QTAILQ_REMOVE(&s->ops, op, link); 2746 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2747 s->nb_ops--; 2748 2749 #ifdef CONFIG_PROFILER 2750 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2751 #endif 2752 } 2753 2754 void tcg_remove_ops_after(TCGOp *op) 2755 { 2756 TCGContext *s = tcg_ctx; 2757 2758 while (true) { 2759 TCGOp *last = tcg_last_op(); 2760 if (last == op) { 2761 return; 2762 } 2763 tcg_op_remove(s, last); 2764 } 2765 } 2766 2767 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 2768 { 2769 TCGContext *s = tcg_ctx; 2770 TCGOp *op = NULL; 2771 2772 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 2773 QTAILQ_FOREACH(op, &s->free_ops, link) { 
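        /* First-fit reuse: take the first recycled op whose argument array
           is large enough, and keep its (possibly larger) nargs so that the
           recorded storage size stays accurate. */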
2774 if (nargs <= op->nargs) { 2775 QTAILQ_REMOVE(&s->free_ops, op, link); 2776 nargs = op->nargs; 2777 goto found; 2778 } 2779 } 2780 } 2781 2782 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 2783 nargs = MAX(4, nargs); 2784 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 2785 2786 found: 2787 memset(op, 0, offsetof(TCGOp, link)); 2788 op->opc = opc; 2789 op->nargs = nargs; 2790 2791 /* Check for bitfield overflow. */ 2792 tcg_debug_assert(op->nargs == nargs); 2793 2794 s->nb_ops++; 2795 return op; 2796 } 2797 2798 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 2799 { 2800 TCGOp *op = tcg_op_alloc(opc, nargs); 2801 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2802 return op; 2803 } 2804 2805 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 2806 TCGOpcode opc, unsigned nargs) 2807 { 2808 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2809 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2810 return new_op; 2811 } 2812 2813 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 2814 TCGOpcode opc, unsigned nargs) 2815 { 2816 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2817 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2818 return new_op; 2819 } 2820 2821 static void move_label_uses(TCGLabel *to, TCGLabel *from) 2822 { 2823 TCGLabelUse *u; 2824 2825 QSIMPLEQ_FOREACH(u, &from->branches, next) { 2826 TCGOp *op = u->op; 2827 switch (op->opc) { 2828 case INDEX_op_br: 2829 op->args[0] = label_arg(to); 2830 break; 2831 case INDEX_op_brcond_i32: 2832 case INDEX_op_brcond_i64: 2833 op->args[3] = label_arg(to); 2834 break; 2835 case INDEX_op_brcond2_i32: 2836 op->args[5] = label_arg(to); 2837 break; 2838 default: 2839 g_assert_not_reached(); 2840 } 2841 } 2842 2843 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 2844 } 2845 2846 /* Reachable analysis : remove unreachable code. */ 2847 static void __attribute__((noinline)) 2848 reachable_code_pass(TCGContext *s) 2849 { 2850 TCGOp *op, *op_next, *op_prev; 2851 bool dead = false; 2852 2853 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2854 bool remove = dead; 2855 TCGLabel *label; 2856 2857 switch (op->opc) { 2858 case INDEX_op_set_label: 2859 label = arg_label(op->args[0]); 2860 2861 /* 2862 * Note that the first op in the TB is always a load, 2863 * so there is always something before a label. 2864 */ 2865 op_prev = QTAILQ_PREV(op, link); 2866 2867 /* 2868 * If we find two sequential labels, move all branches to 2869 * reference the second label and remove the first label. 2870 * Do this before branch to next optimization, so that the 2871 * middle label is out of the way. 2872 */ 2873 if (op_prev->opc == INDEX_op_set_label) { 2874 move_label_uses(label, arg_label(op_prev->args[0])); 2875 tcg_op_remove(s, op_prev); 2876 op_prev = QTAILQ_PREV(op, link); 2877 } 2878 2879 /* 2880 * Optimization can fold conditional branches to unconditional. 2881 * If we find a label which is preceded by an unconditional 2882 * branch to next, remove the branch. We couldn't do this when 2883 * processing the branch because any dead code between the branch 2884 * and label had not yet been removed. 2885 */ 2886 if (op_prev->opc == INDEX_op_br && 2887 label == arg_label(op_prev->args[0])) { 2888 tcg_op_remove(s, op_prev); 2889 /* Fall through means insns become live again. */ 2890 dead = false; 2891 } 2892 2893 if (QSIMPLEQ_EMPTY(&label->branches)) { 2894 /* 2895 * While there is an occasional backward branch, virtually 2896 * all branches generated by the translators are forward. 
2897 * Which means that generally we will have already removed 2898 * all references to the label that will be, and there is 2899 * little to be gained by iterating. 2900 */ 2901 remove = true; 2902 } else { 2903 /* Once we see a label, insns become live again. */ 2904 dead = false; 2905 remove = false; 2906 } 2907 break; 2908 2909 case INDEX_op_br: 2910 case INDEX_op_exit_tb: 2911 case INDEX_op_goto_ptr: 2912 /* Unconditional branches; everything following is dead. */ 2913 dead = true; 2914 break; 2915 2916 case INDEX_op_call: 2917 /* Notice noreturn helper calls, raising exceptions. */ 2918 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2919 dead = true; 2920 } 2921 break; 2922 2923 case INDEX_op_insn_start: 2924 /* Never remove -- we need to keep these for unwind. */ 2925 remove = false; 2926 break; 2927 2928 default: 2929 break; 2930 } 2931 2932 if (remove) { 2933 tcg_op_remove(s, op); 2934 } 2935 } 2936 } 2937 2938 #define TS_DEAD 1 2939 #define TS_MEM 2 2940 2941 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2942 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2943 2944 /* For liveness_pass_1, the register preferences for a given temp. */ 2945 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2946 { 2947 return ts->state_ptr; 2948 } 2949 2950 /* For liveness_pass_1, reset the preferences for a given temp to the 2951 * maximal regset for its type. 2952 */ 2953 static inline void la_reset_pref(TCGTemp *ts) 2954 { 2955 *la_temp_pref(ts) 2956 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2957 } 2958 2959 /* liveness analysis: end of function: all temps are dead, and globals 2960 should be in memory. */ 2961 static void la_func_end(TCGContext *s, int ng, int nt) 2962 { 2963 int i; 2964 2965 for (i = 0; i < ng; ++i) { 2966 s->temps[i].state = TS_DEAD | TS_MEM; 2967 la_reset_pref(&s->temps[i]); 2968 } 2969 for (i = ng; i < nt; ++i) { 2970 s->temps[i].state = TS_DEAD; 2971 la_reset_pref(&s->temps[i]); 2972 } 2973 } 2974 2975 /* liveness analysis: end of basic block: all temps are dead, globals 2976 and local temps should be in memory. */ 2977 static void la_bb_end(TCGContext *s, int ng, int nt) 2978 { 2979 int i; 2980 2981 for (i = 0; i < nt; ++i) { 2982 TCGTemp *ts = &s->temps[i]; 2983 int state; 2984 2985 switch (ts->kind) { 2986 case TEMP_FIXED: 2987 case TEMP_GLOBAL: 2988 case TEMP_TB: 2989 state = TS_DEAD | TS_MEM; 2990 break; 2991 case TEMP_EBB: 2992 case TEMP_CONST: 2993 state = TS_DEAD; 2994 break; 2995 default: 2996 g_assert_not_reached(); 2997 } 2998 ts->state = state; 2999 la_reset_pref(ts); 3000 } 3001 } 3002 3003 /* liveness analysis: sync globals back to memory. */ 3004 static void la_global_sync(TCGContext *s, int ng) 3005 { 3006 int i; 3007 3008 for (i = 0; i < ng; ++i) { 3009 int state = s->temps[i].state; 3010 s->temps[i].state = state | TS_MEM; 3011 if (state == TS_DEAD) { 3012 /* If the global was previously dead, reset prefs. */ 3013 la_reset_pref(&s->temps[i]); 3014 } 3015 } 3016 } 3017 3018 /* 3019 * liveness analysis: conditional branch: all temps are dead unless 3020 * explicitly live-across-conditional-branch, globals and local temps 3021 * should be synced. 
3022 */ 3023 static void la_bb_sync(TCGContext *s, int ng, int nt) 3024 { 3025 la_global_sync(s, ng); 3026 3027 for (int i = ng; i < nt; ++i) { 3028 TCGTemp *ts = &s->temps[i]; 3029 int state; 3030 3031 switch (ts->kind) { 3032 case TEMP_TB: 3033 state = ts->state; 3034 ts->state = state | TS_MEM; 3035 if (state != TS_DEAD) { 3036 continue; 3037 } 3038 break; 3039 case TEMP_EBB: 3040 case TEMP_CONST: 3041 continue; 3042 default: 3043 g_assert_not_reached(); 3044 } 3045 la_reset_pref(&s->temps[i]); 3046 } 3047 } 3048 3049 /* liveness analysis: sync globals back to memory and kill. */ 3050 static void la_global_kill(TCGContext *s, int ng) 3051 { 3052 int i; 3053 3054 for (i = 0; i < ng; i++) { 3055 s->temps[i].state = TS_DEAD | TS_MEM; 3056 la_reset_pref(&s->temps[i]); 3057 } 3058 } 3059 3060 /* liveness analysis: note live globals crossing calls. */ 3061 static void la_cross_call(TCGContext *s, int nt) 3062 { 3063 TCGRegSet mask = ~tcg_target_call_clobber_regs; 3064 int i; 3065 3066 for (i = 0; i < nt; i++) { 3067 TCGTemp *ts = &s->temps[i]; 3068 if (!(ts->state & TS_DEAD)) { 3069 TCGRegSet *pset = la_temp_pref(ts); 3070 TCGRegSet set = *pset; 3071 3072 set &= mask; 3073 /* If the combination is not possible, restart. */ 3074 if (set == 0) { 3075 set = tcg_target_available_regs[ts->type] & mask; 3076 } 3077 *pset = set; 3078 } 3079 } 3080 } 3081 3082 /* 3083 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 3084 * to TEMP_EBB, if possible. 3085 */ 3086 static void __attribute__((noinline)) 3087 liveness_pass_0(TCGContext *s) 3088 { 3089 void * const multiple_ebb = (void *)(uintptr_t)-1; 3090 int nb_temps = s->nb_temps; 3091 TCGOp *op, *ebb; 3092 3093 for (int i = s->nb_globals; i < nb_temps; ++i) { 3094 s->temps[i].state_ptr = NULL; 3095 } 3096 3097 /* 3098 * Represent each EBB by the op at which it begins. In the case of 3099 * the first EBB, this is the first op, otherwise it is a label. 3100 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use 3101 * within a single EBB, else MULTIPLE_EBB. 3102 */ 3103 ebb = QTAILQ_FIRST(&s->ops); 3104 QTAILQ_FOREACH(op, &s->ops, link) { 3105 const TCGOpDef *def; 3106 int nb_oargs, nb_iargs; 3107 3108 switch (op->opc) { 3109 case INDEX_op_set_label: 3110 ebb = op; 3111 continue; 3112 case INDEX_op_discard: 3113 continue; 3114 case INDEX_op_call: 3115 nb_oargs = TCGOP_CALLO(op); 3116 nb_iargs = TCGOP_CALLI(op); 3117 break; 3118 default: 3119 def = &tcg_op_defs[op->opc]; 3120 nb_oargs = def->nb_oargs; 3121 nb_iargs = def->nb_iargs; 3122 break; 3123 } 3124 3125 for (int i = 0; i < nb_oargs + nb_iargs; ++i) { 3126 TCGTemp *ts = arg_temp(op->args[i]); 3127 3128 if (ts->kind != TEMP_TB) { 3129 continue; 3130 } 3131 if (ts->state_ptr == NULL) { 3132 ts->state_ptr = ebb; 3133 } else if (ts->state_ptr != ebb) { 3134 ts->state_ptr = multiple_ebb; 3135 } 3136 } 3137 } 3138 3139 /* 3140 * For TEMP_TB that turned out not to be used beyond one EBB, 3141 * reduce the liveness to TEMP_EBB. 3142 */ 3143 for (int i = s->nb_globals; i < nb_temps; ++i) { 3144 TCGTemp *ts = &s->temps[i]; 3145 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) { 3146 ts->kind = TEMP_EBB; 3147 } 3148 } 3149 } 3150 3151 /* Liveness analysis : update the opc_arg_life array to tell if a 3152 given input arguments is dead. Instructions updating dead 3153 temporaries are removed. 
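   The pass walks the op list backwards.  For each temp, 'state' tracks
   whether its value is dead (TS_DEAD) and/or already synced to memory
   (TS_MEM) at the current point, while 'state_ptr' accumulates a register
   preference set from later uses.  The per-op result is stored in op->life
   as DEAD_ARG and SYNC_ARG bits indexed by argument position.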
*/ 3154 static void __attribute__((noinline)) 3155 liveness_pass_1(TCGContext *s) 3156 { 3157 int nb_globals = s->nb_globals; 3158 int nb_temps = s->nb_temps; 3159 TCGOp *op, *op_prev; 3160 TCGRegSet *prefs; 3161 int i; 3162 3163 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3164 for (i = 0; i < nb_temps; ++i) { 3165 s->temps[i].state_ptr = prefs + i; 3166 } 3167 3168 /* ??? Should be redundant with the exit_tb that ends the TB. */ 3169 la_func_end(s, nb_globals, nb_temps); 3170 3171 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3172 int nb_iargs, nb_oargs; 3173 TCGOpcode opc_new, opc_new2; 3174 bool have_opc_new2; 3175 TCGLifeData arg_life = 0; 3176 TCGTemp *ts; 3177 TCGOpcode opc = op->opc; 3178 const TCGOpDef *def = &tcg_op_defs[opc]; 3179 3180 switch (opc) { 3181 case INDEX_op_call: 3182 { 3183 const TCGHelperInfo *info = tcg_call_info(op); 3184 int call_flags = tcg_call_flags(op); 3185 3186 nb_oargs = TCGOP_CALLO(op); 3187 nb_iargs = TCGOP_CALLI(op); 3188 3189 /* pure functions can be removed if their result is unused */ 3190 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3191 for (i = 0; i < nb_oargs; i++) { 3192 ts = arg_temp(op->args[i]); 3193 if (ts->state != TS_DEAD) { 3194 goto do_not_remove_call; 3195 } 3196 } 3197 goto do_remove; 3198 } 3199 do_not_remove_call: 3200 3201 /* Output args are dead. */ 3202 for (i = 0; i < nb_oargs; i++) { 3203 ts = arg_temp(op->args[i]); 3204 if (ts->state & TS_DEAD) { 3205 arg_life |= DEAD_ARG << i; 3206 } 3207 if (ts->state & TS_MEM) { 3208 arg_life |= SYNC_ARG << i; 3209 } 3210 ts->state = TS_DEAD; 3211 la_reset_pref(ts); 3212 } 3213 3214 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3215 memset(op->output_pref, 0, sizeof(op->output_pref)); 3216 3217 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3218 TCG_CALL_NO_READ_GLOBALS))) { 3219 la_global_kill(s, nb_globals); 3220 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3221 la_global_sync(s, nb_globals); 3222 } 3223 3224 /* Record arguments that die in this helper. */ 3225 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3226 ts = arg_temp(op->args[i]); 3227 if (ts->state & TS_DEAD) { 3228 arg_life |= DEAD_ARG << i; 3229 } 3230 } 3231 3232 /* For all live registers, remove call-clobbered prefs. */ 3233 la_cross_call(s, nb_temps); 3234 3235 /* 3236 * Input arguments are live for preceding opcodes. 3237 * 3238 * For those arguments that die, and will be allocated in 3239 * registers, clear the register set for that arg, to be 3240 * filled in below. For args that will be on the stack, 3241 * reset to any available reg. Process arguments in reverse 3242 * order so that if a temp is used more than once, the stack 3243 * reset to max happens before the register reset to 0. 3244 */ 3245 for (i = nb_iargs - 1; i >= 0; i--) { 3246 const TCGCallArgumentLoc *loc = &info->in[i]; 3247 ts = arg_temp(op->args[nb_oargs + i]); 3248 3249 if (ts->state & TS_DEAD) { 3250 switch (loc->kind) { 3251 case TCG_CALL_ARG_NORMAL: 3252 case TCG_CALL_ARG_EXTEND_U: 3253 case TCG_CALL_ARG_EXTEND_S: 3254 if (arg_slot_reg_p(loc->arg_slot)) { 3255 *la_temp_pref(ts) = 0; 3256 break; 3257 } 3258 /* fall through */ 3259 default: 3260 *la_temp_pref(ts) = 3261 tcg_target_available_regs[ts->type]; 3262 break; 3263 } 3264 ts->state &= ~TS_DEAD; 3265 } 3266 } 3267 3268 /* 3269 * For each input argument, add its input register to prefs. 3270 * If a temp is used once, this produces a single set bit; 3271 * if a temp is used multiple times, this produces a set. 
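     * For example, a temp that is passed in two different register
     * argument slots ends up with both of the corresponding
     * tcg_target_call_iarg_regs entries in its preference set.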
3272 */ 3273 for (i = 0; i < nb_iargs; i++) { 3274 const TCGCallArgumentLoc *loc = &info->in[i]; 3275 ts = arg_temp(op->args[nb_oargs + i]); 3276 3277 switch (loc->kind) { 3278 case TCG_CALL_ARG_NORMAL: 3279 case TCG_CALL_ARG_EXTEND_U: 3280 case TCG_CALL_ARG_EXTEND_S: 3281 if (arg_slot_reg_p(loc->arg_slot)) { 3282 tcg_regset_set_reg(*la_temp_pref(ts), 3283 tcg_target_call_iarg_regs[loc->arg_slot]); 3284 } 3285 break; 3286 default: 3287 break; 3288 } 3289 } 3290 } 3291 break; 3292 case INDEX_op_insn_start: 3293 break; 3294 case INDEX_op_discard: 3295 /* mark the temporary as dead */ 3296 ts = arg_temp(op->args[0]); 3297 ts->state = TS_DEAD; 3298 la_reset_pref(ts); 3299 break; 3300 3301 case INDEX_op_add2_i32: 3302 opc_new = INDEX_op_add_i32; 3303 goto do_addsub2; 3304 case INDEX_op_sub2_i32: 3305 opc_new = INDEX_op_sub_i32; 3306 goto do_addsub2; 3307 case INDEX_op_add2_i64: 3308 opc_new = INDEX_op_add_i64; 3309 goto do_addsub2; 3310 case INDEX_op_sub2_i64: 3311 opc_new = INDEX_op_sub_i64; 3312 do_addsub2: 3313 nb_iargs = 4; 3314 nb_oargs = 2; 3315 /* Test if the high part of the operation is dead, but not 3316 the low part. The result can be optimized to a simple 3317 add or sub. This happens often for x86_64 guest when the 3318 cpu mode is set to 32 bit. */ 3319 if (arg_temp(op->args[1])->state == TS_DEAD) { 3320 if (arg_temp(op->args[0])->state == TS_DEAD) { 3321 goto do_remove; 3322 } 3323 /* Replace the opcode and adjust the args in place, 3324 leaving 3 unused args at the end. */ 3325 op->opc = opc = opc_new; 3326 op->args[1] = op->args[2]; 3327 op->args[2] = op->args[4]; 3328 /* Fall through and mark the single-word operation live. */ 3329 nb_iargs = 2; 3330 nb_oargs = 1; 3331 } 3332 goto do_not_remove; 3333 3334 case INDEX_op_mulu2_i32: 3335 opc_new = INDEX_op_mul_i32; 3336 opc_new2 = INDEX_op_muluh_i32; 3337 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3338 goto do_mul2; 3339 case INDEX_op_muls2_i32: 3340 opc_new = INDEX_op_mul_i32; 3341 opc_new2 = INDEX_op_mulsh_i32; 3342 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3343 goto do_mul2; 3344 case INDEX_op_mulu2_i64: 3345 opc_new = INDEX_op_mul_i64; 3346 opc_new2 = INDEX_op_muluh_i64; 3347 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3348 goto do_mul2; 3349 case INDEX_op_muls2_i64: 3350 opc_new = INDEX_op_mul_i64; 3351 opc_new2 = INDEX_op_mulsh_i64; 3352 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3353 goto do_mul2; 3354 do_mul2: 3355 nb_iargs = 2; 3356 nb_oargs = 2; 3357 if (arg_temp(op->args[1])->state == TS_DEAD) { 3358 if (arg_temp(op->args[0])->state == TS_DEAD) { 3359 /* Both parts of the operation are dead. */ 3360 goto do_remove; 3361 } 3362 /* The high part of the operation is dead; generate the low. */ 3363 op->opc = opc = opc_new; 3364 op->args[1] = op->args[2]; 3365 op->args[2] = op->args[3]; 3366 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3367 /* The low part of the operation is dead; generate the high. */ 3368 op->opc = opc = opc_new2; 3369 op->args[0] = op->args[1]; 3370 op->args[1] = op->args[2]; 3371 op->args[2] = op->args[3]; 3372 } else { 3373 goto do_not_remove; 3374 } 3375 /* Mark the single-word operation live. */ 3376 nb_oargs = 1; 3377 goto do_not_remove; 3378 3379 default: 3380 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3381 nb_iargs = def->nb_iargs; 3382 nb_oargs = def->nb_oargs; 3383 3384 /* Test if the operation can be removed because all 3385 its outputs are dead. 
We assume that nb_oargs == 0 3386 implies side effects */ 3387 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3388 for (i = 0; i < nb_oargs; i++) { 3389 if (arg_temp(op->args[i])->state != TS_DEAD) { 3390 goto do_not_remove; 3391 } 3392 } 3393 goto do_remove; 3394 } 3395 goto do_not_remove; 3396 3397 do_remove: 3398 tcg_op_remove(s, op); 3399 break; 3400 3401 do_not_remove: 3402 for (i = 0; i < nb_oargs; i++) { 3403 ts = arg_temp(op->args[i]); 3404 3405 /* Remember the preference of the uses that followed. */ 3406 if (i < ARRAY_SIZE(op->output_pref)) { 3407 op->output_pref[i] = *la_temp_pref(ts); 3408 } 3409 3410 /* Output args are dead. */ 3411 if (ts->state & TS_DEAD) { 3412 arg_life |= DEAD_ARG << i; 3413 } 3414 if (ts->state & TS_MEM) { 3415 arg_life |= SYNC_ARG << i; 3416 } 3417 ts->state = TS_DEAD; 3418 la_reset_pref(ts); 3419 } 3420 3421 /* If end of basic block, update. */ 3422 if (def->flags & TCG_OPF_BB_EXIT) { 3423 la_func_end(s, nb_globals, nb_temps); 3424 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3425 la_bb_sync(s, nb_globals, nb_temps); 3426 } else if (def->flags & TCG_OPF_BB_END) { 3427 la_bb_end(s, nb_globals, nb_temps); 3428 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3429 la_global_sync(s, nb_globals); 3430 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3431 la_cross_call(s, nb_temps); 3432 } 3433 } 3434 3435 /* Record arguments that die in this opcode. */ 3436 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3437 ts = arg_temp(op->args[i]); 3438 if (ts->state & TS_DEAD) { 3439 arg_life |= DEAD_ARG << i; 3440 } 3441 } 3442 3443 /* Input arguments are live for preceding opcodes. */ 3444 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3445 ts = arg_temp(op->args[i]); 3446 if (ts->state & TS_DEAD) { 3447 /* For operands that were dead, initially allow 3448 all regs for the type. */ 3449 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3450 ts->state &= ~TS_DEAD; 3451 } 3452 } 3453 3454 /* Incorporate constraints for this operand. */ 3455 switch (opc) { 3456 case INDEX_op_mov_i32: 3457 case INDEX_op_mov_i64: 3458 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3459 have proper constraints. That said, special case 3460 moves to propagate preferences backward. */ 3461 if (IS_DEAD_ARG(1)) { 3462 *la_temp_pref(arg_temp(op->args[0])) 3463 = *la_temp_pref(arg_temp(op->args[1])); 3464 } 3465 break; 3466 3467 default: 3468 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3469 const TCGArgConstraint *ct = &def->args_ct[i]; 3470 TCGRegSet set, *pset; 3471 3472 ts = arg_temp(op->args[i]); 3473 pset = la_temp_pref(ts); 3474 set = *pset; 3475 3476 set &= ct->regs; 3477 if (ct->ialias) { 3478 set &= output_pref(op, ct->alias_index); 3479 } 3480 /* If the combination is not possible, restart. */ 3481 if (set == 0) { 3482 set = ct->regs; 3483 } 3484 *pset = set; 3485 } 3486 break; 3487 } 3488 break; 3489 } 3490 op->life = arg_life; 3491 } 3492 } 3493 3494 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3495 static bool __attribute__((noinline)) 3496 liveness_pass_2(TCGContext *s) 3497 { 3498 int nb_globals = s->nb_globals; 3499 int nb_temps, i; 3500 bool changes = false; 3501 TCGOp *op, *op_next; 3502 3503 /* Create a temporary for each indirect global. 
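       For each global marked indirect_reg, allocate a shadow TEMP_EBB temp;
       the loop below rewrites uses of the global to that temp, inserting an
       explicit ld before a use whose value has not been loaded yet and an
       st after a write that must be synced back to the canonical slot.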
*/ 3504 for (i = 0; i < nb_globals; ++i) { 3505 TCGTemp *its = &s->temps[i]; 3506 if (its->indirect_reg) { 3507 TCGTemp *dts = tcg_temp_alloc(s); 3508 dts->type = its->type; 3509 dts->base_type = its->base_type; 3510 dts->temp_subindex = its->temp_subindex; 3511 dts->kind = TEMP_EBB; 3512 its->state_ptr = dts; 3513 } else { 3514 its->state_ptr = NULL; 3515 } 3516 /* All globals begin dead. */ 3517 its->state = TS_DEAD; 3518 } 3519 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3520 TCGTemp *its = &s->temps[i]; 3521 its->state_ptr = NULL; 3522 its->state = TS_DEAD; 3523 } 3524 3525 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3526 TCGOpcode opc = op->opc; 3527 const TCGOpDef *def = &tcg_op_defs[opc]; 3528 TCGLifeData arg_life = op->life; 3529 int nb_iargs, nb_oargs, call_flags; 3530 TCGTemp *arg_ts, *dir_ts; 3531 3532 if (opc == INDEX_op_call) { 3533 nb_oargs = TCGOP_CALLO(op); 3534 nb_iargs = TCGOP_CALLI(op); 3535 call_flags = tcg_call_flags(op); 3536 } else { 3537 nb_iargs = def->nb_iargs; 3538 nb_oargs = def->nb_oargs; 3539 3540 /* Set flags similar to how calls require. */ 3541 if (def->flags & TCG_OPF_COND_BRANCH) { 3542 /* Like reading globals: sync_globals */ 3543 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3544 } else if (def->flags & TCG_OPF_BB_END) { 3545 /* Like writing globals: save_globals */ 3546 call_flags = 0; 3547 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3548 /* Like reading globals: sync_globals */ 3549 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3550 } else { 3551 /* No effect on globals. */ 3552 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3553 TCG_CALL_NO_WRITE_GLOBALS); 3554 } 3555 } 3556 3557 /* Make sure that input arguments are available. */ 3558 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3559 arg_ts = arg_temp(op->args[i]); 3560 dir_ts = arg_ts->state_ptr; 3561 if (dir_ts && arg_ts->state == TS_DEAD) { 3562 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3563 ? INDEX_op_ld_i32 3564 : INDEX_op_ld_i64); 3565 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3566 3567 lop->args[0] = temp_arg(dir_ts); 3568 lop->args[1] = temp_arg(arg_ts->mem_base); 3569 lop->args[2] = arg_ts->mem_offset; 3570 3571 /* Loaded, but synced with memory. */ 3572 arg_ts->state = TS_MEM; 3573 } 3574 } 3575 3576 /* Perform input replacement, and mark inputs that became dead. 3577 No action is required except keeping temp_state up to date 3578 so that we reload when needed. */ 3579 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3580 arg_ts = arg_temp(op->args[i]); 3581 dir_ts = arg_ts->state_ptr; 3582 if (dir_ts) { 3583 op->args[i] = temp_arg(dir_ts); 3584 changes = true; 3585 if (IS_DEAD_ARG(i)) { 3586 arg_ts->state = TS_DEAD; 3587 } 3588 } 3589 } 3590 3591 /* Liveness analysis should ensure that the following are 3592 all correct, for call sites and basic block end points. */ 3593 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3594 /* Nothing to do */ 3595 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3596 for (i = 0; i < nb_globals; ++i) { 3597 /* Liveness should see that globals are synced back, 3598 that is, either TS_DEAD or TS_MEM. */ 3599 arg_ts = &s->temps[i]; 3600 tcg_debug_assert(arg_ts->state_ptr == 0 3601 || arg_ts->state != 0); 3602 } 3603 } else { 3604 for (i = 0; i < nb_globals; ++i) { 3605 /* Liveness should see that globals are saved back, 3606 that is, TS_DEAD, waiting to be reloaded. */ 3607 arg_ts = &s->temps[i]; 3608 tcg_debug_assert(arg_ts->state_ptr == 0 3609 || arg_ts->state == TS_DEAD); 3610 } 3611 } 3612 3613 /* Outputs become available. 
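       Moves are special-cased: if the destination is an indirect global
       that must be synced and the mov output is otherwise dead, the mov is
       removed and replaced by a store of its input.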
*/ 3614 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3615 arg_ts = arg_temp(op->args[0]); 3616 dir_ts = arg_ts->state_ptr; 3617 if (dir_ts) { 3618 op->args[0] = temp_arg(dir_ts); 3619 changes = true; 3620 3621 /* The output is now live and modified. */ 3622 arg_ts->state = 0; 3623 3624 if (NEED_SYNC_ARG(0)) { 3625 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3626 ? INDEX_op_st_i32 3627 : INDEX_op_st_i64); 3628 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3629 TCGTemp *out_ts = dir_ts; 3630 3631 if (IS_DEAD_ARG(0)) { 3632 out_ts = arg_temp(op->args[1]); 3633 arg_ts->state = TS_DEAD; 3634 tcg_op_remove(s, op); 3635 } else { 3636 arg_ts->state = TS_MEM; 3637 } 3638 3639 sop->args[0] = temp_arg(out_ts); 3640 sop->args[1] = temp_arg(arg_ts->mem_base); 3641 sop->args[2] = arg_ts->mem_offset; 3642 } else { 3643 tcg_debug_assert(!IS_DEAD_ARG(0)); 3644 } 3645 } 3646 } else { 3647 for (i = 0; i < nb_oargs; i++) { 3648 arg_ts = arg_temp(op->args[i]); 3649 dir_ts = arg_ts->state_ptr; 3650 if (!dir_ts) { 3651 continue; 3652 } 3653 op->args[i] = temp_arg(dir_ts); 3654 changes = true; 3655 3656 /* The output is now live and modified. */ 3657 arg_ts->state = 0; 3658 3659 /* Sync outputs upon their last write. */ 3660 if (NEED_SYNC_ARG(i)) { 3661 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3662 ? INDEX_op_st_i32 3663 : INDEX_op_st_i64); 3664 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3665 3666 sop->args[0] = temp_arg(dir_ts); 3667 sop->args[1] = temp_arg(arg_ts->mem_base); 3668 sop->args[2] = arg_ts->mem_offset; 3669 3670 arg_ts->state = TS_MEM; 3671 } 3672 /* Drop outputs that are dead. */ 3673 if (IS_DEAD_ARG(i)) { 3674 arg_ts->state = TS_DEAD; 3675 } 3676 } 3677 } 3678 } 3679 3680 return changes; 3681 } 3682 3683 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3684 { 3685 intptr_t off; 3686 int size, align; 3687 3688 /* When allocating an object, look at the full type. */ 3689 size = tcg_type_size(ts->base_type); 3690 switch (ts->base_type) { 3691 case TCG_TYPE_I32: 3692 align = 4; 3693 break; 3694 case TCG_TYPE_I64: 3695 case TCG_TYPE_V64: 3696 align = 8; 3697 break; 3698 case TCG_TYPE_I128: 3699 case TCG_TYPE_V128: 3700 case TCG_TYPE_V256: 3701 /* 3702 * Note that we do not require aligned storage for V256, 3703 * and that we provide alignment for I128 to match V128, 3704 * even if that's above what the host ABI requires. 3705 */ 3706 align = 16; 3707 break; 3708 default: 3709 g_assert_not_reached(); 3710 } 3711 3712 /* 3713 * Assume the stack is sufficiently aligned. 3714 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3715 * and do not require 16 byte vector alignment. This seems slightly 3716 * easier than fully parameterizing the above switch statement. 3717 */ 3718 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3719 off = ROUND_UP(s->current_frame_offset, align); 3720 3721 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3722 if (off + size > s->frame_end) { 3723 tcg_raise_tb_overflow(s); 3724 } 3725 s->current_frame_offset = off + size; 3726 #if defined(__sparc__) 3727 off += TCG_TARGET_STACK_BIAS; 3728 #endif 3729 3730 /* If the object was subdivided, assign memory to all the parts. */ 3731 if (ts->base_type != ts->type) { 3732 int part_size = tcg_type_size(ts->type); 3733 int part_count = size / part_size; 3734 3735 /* 3736 * Each part is allocated sequentially in tcg_temp_new_internal. 3737 * Jump back to the first part by subtracting the current index. 
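     * For example, a TCG_TYPE_I128 value held as two I64 parts has
     * part_size == 8 and part_count == 2, so the two halves land in
     * adjacent 8-byte slots of this one frame allocation.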
3738 */ 3739 ts -= ts->temp_subindex; 3740 for (int i = 0; i < part_count; ++i) { 3741 ts[i].mem_offset = off + i * part_size; 3742 ts[i].mem_base = s->frame_temp; 3743 ts[i].mem_allocated = 1; 3744 } 3745 } else { 3746 ts->mem_offset = off; 3747 ts->mem_base = s->frame_temp; 3748 ts->mem_allocated = 1; 3749 } 3750 } 3751 3752 /* Assign @reg to @ts, and update reg_to_temp[]. */ 3753 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 3754 { 3755 if (ts->val_type == TEMP_VAL_REG) { 3756 TCGReg old = ts->reg; 3757 tcg_debug_assert(s->reg_to_temp[old] == ts); 3758 if (old == reg) { 3759 return; 3760 } 3761 s->reg_to_temp[old] = NULL; 3762 } 3763 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3764 s->reg_to_temp[reg] = ts; 3765 ts->val_type = TEMP_VAL_REG; 3766 ts->reg = reg; 3767 } 3768 3769 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 3770 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 3771 { 3772 tcg_debug_assert(type != TEMP_VAL_REG); 3773 if (ts->val_type == TEMP_VAL_REG) { 3774 TCGReg reg = ts->reg; 3775 tcg_debug_assert(s->reg_to_temp[reg] == ts); 3776 s->reg_to_temp[reg] = NULL; 3777 } 3778 ts->val_type = type; 3779 } 3780 3781 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3782 3783 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3784 mark it free; otherwise mark it dead. */ 3785 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3786 { 3787 TCGTempVal new_type; 3788 3789 switch (ts->kind) { 3790 case TEMP_FIXED: 3791 return; 3792 case TEMP_GLOBAL: 3793 case TEMP_TB: 3794 new_type = TEMP_VAL_MEM; 3795 break; 3796 case TEMP_EBB: 3797 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3798 break; 3799 case TEMP_CONST: 3800 new_type = TEMP_VAL_CONST; 3801 break; 3802 default: 3803 g_assert_not_reached(); 3804 } 3805 set_temp_val_nonreg(s, ts, new_type); 3806 } 3807 3808 /* Mark a temporary as dead. */ 3809 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3810 { 3811 temp_free_or_dead(s, ts, 1); 3812 } 3813 3814 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3815 registers needs to be allocated to store a constant. If 'free_or_dead' 3816 is non-zero, subsequently release the temporary; if it is positive, the 3817 temp is dead; if it is negative, the temp is free. */ 3818 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3819 TCGRegSet preferred_regs, int free_or_dead) 3820 { 3821 if (!temp_readonly(ts) && !ts->mem_coherent) { 3822 if (!ts->mem_allocated) { 3823 temp_allocate_frame(s, ts); 3824 } 3825 switch (ts->val_type) { 3826 case TEMP_VAL_CONST: 3827 /* If we're going to free the temp immediately, then we won't 3828 require it later in a register, so attempt to store the 3829 constant to memory directly. 
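       If the backend cannot encode such a store (tcg_out_sti returns
       false), fall through: load the constant into a register and store
       that register instead.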
*/ 3830 if (free_or_dead 3831 && tcg_out_sti(s, ts->type, ts->val, 3832 ts->mem_base->reg, ts->mem_offset)) { 3833 break; 3834 } 3835 temp_load(s, ts, tcg_target_available_regs[ts->type], 3836 allocated_regs, preferred_regs); 3837 /* fallthrough */ 3838 3839 case TEMP_VAL_REG: 3840 tcg_out_st(s, ts->type, ts->reg, 3841 ts->mem_base->reg, ts->mem_offset); 3842 break; 3843 3844 case TEMP_VAL_MEM: 3845 break; 3846 3847 case TEMP_VAL_DEAD: 3848 default: 3849 g_assert_not_reached(); 3850 } 3851 ts->mem_coherent = 1; 3852 } 3853 if (free_or_dead) { 3854 temp_free_or_dead(s, ts, free_or_dead); 3855 } 3856 } 3857 3858 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3859 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3860 { 3861 TCGTemp *ts = s->reg_to_temp[reg]; 3862 if (ts != NULL) { 3863 temp_sync(s, ts, allocated_regs, 0, -1); 3864 } 3865 } 3866 3867 /** 3868 * tcg_reg_alloc: 3869 * @required_regs: Set of registers in which we must allocate. 3870 * @allocated_regs: Set of registers which must be avoided. 3871 * @preferred_regs: Set of registers we should prefer. 3872 * @rev: True if we search the registers in "indirect" order. 3873 * 3874 * The allocated register must be in @required_regs & ~@allocated_regs, 3875 * but if we can put it in @preferred_regs we may save a move later. 3876 */ 3877 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3878 TCGRegSet allocated_regs, 3879 TCGRegSet preferred_regs, bool rev) 3880 { 3881 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3882 TCGRegSet reg_ct[2]; 3883 const int *order; 3884 3885 reg_ct[1] = required_regs & ~allocated_regs; 3886 tcg_debug_assert(reg_ct[1] != 0); 3887 reg_ct[0] = reg_ct[1] & preferred_regs; 3888 3889 /* Skip the preferred_regs option if it cannot be satisfied, 3890 or if the preference made no difference. */ 3891 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3892 3893 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3894 3895 /* Try free registers, preferences first. */ 3896 for (j = f; j < 2; j++) { 3897 TCGRegSet set = reg_ct[j]; 3898 3899 if (tcg_regset_single(set)) { 3900 /* One register in the set. */ 3901 TCGReg reg = tcg_regset_first(set); 3902 if (s->reg_to_temp[reg] == NULL) { 3903 return reg; 3904 } 3905 } else { 3906 for (i = 0; i < n; i++) { 3907 TCGReg reg = order[i]; 3908 if (s->reg_to_temp[reg] == NULL && 3909 tcg_regset_test_reg(set, reg)) { 3910 return reg; 3911 } 3912 } 3913 } 3914 } 3915 3916 /* We must spill something. */ 3917 for (j = f; j < 2; j++) { 3918 TCGRegSet set = reg_ct[j]; 3919 3920 if (tcg_regset_single(set)) { 3921 /* One register in the set. */ 3922 TCGReg reg = tcg_regset_first(set); 3923 tcg_reg_free(s, reg, allocated_regs); 3924 return reg; 3925 } else { 3926 for (i = 0; i < n; i++) { 3927 TCGReg reg = order[i]; 3928 if (tcg_regset_test_reg(set, reg)) { 3929 tcg_reg_free(s, reg, allocated_regs); 3930 return reg; 3931 } 3932 } 3933 } 3934 } 3935 3936 g_assert_not_reached(); 3937 } 3938 3939 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 3940 TCGRegSet allocated_regs, 3941 TCGRegSet preferred_regs, bool rev) 3942 { 3943 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3944 TCGRegSet reg_ct[2]; 3945 const int *order; 3946 3947 /* Ensure that if I is not in allocated_regs, I+1 is not either. 
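       E.g. with allocated_regs == 0b0100 (reg 2 busy), the mask
       allocated_regs | (allocated_regs >> 1) == 0b0110 also excludes reg 1
       as the first register of a pair, since its partner would be reg 2.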
*/ 3948 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 3949 tcg_debug_assert(reg_ct[1] != 0); 3950 reg_ct[0] = reg_ct[1] & preferred_regs; 3951 3952 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3953 3954 /* 3955 * Skip the preferred_regs option if it cannot be satisfied, 3956 * or if the preference made no difference. 3957 */ 3958 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3959 3960 /* 3961 * Minimize the number of flushes by looking for 2 free registers first, 3962 * then a single flush, then two flushes. 3963 */ 3964 for (fmin = 2; fmin >= 0; fmin--) { 3965 for (j = k; j < 2; j++) { 3966 TCGRegSet set = reg_ct[j]; 3967 3968 for (i = 0; i < n; i++) { 3969 TCGReg reg = order[i]; 3970 3971 if (tcg_regset_test_reg(set, reg)) { 3972 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 3973 if (f >= fmin) { 3974 tcg_reg_free(s, reg, allocated_regs); 3975 tcg_reg_free(s, reg + 1, allocated_regs); 3976 return reg; 3977 } 3978 } 3979 } 3980 } 3981 } 3982 g_assert_not_reached(); 3983 } 3984 3985 /* Make sure the temporary is in a register. If needed, allocate the register 3986 from DESIRED while avoiding ALLOCATED. */ 3987 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3988 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3989 { 3990 TCGReg reg; 3991 3992 switch (ts->val_type) { 3993 case TEMP_VAL_REG: 3994 return; 3995 case TEMP_VAL_CONST: 3996 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3997 preferred_regs, ts->indirect_base); 3998 if (ts->type <= TCG_TYPE_I64) { 3999 tcg_out_movi(s, ts->type, reg, ts->val); 4000 } else { 4001 uint64_t val = ts->val; 4002 MemOp vece = MO_64; 4003 4004 /* 4005 * Find the minimal vector element that matches the constant. 4006 * The targets will, in general, have to do this search anyway, 4007 * do this generically. 4008 */ 4009 if (val == dup_const(MO_8, val)) { 4010 vece = MO_8; 4011 } else if (val == dup_const(MO_16, val)) { 4012 vece = MO_16; 4013 } else if (val == dup_const(MO_32, val)) { 4014 vece = MO_32; 4015 } 4016 4017 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 4018 } 4019 ts->mem_coherent = 0; 4020 break; 4021 case TEMP_VAL_MEM: 4022 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 4023 preferred_regs, ts->indirect_base); 4024 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 4025 ts->mem_coherent = 1; 4026 break; 4027 case TEMP_VAL_DEAD: 4028 default: 4029 g_assert_not_reached(); 4030 } 4031 set_temp_val_reg(s, ts, reg); 4032 } 4033 4034 /* Save a temporary to memory. 'allocated_regs' is used in case a 4035 temporary registers needs to be allocated to store a constant. */ 4036 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4037 { 4038 /* The liveness analysis already ensures that globals are back 4039 in memory. Keep an tcg_debug_assert for safety. */ 4040 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4041 } 4042 4043 /* save globals to their canonical location and assume they can be 4044 modified be the following code. 'allocated_regs' is used in case a 4045 temporary registers needs to be allocated to store a constant. */ 4046 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4047 { 4048 int i, n; 4049 4050 for (i = 0, n = s->nb_globals; i < n; i++) { 4051 temp_save(s, &s->temps[i], allocated_regs); 4052 } 4053 } 4054 4055 /* sync globals to their canonical location and assume they can be 4056 read by the following code. 
'allocated_regs' is used in case a 4057 temporary register needs to be allocated to store a constant. */ 4058 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4059 { 4060 int i, n; 4061 4062 for (i = 0, n = s->nb_globals; i < n; i++) { 4063 TCGTemp *ts = &s->temps[i]; 4064 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4065 || ts->kind == TEMP_FIXED 4066 || ts->mem_coherent); 4067 } 4068 } 4069 4070 /* at the end of a basic block, we assume all temporaries are dead and 4071 all globals are stored at their canonical location. */ 4072 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4073 { 4074 int i; 4075 4076 for (i = s->nb_globals; i < s->nb_temps; i++) { 4077 TCGTemp *ts = &s->temps[i]; 4078 4079 switch (ts->kind) { 4080 case TEMP_TB: 4081 temp_save(s, ts, allocated_regs); 4082 break; 4083 case TEMP_EBB: 4084 /* The liveness analysis already ensures that temps are dead. 4085 Keep a tcg_debug_assert for safety. */ 4086 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4087 break; 4088 case TEMP_CONST: 4089 /* Similarly, we should have freed any allocated register. */ 4090 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4091 break; 4092 default: 4093 g_assert_not_reached(); 4094 } 4095 } 4096 4097 save_globals(s, allocated_regs); 4098 } 4099 4100 /* 4101 * At a conditional branch, we assume all temporaries are dead unless 4102 * explicitly live-across-conditional-branch; all globals and local 4103 * temps are synced to their canonical location. 4104 */ 4105 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4106 { 4107 sync_globals(s, allocated_regs); 4108 4109 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4110 TCGTemp *ts = &s->temps[i]; 4111 /* 4112 * The liveness analysis already ensures that temps are dead. 4113 * Keep tcg_debug_asserts for safety. 4114 */ 4115 switch (ts->kind) { 4116 case TEMP_TB: 4117 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4118 break; 4119 case TEMP_EBB: 4120 case TEMP_CONST: 4121 break; 4122 default: 4123 g_assert_not_reached(); 4124 } 4125 } 4126 } 4127 4128 /* 4129 * Specialized code generation for INDEX_op_mov_* with a constant. 4130 */ 4131 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4132 tcg_target_ulong val, TCGLifeData arg_life, 4133 TCGRegSet preferred_regs) 4134 { 4135 /* ENV should not be modified. */ 4136 tcg_debug_assert(!temp_readonly(ots)); 4137 4138 /* The movi is not explicitly generated here. */ 4139 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4140 ots->val = val; 4141 ots->mem_coherent = 0; 4142 if (NEED_SYNC_ARG(0)) { 4143 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4144 } else if (IS_DEAD_ARG(0)) { 4145 temp_dead(s, ots); 4146 } 4147 } 4148 4149 /* 4150 * Specialized code generation for INDEX_op_mov_*. 4151 */ 4152 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4153 { 4154 const TCGLifeData arg_life = op->life; 4155 TCGRegSet allocated_regs, preferred_regs; 4156 TCGTemp *ts, *ots; 4157 TCGType otype, itype; 4158 TCGReg oreg, ireg; 4159 4160 allocated_regs = s->reserved_regs; 4161 preferred_regs = output_pref(op, 0); 4162 ots = arg_temp(op->args[0]); 4163 ts = arg_temp(op->args[1]); 4164 4165 /* ENV should not be modified. */ 4166 tcg_debug_assert(!temp_readonly(ots)); 4167 4168 /* Note that otype != itype for no-op truncation.
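That is, a mov whose output temp is 32-bit and whose input temp is 64-bit keeps only the low half of the value; no extra code is emitted for the truncation.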
*/ 4169 otype = ots->type; 4170 itype = ts->type; 4171 4172 if (ts->val_type == TEMP_VAL_CONST) { 4173 /* propagate constant or generate sti */ 4174 tcg_target_ulong val = ts->val; 4175 if (IS_DEAD_ARG(1)) { 4176 temp_dead(s, ts); 4177 } 4178 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4179 return; 4180 } 4181 4182 /* If the source value is in memory we're going to be forced 4183 to have it in a register in order to perform the copy. Copy 4184 the SOURCE value into its own register first, that way we 4185 don't have to reload SOURCE the next time it is used. */ 4186 if (ts->val_type == TEMP_VAL_MEM) { 4187 temp_load(s, ts, tcg_target_available_regs[itype], 4188 allocated_regs, preferred_regs); 4189 } 4190 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4191 ireg = ts->reg; 4192 4193 if (IS_DEAD_ARG(0)) { 4194 /* mov to a non-saved dead register makes no sense (even with 4195 liveness analysis disabled). */ 4196 tcg_debug_assert(NEED_SYNC_ARG(0)); 4197 if (!ots->mem_allocated) { 4198 temp_allocate_frame(s, ots); 4199 } 4200 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4201 if (IS_DEAD_ARG(1)) { 4202 temp_dead(s, ts); 4203 } 4204 temp_dead(s, ots); 4205 return; 4206 } 4207 4208 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4209 /* 4210 * The mov can be suppressed. Kill input first, so that it 4211 * is unlinked from reg_to_temp, then set the output to the 4212 * reg that we saved from the input. 4213 */ 4214 temp_dead(s, ts); 4215 oreg = ireg; 4216 } else { 4217 if (ots->val_type == TEMP_VAL_REG) { 4218 oreg = ots->reg; 4219 } else { 4220 /* Make sure to not spill the input register during allocation. */ 4221 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4222 allocated_regs | ((TCGRegSet)1 << ireg), 4223 preferred_regs, ots->indirect_base); 4224 } 4225 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4226 /* 4227 * Cross register class move not supported. 4228 * Store the source register into the destination slot 4229 * and leave the destination temp as TEMP_VAL_MEM. 4230 */ 4231 assert(!temp_readonly(ots)); 4232 if (!ts->mem_allocated) { 4233 temp_allocate_frame(s, ots); 4234 } 4235 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4236 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4237 ots->mem_coherent = 1; 4238 return; 4239 } 4240 } 4241 set_temp_val_reg(s, ots, oreg); 4242 ots->mem_coherent = 0; 4243 4244 if (NEED_SYNC_ARG(0)) { 4245 temp_sync(s, ots, allocated_regs, 0, 0); 4246 } 4247 } 4248 4249 /* 4250 * Specialized code generation for INDEX_op_dup_vec. 4251 */ 4252 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4253 { 4254 const TCGLifeData arg_life = op->life; 4255 TCGRegSet dup_out_regs, dup_in_regs; 4256 TCGTemp *its, *ots; 4257 TCGType itype, vtype; 4258 unsigned vece; 4259 int lowpart_ofs; 4260 bool ok; 4261 4262 ots = arg_temp(op->args[0]); 4263 its = arg_temp(op->args[1]); 4264 4265 /* ENV should not be modified. */ 4266 tcg_debug_assert(!temp_readonly(ots)); 4267 4268 itype = its->type; 4269 vece = TCGOP_VECE(op); 4270 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4271 4272 if (its->val_type == TEMP_VAL_CONST) { 4273 /* Propagate constant via movi -> dupi. 
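The output temp is merely marked TEMP_VAL_CONST here; the actual dupi_vec is emitted later by temp_load() or temp_sync() if and when a register copy is required.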
*/ 4274 tcg_target_ulong val = its->val; 4275 if (IS_DEAD_ARG(1)) { 4276 temp_dead(s, its); 4277 } 4278 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4279 return; 4280 } 4281 4282 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4283 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 4284 4285 /* Allocate the output register now. */ 4286 if (ots->val_type != TEMP_VAL_REG) { 4287 TCGRegSet allocated_regs = s->reserved_regs; 4288 TCGReg oreg; 4289 4290 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4291 /* Make sure to not spill the input register. */ 4292 tcg_regset_set_reg(allocated_regs, its->reg); 4293 } 4294 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4295 output_pref(op, 0), ots->indirect_base); 4296 set_temp_val_reg(s, ots, oreg); 4297 } 4298 4299 switch (its->val_type) { 4300 case TEMP_VAL_REG: 4301 /* 4302 * The dup constraints must be broad, covering all possible VECE. 4303 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 4304 * to fail, indicating that extra moves are required for that case. 4305 */ 4306 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4307 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4308 goto done; 4309 } 4310 /* Try again from memory or a vector input register. */ 4311 } 4312 if (!its->mem_coherent) { 4313 /* 4314 * The input register is not synced, and so an extra store 4315 * would be required to use memory. Attempt an integer-vector 4316 * register move first. We do not have a TCGRegSet for this. 4317 */ 4318 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4319 break; 4320 } 4321 /* Sync the temp back to its slot and load from there. */ 4322 temp_sync(s, its, s->reserved_regs, 0, 0); 4323 } 4324 /* fall through */ 4325 4326 case TEMP_VAL_MEM: 4327 lowpart_ofs = 0; 4328 if (HOST_BIG_ENDIAN) { 4329 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 4330 } 4331 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 4332 its->mem_offset + lowpart_ofs)) { 4333 goto done; 4334 } 4335 /* Load the input into the destination vector register. */ 4336 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 4337 break; 4338 4339 default: 4340 g_assert_not_reached(); 4341 } 4342 4343 /* We now have a vector input register, so dup must succeed.
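Every earlier case either jumped to 'done' or left the scalar in ots->reg, so broadcasting ots->reg onto itself completes the operation.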
*/ 4344 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4345 tcg_debug_assert(ok); 4346 4347 done: 4348 ots->mem_coherent = 0; 4349 if (IS_DEAD_ARG(1)) { 4350 temp_dead(s, its); 4351 } 4352 if (NEED_SYNC_ARG(0)) { 4353 temp_sync(s, ots, s->reserved_regs, 0, 0); 4354 } 4355 if (IS_DEAD_ARG(0)) { 4356 temp_dead(s, ots); 4357 } 4358 } 4359 4360 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4361 { 4362 const TCGLifeData arg_life = op->life; 4363 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4364 TCGRegSet i_allocated_regs; 4365 TCGRegSet o_allocated_regs; 4366 int i, k, nb_iargs, nb_oargs; 4367 TCGReg reg; 4368 TCGArg arg; 4369 const TCGArgConstraint *arg_ct; 4370 TCGTemp *ts; 4371 TCGArg new_args[TCG_MAX_OP_ARGS]; 4372 int const_args[TCG_MAX_OP_ARGS]; 4373 4374 nb_oargs = def->nb_oargs; 4375 nb_iargs = def->nb_iargs; 4376 4377 /* copy constants */ 4378 memcpy(new_args + nb_oargs + nb_iargs, 4379 op->args + nb_oargs + nb_iargs, 4380 sizeof(TCGArg) * def->nb_cargs); 4381 4382 i_allocated_regs = s->reserved_regs; 4383 o_allocated_regs = s->reserved_regs; 4384 4385 /* satisfy input constraints */ 4386 for (k = 0; k < nb_iargs; k++) { 4387 TCGRegSet i_preferred_regs, i_required_regs; 4388 bool allocate_new_reg, copyto_new_reg; 4389 TCGTemp *ts2; 4390 int i1, i2; 4391 4392 i = def->args_ct[nb_oargs + k].sort_index; 4393 arg = op->args[i]; 4394 arg_ct = &def->args_ct[i]; 4395 ts = arg_temp(arg); 4396 4397 if (ts->val_type == TEMP_VAL_CONST 4398 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4399 /* constant is OK for instruction */ 4400 const_args[i] = 1; 4401 new_args[i] = ts->val; 4402 continue; 4403 } 4404 4405 reg = ts->reg; 4406 i_preferred_regs = 0; 4407 i_required_regs = arg_ct->regs; 4408 allocate_new_reg = false; 4409 copyto_new_reg = false; 4410 4411 switch (arg_ct->pair) { 4412 case 0: /* not paired */ 4413 if (arg_ct->ialias) { 4414 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4415 4416 /* 4417 * If the input is readonly, then it cannot also be an 4418 * output and aliased to itself. If the input is not 4419 * dead after the instruction, we must allocate a new 4420 * register and move it. 4421 */ 4422 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4423 allocate_new_reg = true; 4424 } else if (ts->val_type == TEMP_VAL_REG) { 4425 /* 4426 * Check if the current register has already been 4427 * allocated for another input. 4428 */ 4429 allocate_new_reg = 4430 tcg_regset_test_reg(i_allocated_regs, reg); 4431 } 4432 } 4433 if (!allocate_new_reg) { 4434 temp_load(s, ts, i_required_regs, i_allocated_regs, 4435 i_preferred_regs); 4436 reg = ts->reg; 4437 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4438 } 4439 if (allocate_new_reg) { 4440 /* 4441 * Allocate a new register matching the constraint 4442 * and move the temporary register into it. 4443 */ 4444 temp_load(s, ts, tcg_target_available_regs[ts->type], 4445 i_allocated_regs, 0); 4446 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4447 i_preferred_regs, ts->indirect_base); 4448 copyto_new_reg = true; 4449 } 4450 break; 4451 4452 case 1: 4453 /* First of an input pair; if i1 == i2, the second is an output. */ 4454 i1 = i; 4455 i2 = arg_ct->pair_index; 4456 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4457 4458 /* 4459 * It is easier to default to allocating a new pair 4460 * and to identify a few cases where it's not required. 
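* An aliased pair is reused in place only when both inputs die here, already occupy consecutive registers satisfying the constraint, and neither register is otherwise claimed; a non-aliased pair is reused only when the two inputs are already adjacent.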
4461 */ 4462 if (arg_ct->ialias) { 4463 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4464 if (IS_DEAD_ARG(i1) && 4465 IS_DEAD_ARG(i2) && 4466 !temp_readonly(ts) && 4467 ts->val_type == TEMP_VAL_REG && 4468 ts->reg < TCG_TARGET_NB_REGS - 1 && 4469 tcg_regset_test_reg(i_required_regs, reg) && 4470 !tcg_regset_test_reg(i_allocated_regs, reg) && 4471 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4472 (ts2 4473 ? ts2->val_type == TEMP_VAL_REG && 4474 ts2->reg == reg + 1 && 4475 !temp_readonly(ts2) 4476 : s->reg_to_temp[reg + 1] == NULL)) { 4477 break; 4478 } 4479 } else { 4480 /* Without aliasing, the pair must also be an input. */ 4481 tcg_debug_assert(ts2); 4482 if (ts->val_type == TEMP_VAL_REG && 4483 ts2->val_type == TEMP_VAL_REG && 4484 ts2->reg == reg + 1 && 4485 tcg_regset_test_reg(i_required_regs, reg)) { 4486 break; 4487 } 4488 } 4489 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4490 0, ts->indirect_base); 4491 goto do_pair; 4492 4493 case 2: /* pair second */ 4494 reg = new_args[arg_ct->pair_index] + 1; 4495 goto do_pair; 4496 4497 case 3: /* ialias with second output, no first input */ 4498 tcg_debug_assert(arg_ct->ialias); 4499 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4500 4501 if (IS_DEAD_ARG(i) && 4502 !temp_readonly(ts) && 4503 ts->val_type == TEMP_VAL_REG && 4504 reg > 0 && 4505 s->reg_to_temp[reg - 1] == NULL && 4506 tcg_regset_test_reg(i_required_regs, reg) && 4507 !tcg_regset_test_reg(i_allocated_regs, reg) && 4508 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4509 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4510 break; 4511 } 4512 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4513 i_allocated_regs, 0, 4514 ts->indirect_base); 4515 tcg_regset_set_reg(i_allocated_regs, reg); 4516 reg += 1; 4517 goto do_pair; 4518 4519 do_pair: 4520 /* 4521 * If an aliased input is not dead after the instruction, 4522 * we must allocate a new register and move it. 4523 */ 4524 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4525 TCGRegSet t_allocated_regs = i_allocated_regs; 4526 4527 /* 4528 * Because of the alias, and the continued life, make sure 4529 * that the temp is somewhere *other* than the reg pair, 4530 * and we get a copy in reg. 4531 */ 4532 tcg_regset_set_reg(t_allocated_regs, reg); 4533 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4534 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4535 /* If ts was already in reg, copy it somewhere else. */ 4536 TCGReg nr; 4537 bool ok; 4538 4539 tcg_debug_assert(ts->kind != TEMP_FIXED); 4540 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4541 t_allocated_regs, 0, ts->indirect_base); 4542 ok = tcg_out_mov(s, ts->type, nr, reg); 4543 tcg_debug_assert(ok); 4544 4545 set_temp_val_reg(s, ts, nr); 4546 } else { 4547 temp_load(s, ts, tcg_target_available_regs[ts->type], 4548 t_allocated_regs, 0); 4549 copyto_new_reg = true; 4550 } 4551 } else { 4552 /* Preferably allocate to reg, otherwise copy. */ 4553 i_required_regs = (TCGRegSet)1 << reg; 4554 temp_load(s, ts, i_required_regs, i_allocated_regs, 4555 i_preferred_regs); 4556 copyto_new_reg = ts->reg != reg; 4557 } 4558 break; 4559 4560 default: 4561 g_assert_not_reached(); 4562 } 4563 4564 if (copyto_new_reg) { 4565 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4566 /* 4567 * Cross register class move not supported. Sync the 4568 * temp back to its slot and load from there. 
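* tcg_out_mov() only fails for moves between distinct register classes (e.g. integer and vector); the temp's memory slot serves as the bridge between the two classes.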
4569 */ 4570 temp_sync(s, ts, i_allocated_regs, 0, 0); 4571 tcg_out_ld(s, ts->type, reg, 4572 ts->mem_base->reg, ts->mem_offset); 4573 } 4574 } 4575 new_args[i] = reg; 4576 const_args[i] = 0; 4577 tcg_regset_set_reg(i_allocated_regs, reg); 4578 } 4579 4580 /* mark dead temporaries and free the associated registers */ 4581 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4582 if (IS_DEAD_ARG(i)) { 4583 temp_dead(s, arg_temp(op->args[i])); 4584 } 4585 } 4586 4587 if (def->flags & TCG_OPF_COND_BRANCH) { 4588 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4589 } else if (def->flags & TCG_OPF_BB_END) { 4590 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4591 } else { 4592 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4593 /* XXX: permit generic clobber register list ? */ 4594 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4595 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4596 tcg_reg_free(s, i, i_allocated_regs); 4597 } 4598 } 4599 } 4600 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4601 /* sync globals if the op has side effects and might trigger 4602 an exception. */ 4603 sync_globals(s, i_allocated_regs); 4604 } 4605 4606 /* satisfy the output constraints */ 4607 for(k = 0; k < nb_oargs; k++) { 4608 i = def->args_ct[k].sort_index; 4609 arg = op->args[i]; 4610 arg_ct = &def->args_ct[i]; 4611 ts = arg_temp(arg); 4612 4613 /* ENV should not be modified. */ 4614 tcg_debug_assert(!temp_readonly(ts)); 4615 4616 switch (arg_ct->pair) { 4617 case 0: /* not paired */ 4618 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4619 reg = new_args[arg_ct->alias_index]; 4620 } else if (arg_ct->newreg) { 4621 reg = tcg_reg_alloc(s, arg_ct->regs, 4622 i_allocated_regs | o_allocated_regs, 4623 output_pref(op, k), ts->indirect_base); 4624 } else { 4625 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4626 output_pref(op, k), ts->indirect_base); 4627 } 4628 break; 4629 4630 case 1: /* first of pair */ 4631 tcg_debug_assert(!arg_ct->newreg); 4632 if (arg_ct->oalias) { 4633 reg = new_args[arg_ct->alias_index]; 4634 break; 4635 } 4636 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 4637 output_pref(op, k), ts->indirect_base); 4638 break; 4639 4640 case 2: /* second of pair */ 4641 tcg_debug_assert(!arg_ct->newreg); 4642 if (arg_ct->oalias) { 4643 reg = new_args[arg_ct->alias_index]; 4644 } else { 4645 reg = new_args[arg_ct->pair_index] + 1; 4646 } 4647 break; 4648 4649 case 3: /* first of pair, aliasing with a second input */ 4650 tcg_debug_assert(!arg_ct->newreg); 4651 reg = new_args[arg_ct->pair_index] - 1; 4652 break; 4653 4654 default: 4655 g_assert_not_reached(); 4656 } 4657 tcg_regset_set_reg(o_allocated_regs, reg); 4658 set_temp_val_reg(s, ts, reg); 4659 ts->mem_coherent = 0; 4660 new_args[i] = reg; 4661 } 4662 } 4663 4664 /* emit instruction */ 4665 switch (op->opc) { 4666 case INDEX_op_ext8s_i32: 4667 tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4668 break; 4669 case INDEX_op_ext8s_i64: 4670 tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4671 break; 4672 case INDEX_op_ext8u_i32: 4673 case INDEX_op_ext8u_i64: 4674 tcg_out_ext8u(s, new_args[0], new_args[1]); 4675 break; 4676 case INDEX_op_ext16s_i32: 4677 tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4678 break; 4679 case INDEX_op_ext16s_i64: 4680 tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4681 break; 4682 case INDEX_op_ext16u_i32: 4683 case INDEX_op_ext16u_i64: 4684 tcg_out_ext16u(s, new_args[0], new_args[1]); 4685 break; 4686 case INDEX_op_ext32s_i64: 4687 tcg_out_ext32s(s, 
new_args[0], new_args[1]); 4688 break; 4689 case INDEX_op_ext32u_i64: 4690 tcg_out_ext32u(s, new_args[0], new_args[1]); 4691 break; 4692 case INDEX_op_ext_i32_i64: 4693 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 4694 break; 4695 case INDEX_op_extu_i32_i64: 4696 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 4697 break; 4698 case INDEX_op_extrl_i64_i32: 4699 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 4700 break; 4701 default: 4702 if (def->flags & TCG_OPF_VECTOR) { 4703 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4704 new_args, const_args); 4705 } else { 4706 tcg_out_op(s, op->opc, new_args, const_args); 4707 } 4708 break; 4709 } 4710 4711 /* move the outputs in the correct register if needed */ 4712 for(i = 0; i < nb_oargs; i++) { 4713 ts = arg_temp(op->args[i]); 4714 4715 /* ENV should not be modified. */ 4716 tcg_debug_assert(!temp_readonly(ts)); 4717 4718 if (NEED_SYNC_ARG(i)) { 4719 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4720 } else if (IS_DEAD_ARG(i)) { 4721 temp_dead(s, ts); 4722 } 4723 } 4724 } 4725 4726 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4727 { 4728 const TCGLifeData arg_life = op->life; 4729 TCGTemp *ots, *itsl, *itsh; 4730 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4731 4732 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4733 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4734 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4735 4736 ots = arg_temp(op->args[0]); 4737 itsl = arg_temp(op->args[1]); 4738 itsh = arg_temp(op->args[2]); 4739 4740 /* ENV should not be modified. */ 4741 tcg_debug_assert(!temp_readonly(ots)); 4742 4743 /* Allocate the output register now. */ 4744 if (ots->val_type != TEMP_VAL_REG) { 4745 TCGRegSet allocated_regs = s->reserved_regs; 4746 TCGRegSet dup_out_regs = 4747 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4748 TCGReg oreg; 4749 4750 /* Make sure to not spill the input registers. */ 4751 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 4752 tcg_regset_set_reg(allocated_regs, itsl->reg); 4753 } 4754 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 4755 tcg_regset_set_reg(allocated_regs, itsh->reg); 4756 } 4757 4758 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4759 output_pref(op, 0), ots->indirect_base); 4760 set_temp_val_reg(s, ots, oreg); 4761 } 4762 4763 /* Promote dup2 of immediates to dupi_vec. */ 4764 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 4765 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 4766 MemOp vece = MO_64; 4767 4768 if (val == dup_const(MO_8, val)) { 4769 vece = MO_8; 4770 } else if (val == dup_const(MO_16, val)) { 4771 vece = MO_16; 4772 } else if (val == dup_const(MO_32, val)) { 4773 vece = MO_32; 4774 } 4775 4776 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 4777 goto done; 4778 } 4779 4780 /* If the two inputs form one 64-bit value, try dupm_vec. */ 4781 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 4782 itsh->temp_subindex == !HOST_BIG_ENDIAN && 4783 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 4784 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 4785 4786 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 4787 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 4788 4789 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 4790 its->mem_base->reg, its->mem_offset)) { 4791 goto done; 4792 } 4793 } 4794 4795 /* Fall back to generic expansion. 
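The caller falls through to tcg_reg_alloc_op(), which emits the backend's own dup2_vec through the normal constraint machinery.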
*/ 4796 return false; 4797 4798 done: 4799 ots->mem_coherent = 0; 4800 if (IS_DEAD_ARG(1)) { 4801 temp_dead(s, itsl); 4802 } 4803 if (IS_DEAD_ARG(2)) { 4804 temp_dead(s, itsh); 4805 } 4806 if (NEED_SYNC_ARG(0)) { 4807 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 4808 } else if (IS_DEAD_ARG(0)) { 4809 temp_dead(s, ots); 4810 } 4811 return true; 4812 } 4813 4814 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 4815 TCGRegSet allocated_regs) 4816 { 4817 if (ts->val_type == TEMP_VAL_REG) { 4818 if (ts->reg != reg) { 4819 tcg_reg_free(s, reg, allocated_regs); 4820 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4821 /* 4822 * Cross register class move not supported. Sync the 4823 * temp back to its slot and load from there. 4824 */ 4825 temp_sync(s, ts, allocated_regs, 0, 0); 4826 tcg_out_ld(s, ts->type, reg, 4827 ts->mem_base->reg, ts->mem_offset); 4828 } 4829 } 4830 } else { 4831 TCGRegSet arg_set = 0; 4832 4833 tcg_reg_free(s, reg, allocated_regs); 4834 tcg_regset_set_reg(arg_set, reg); 4835 temp_load(s, ts, arg_set, allocated_regs, 0); 4836 } 4837 } 4838 4839 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts, 4840 TCGRegSet allocated_regs) 4841 { 4842 /* 4843 * When the destination is on the stack, load up the temp and store. 4844 * If there are many call-saved registers, the temp might live to 4845 * see another use; otherwise it'll be discarded. 4846 */ 4847 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 4848 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 4849 arg_slot_stk_ofs(arg_slot)); 4850 } 4851 4852 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 4853 TCGTemp *ts, TCGRegSet *allocated_regs) 4854 { 4855 if (arg_slot_reg_p(l->arg_slot)) { 4856 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 4857 load_arg_reg(s, reg, ts, *allocated_regs); 4858 tcg_regset_set_reg(*allocated_regs, reg); 4859 } else { 4860 load_arg_stk(s, l->arg_slot, ts, *allocated_regs); 4861 } 4862 } 4863 4864 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base, 4865 intptr_t ref_off, TCGRegSet *allocated_regs) 4866 { 4867 TCGReg reg; 4868 4869 if (arg_slot_reg_p(arg_slot)) { 4870 reg = tcg_target_call_iarg_regs[arg_slot]; 4871 tcg_reg_free(s, reg, *allocated_regs); 4872 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4873 tcg_regset_set_reg(*allocated_regs, reg); 4874 } else { 4875 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 4876 *allocated_regs, 0, false); 4877 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4878 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 4879 arg_slot_stk_ofs(arg_slot)); 4880 } 4881 } 4882 4883 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 4884 { 4885 const int nb_oargs = TCGOP_CALLO(op); 4886 const int nb_iargs = TCGOP_CALLI(op); 4887 const TCGLifeData arg_life = op->life; 4888 const TCGHelperInfo *info = tcg_call_info(op); 4889 TCGRegSet allocated_regs = s->reserved_regs; 4890 int i; 4891 4892 /* 4893 * Move inputs into place in reverse order, 4894 * so that we place stacked arguments first. 
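* Stack slots are thus filled while the fixed argument registers are still free to serve as scratch; the register arguments are pinned afterwards.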
4895 */ 4896 for (i = nb_iargs - 1; i >= 0; --i) { 4897 const TCGCallArgumentLoc *loc = &info->in[i]; 4898 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 4899 4900 switch (loc->kind) { 4901 case TCG_CALL_ARG_NORMAL: 4902 case TCG_CALL_ARG_EXTEND_U: 4903 case TCG_CALL_ARG_EXTEND_S: 4904 load_arg_normal(s, loc, ts, &allocated_regs); 4905 break; 4906 case TCG_CALL_ARG_BY_REF: 4907 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4908 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 4909 arg_slot_stk_ofs(loc->ref_slot), 4910 &allocated_regs); 4911 break; 4912 case TCG_CALL_ARG_BY_REF_N: 4913 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4914 break; 4915 default: 4916 g_assert_not_reached(); 4917 } 4918 } 4919 4920 /* Mark dead temporaries and free the associated registers. */ 4921 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4922 if (IS_DEAD_ARG(i)) { 4923 temp_dead(s, arg_temp(op->args[i])); 4924 } 4925 } 4926 4927 /* Clobber call registers. */ 4928 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4929 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4930 tcg_reg_free(s, i, allocated_regs); 4931 } 4932 } 4933 4934 /* 4935 * Save globals if they might be written by the helper, 4936 * sync them if they might be read. 4937 */ 4938 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 4939 /* Nothing to do */ 4940 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 4941 sync_globals(s, allocated_regs); 4942 } else { 4943 save_globals(s, allocated_regs); 4944 } 4945 4946 /* 4947 * If the ABI passes a pointer to the returned struct as the first 4948 * argument, load that now. Pass a pointer to the output home slot. 4949 */ 4950 if (info->out_kind == TCG_CALL_RET_BY_REF) { 4951 TCGTemp *ts = arg_temp(op->args[0]); 4952 4953 if (!ts->mem_allocated) { 4954 temp_allocate_frame(s, ts); 4955 } 4956 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 4957 } 4958 4959 tcg_out_call(s, tcg_call_func(op), info); 4960 4961 /* Assign output registers and emit moves if needed. */ 4962 switch (info->out_kind) { 4963 case TCG_CALL_RET_NORMAL: 4964 for (i = 0; i < nb_oargs; i++) { 4965 TCGTemp *ts = arg_temp(op->args[i]); 4966 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 4967 4968 /* ENV should not be modified. */ 4969 tcg_debug_assert(!temp_readonly(ts)); 4970 4971 set_temp_val_reg(s, ts, reg); 4972 ts->mem_coherent = 0; 4973 } 4974 break; 4975 4976 case TCG_CALL_RET_BY_VEC: 4977 { 4978 TCGTemp *ts = arg_temp(op->args[0]); 4979 4980 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 4981 tcg_debug_assert(ts->temp_subindex == 0); 4982 if (!ts->mem_allocated) { 4983 temp_allocate_frame(s, ts); 4984 } 4985 tcg_out_st(s, TCG_TYPE_V128, 4986 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 4987 ts->mem_base->reg, ts->mem_offset); 4988 } 4989 /* fall through to mark all parts in memory */ 4990 4991 case TCG_CALL_RET_BY_REF: 4992 /* The callee has performed a write through the reference. */ 4993 for (i = 0; i < nb_oargs; i++) { 4994 TCGTemp *ts = arg_temp(op->args[i]); 4995 ts->val_type = TEMP_VAL_MEM; 4996 } 4997 break; 4998 4999 default: 5000 g_assert_not_reached(); 5001 } 5002 5003 /* Flush or discard output registers as needed. 
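An output that is still needed but must live in memory is written back via temp_sync(); one that is dead after the call is simply dropped.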
*/ 5004 for (i = 0; i < nb_oargs; i++) { 5005 TCGTemp *ts = arg_temp(op->args[i]); 5006 if (NEED_SYNC_ARG(i)) { 5007 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 5008 } else if (IS_DEAD_ARG(i)) { 5009 temp_dead(s, ts); 5010 } 5011 } 5012 } 5013 5014 #ifdef CONFIG_PROFILER 5015 5016 /* avoid copy/paste errors */ 5017 #define PROF_ADD(to, from, field) \ 5018 do { \ 5019 (to)->field += qatomic_read(&((from)->field)); \ 5020 } while (0) 5021 5022 #define PROF_MAX(to, from, field) \ 5023 do { \ 5024 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 5025 if (val__ > (to)->field) { \ 5026 (to)->field = val__; \ 5027 } \ 5028 } while (0) 5029 5030 /* Pass in a zero'ed @prof */ 5031 static inline 5032 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 5033 { 5034 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 5035 unsigned int i; 5036 5037 for (i = 0; i < n_ctxs; i++) { 5038 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 5039 const TCGProfile *orig = &s->prof; 5040 5041 if (counters) { 5042 PROF_ADD(prof, orig, cpu_exec_time); 5043 PROF_ADD(prof, orig, tb_count1); 5044 PROF_ADD(prof, orig, tb_count); 5045 PROF_ADD(prof, orig, op_count); 5046 PROF_MAX(prof, orig, op_count_max); 5047 PROF_ADD(prof, orig, temp_count); 5048 PROF_MAX(prof, orig, temp_count_max); 5049 PROF_ADD(prof, orig, del_op_count); 5050 PROF_ADD(prof, orig, code_in_len); 5051 PROF_ADD(prof, orig, code_out_len); 5052 PROF_ADD(prof, orig, search_out_len); 5053 PROF_ADD(prof, orig, interm_time); 5054 PROF_ADD(prof, orig, code_time); 5055 PROF_ADD(prof, orig, la_time); 5056 PROF_ADD(prof, orig, opt_time); 5057 PROF_ADD(prof, orig, restore_count); 5058 PROF_ADD(prof, orig, restore_time); 5059 } 5060 if (table) { 5061 int i; 5062 5063 for (i = 0; i < NB_OPS; i++) { 5064 PROF_ADD(prof, orig, table_op_count[i]); 5065 } 5066 } 5067 } 5068 } 5069 5070 #undef PROF_ADD 5071 #undef PROF_MAX 5072 5073 static void tcg_profile_snapshot_counters(TCGProfile *prof) 5074 { 5075 tcg_profile_snapshot(prof, true, false); 5076 } 5077 5078 static void tcg_profile_snapshot_table(TCGProfile *prof) 5079 { 5080 tcg_profile_snapshot(prof, false, true); 5081 } 5082 5083 void tcg_dump_op_count(GString *buf) 5084 { 5085 TCGProfile prof = {}; 5086 int i; 5087 5088 tcg_profile_snapshot_table(&prof); 5089 for (i = 0; i < NB_OPS; i++) { 5090 g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, 5091 prof.table_op_count[i]); 5092 } 5093 } 5094 5095 int64_t tcg_cpu_exec_time(void) 5096 { 5097 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 5098 unsigned int i; 5099 int64_t ret = 0; 5100 5101 for (i = 0; i < n_ctxs; i++) { 5102 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 5103 const TCGProfile *prof = &s->prof; 5104 5105 ret += qatomic_read(&prof->cpu_exec_time); 5106 } 5107 return ret; 5108 } 5109 #else 5110 void tcg_dump_op_count(GString *buf) 5111 { 5112 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 5113 } 5114 5115 int64_t tcg_cpu_exec_time(void) 5116 { 5117 error_report("%s: TCG profiler not compiled", __func__); 5118 exit(EXIT_FAILURE); 5119 } 5120 #endif 5121 5122 5123 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) 5124 { 5125 #ifdef CONFIG_PROFILER 5126 TCGProfile *prof = &s->prof; 5127 #endif 5128 int i, num_insns; 5129 TCGOp *op; 5130 5131 #ifdef CONFIG_PROFILER 5132 { 5133 int n = 0; 5134 5135 QTAILQ_FOREACH(op, &s->ops, link) { 5136 n++; 5137 } 5138 qatomic_set(&prof->op_count, prof->op_count + n); 5139 if (n > prof->op_count_max) { 5140 
qatomic_set(&prof->op_count_max, n); 5141 } 5142 5143 n = s->nb_temps; 5144 qatomic_set(&prof->temp_count, prof->temp_count + n); 5145 if (n > prof->temp_count_max) { 5146 qatomic_set(&prof->temp_count_max, n); 5147 } 5148 } 5149 #endif 5150 5151 #ifdef DEBUG_DISAS 5152 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 5153 && qemu_log_in_addr_range(pc_start))) { 5154 FILE *logfile = qemu_log_trylock(); 5155 if (logfile) { 5156 fprintf(logfile, "OP:\n"); 5157 tcg_dump_ops(s, logfile, false); 5158 fprintf(logfile, "\n"); 5159 qemu_log_unlock(logfile); 5160 } 5161 } 5162 #endif 5163 5164 #ifdef CONFIG_DEBUG_TCG 5165 /* Ensure all labels referenced have been emitted. */ 5166 { 5167 TCGLabel *l; 5168 bool error = false; 5169 5170 QSIMPLEQ_FOREACH(l, &s->labels, next) { 5171 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 5172 qemu_log_mask(CPU_LOG_TB_OP, 5173 "$L%d referenced but not present.\n", l->id); 5174 error = true; 5175 } 5176 } 5177 assert(!error); 5178 } 5179 #endif 5180 5181 #ifdef CONFIG_PROFILER 5182 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 5183 #endif 5184 5185 #ifdef USE_TCG_OPTIMIZATIONS 5186 tcg_optimize(s); 5187 #endif 5188 5189 #ifdef CONFIG_PROFILER 5190 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 5191 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 5192 #endif 5193 5194 reachable_code_pass(s); 5195 liveness_pass_0(s); 5196 liveness_pass_1(s); 5197 5198 if (s->nb_indirects > 0) { 5199 #ifdef DEBUG_DISAS 5200 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 5201 && qemu_log_in_addr_range(pc_start))) { 5202 FILE *logfile = qemu_log_trylock(); 5203 if (logfile) { 5204 fprintf(logfile, "OP before indirect lowering:\n"); 5205 tcg_dump_ops(s, logfile, false); 5206 fprintf(logfile, "\n"); 5207 qemu_log_unlock(logfile); 5208 } 5209 } 5210 #endif 5211 /* Replace indirect temps with direct temps. */ 5212 if (liveness_pass_2(s)) { 5213 /* If changes were made, re-run liveness. */ 5214 liveness_pass_1(s); 5215 } 5216 } 5217 5218 #ifdef CONFIG_PROFILER 5219 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 5220 #endif 5221 5222 #ifdef DEBUG_DISAS 5223 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 5224 && qemu_log_in_addr_range(pc_start))) { 5225 FILE *logfile = qemu_log_trylock(); 5226 if (logfile) { 5227 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 5228 tcg_dump_ops(s, logfile, true); 5229 fprintf(logfile, "\n"); 5230 qemu_log_unlock(logfile); 5231 } 5232 } 5233 #endif 5234 5235 /* Initialize goto_tb jump offsets. */ 5236 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 5237 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 5238 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 5239 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 5240 5241 tcg_reg_alloc_start(s); 5242 5243 /* 5244 * Reset the buffer pointers when restarting after overflow. 5245 * TODO: Move this into translate-all.c with the rest of the 5246 * buffer management. Having only this done here is confusing. 
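* tb->tc.ptr is the executable (RX) mapping; tcg_splitwx_to_rw() converts it to the writable alias that code generation writes through.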
5247 */ 5248 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 5249 s->code_ptr = s->code_buf; 5250 5251 #ifdef TCG_TARGET_NEED_LDST_LABELS 5252 QSIMPLEQ_INIT(&s->ldst_labels); 5253 #endif 5254 #ifdef TCG_TARGET_NEED_POOL_LABELS 5255 s->pool_labels = NULL; 5256 #endif 5257 5258 num_insns = -1; 5259 QTAILQ_FOREACH(op, &s->ops, link) { 5260 TCGOpcode opc = op->opc; 5261 5262 #ifdef CONFIG_PROFILER 5263 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 5264 #endif 5265 5266 switch (opc) { 5267 case INDEX_op_mov_i32: 5268 case INDEX_op_mov_i64: 5269 case INDEX_op_mov_vec: 5270 tcg_reg_alloc_mov(s, op); 5271 break; 5272 case INDEX_op_dup_vec: 5273 tcg_reg_alloc_dup(s, op); 5274 break; 5275 case INDEX_op_insn_start: 5276 if (num_insns >= 0) { 5277 size_t off = tcg_current_code_size(s); 5278 s->gen_insn_end_off[num_insns] = off; 5279 /* Assert that we do not overflow our stored offset. */ 5280 assert(s->gen_insn_end_off[num_insns] == off); 5281 } 5282 num_insns++; 5283 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 5284 target_ulong a; 5285 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 5286 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 5287 #else 5288 a = op->args[i]; 5289 #endif 5290 s->gen_insn_data[num_insns][i] = a; 5291 } 5292 break; 5293 case INDEX_op_discard: 5294 temp_dead(s, arg_temp(op->args[0])); 5295 break; 5296 case INDEX_op_set_label: 5297 tcg_reg_alloc_bb_end(s, s->reserved_regs); 5298 tcg_out_label(s, arg_label(op->args[0])); 5299 break; 5300 case INDEX_op_call: 5301 tcg_reg_alloc_call(s, op); 5302 break; 5303 case INDEX_op_exit_tb: 5304 tcg_out_exit_tb(s, op->args[0]); 5305 break; 5306 case INDEX_op_goto_tb: 5307 tcg_out_goto_tb(s, op->args[0]); 5308 break; 5309 case INDEX_op_dup2_vec: 5310 if (tcg_reg_alloc_dup2(s, op)) { 5311 break; 5312 } 5313 /* fall through */ 5314 default: 5315 /* Sanity check that we've not introduced any unhandled opcodes. */ 5316 tcg_debug_assert(tcg_op_supported(opc)); 5317 /* Note: in order to speed up the code, it would be much 5318 faster to have specialized register allocator functions for 5319 some common argument patterns */ 5320 tcg_reg_alloc_op(s, op); 5321 break; 5322 } 5323 /* Test for (pending) buffer overflow. The assumption is that any 5324 one operation beginning below the high water mark cannot overrun 5325 the buffer completely. Thus we can test for overflow after 5326 generating code without having to check during generation. */ 5327 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 5328 return -1; 5329 } 5330 /* Test for TB overflow, as seen by gen_insn_end_off. 
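The stored insn offsets are 16-bit, so a TB whose generated code grows past UINT16_MAX bytes must be abandoned; the caller will retry with a smaller TB.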
*/ 5331 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 5332 return -2; 5333 } 5334 } 5335 tcg_debug_assert(num_insns >= 0); 5336 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 5337 5338 /* Generate TB finalization at the end of block */ 5339 #ifdef TCG_TARGET_NEED_LDST_LABELS 5340 i = tcg_out_ldst_finalize(s); 5341 if (i < 0) { 5342 return i; 5343 } 5344 #endif 5345 #ifdef TCG_TARGET_NEED_POOL_LABELS 5346 i = tcg_out_pool_finalize(s); 5347 if (i < 0) { 5348 return i; 5349 } 5350 #endif 5351 if (!tcg_resolve_relocs(s)) { 5352 return -2; 5353 } 5354 5355 #ifndef CONFIG_TCG_INTERPRETER 5356 /* flush instruction cache */ 5357 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 5358 (uintptr_t)s->code_buf, 5359 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 5360 #endif 5361 5362 return tcg_current_code_size(s); 5363 } 5364 5365 #ifdef CONFIG_PROFILER 5366 void tcg_dump_info(GString *buf) 5367 { 5368 TCGProfile prof = {}; 5369 const TCGProfile *s; 5370 int64_t tb_count; 5371 int64_t tb_div_count; 5372 int64_t tot; 5373 5374 tcg_profile_snapshot_counters(&prof); 5375 s = &prof; 5376 tb_count = s->tb_count; 5377 tb_div_count = tb_count ? tb_count : 1; 5378 tot = s->interm_time + s->code_time; 5379 5380 g_string_append_printf(buf, "JIT cycles %" PRId64 5381 " (%0.3f s at 2.4 GHz)\n", 5382 tot, tot / 2.4e9); 5383 g_string_append_printf(buf, "translated TBs %" PRId64 5384 " (aborted=%" PRId64 " %0.1f%%)\n", 5385 tb_count, s->tb_count1 - tb_count, 5386 (double)(s->tb_count1 - s->tb_count) 5387 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 5388 g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", 5389 (double)s->op_count / tb_div_count, s->op_count_max); 5390 g_string_append_printf(buf, "deleted ops/TB %0.2f\n", 5391 (double)s->del_op_count / tb_div_count); 5392 g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", 5393 (double)s->temp_count / tb_div_count, 5394 s->temp_count_max); 5395 g_string_append_printf(buf, "avg host code/TB %0.1f\n", 5396 (double)s->code_out_len / tb_div_count); 5397 g_string_append_printf(buf, "avg search data/TB %0.1f\n", 5398 (double)s->search_out_len / tb_div_count); 5399 5400 g_string_append_printf(buf, "cycles/op %0.1f\n", 5401 s->op_count ? (double)tot / s->op_count : 0); 5402 g_string_append_printf(buf, "cycles/in byte %0.1f\n", 5403 s->code_in_len ? (double)tot / s->code_in_len : 0); 5404 g_string_append_printf(buf, "cycles/out byte %0.1f\n", 5405 s->code_out_len ? (double)tot / s->code_out_len : 0); 5406 g_string_append_printf(buf, "cycles/search byte %0.1f\n", 5407 s->search_out_len ? 5408 (double)tot / s->search_out_len : 0); 5409 if (tot == 0) { 5410 tot = 1; 5411 } 5412 g_string_append_printf(buf, " gen_interm time %0.1f%%\n", 5413 (double)s->interm_time / tot * 100.0); 5414 g_string_append_printf(buf, " gen_code time %0.1f%%\n", 5415 (double)s->code_time / tot * 100.0); 5416 g_string_append_printf(buf, "optim./code time %0.1f%%\n", 5417 (double)s->opt_time / (s->code_time ? 5418 s->code_time : 1) 5419 * 100.0); 5420 g_string_append_printf(buf, "liveness/code time %0.1f%%\n", 5421 (double)s->la_time / (s->code_time ? 5422 s->code_time : 1) * 100.0); 5423 g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", 5424 s->restore_count); 5425 g_string_append_printf(buf, " avg cycles %0.1f\n", 5426 s->restore_count ? 
5427 (double)s->restore_time / s->restore_count : 0); 5428 } 5429 #else 5430 void tcg_dump_info(GString *buf) 5431 { 5432 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 5433 } 5434 #endif 5435 5436 #ifdef ELF_HOST_MACHINE 5437 /* In order to use this feature, the backend needs to do three things: 5438 5439 (1) Define ELF_HOST_MACHINE to indicate both what value to 5440 put into the ELF image and to indicate support for the feature. 5441 5442 (2) Define tcg_register_jit. This should create a buffer containing 5443 the contents of a .debug_frame section that describes the post- 5444 prologue unwind info for the tcg machine. 5445 5446 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 5447 */ 5448 5449 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 5450 typedef enum { 5451 JIT_NOACTION = 0, 5452 JIT_REGISTER_FN, 5453 JIT_UNREGISTER_FN 5454 } jit_actions_t; 5455 5456 struct jit_code_entry { 5457 struct jit_code_entry *next_entry; 5458 struct jit_code_entry *prev_entry; 5459 const void *symfile_addr; 5460 uint64_t symfile_size; 5461 }; 5462 5463 struct jit_descriptor { 5464 uint32_t version; 5465 uint32_t action_flag; 5466 struct jit_code_entry *relevant_entry; 5467 struct jit_code_entry *first_entry; 5468 }; 5469 5470 void __jit_debug_register_code(void) __attribute__((noinline)); 5471 void __jit_debug_register_code(void) 5472 { 5473 asm(""); 5474 } 5475 5476 /* Must statically initialize the version, because GDB may check 5477 the version before we can set it. */ 5478 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 5479 5480 /* End GDB interface. */ 5481 5482 static int find_string(const char *strtab, const char *str) 5483 { 5484 const char *p = strtab + 1; 5485 5486 while (1) { 5487 if (strcmp(p, str) == 0) { 5488 return p - strtab; 5489 } 5490 p += strlen(p) + 1; 5491 } 5492 } 5493 5494 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 5495 const void *debug_frame, 5496 size_t debug_frame_size) 5497 { 5498 struct __attribute__((packed)) DebugInfo { 5499 uint32_t len; 5500 uint16_t version; 5501 uint32_t abbrev; 5502 uint8_t ptr_size; 5503 uint8_t cu_die; 5504 uint16_t cu_lang; 5505 uintptr_t cu_low_pc; 5506 uintptr_t cu_high_pc; 5507 uint8_t fn_die; 5508 char fn_name[16]; 5509 uintptr_t fn_low_pc; 5510 uintptr_t fn_high_pc; 5511 uint8_t cu_eoc; 5512 }; 5513 5514 struct ElfImage { 5515 ElfW(Ehdr) ehdr; 5516 ElfW(Phdr) phdr; 5517 ElfW(Shdr) shdr[7]; 5518 ElfW(Sym) sym[2]; 5519 struct DebugInfo di; 5520 uint8_t da[24]; 5521 char str[80]; 5522 }; 5523 5524 struct ElfImage *img; 5525 5526 static const struct ElfImage img_template = { 5527 .ehdr = { 5528 .e_ident[EI_MAG0] = ELFMAG0, 5529 .e_ident[EI_MAG1] = ELFMAG1, 5530 .e_ident[EI_MAG2] = ELFMAG2, 5531 .e_ident[EI_MAG3] = ELFMAG3, 5532 .e_ident[EI_CLASS] = ELF_CLASS, 5533 .e_ident[EI_DATA] = ELF_DATA, 5534 .e_ident[EI_VERSION] = EV_CURRENT, 5535 .e_type = ET_EXEC, 5536 .e_machine = ELF_HOST_MACHINE, 5537 .e_version = EV_CURRENT, 5538 .e_phoff = offsetof(struct ElfImage, phdr), 5539 .e_shoff = offsetof(struct ElfImage, shdr), 5540 .e_ehsize = sizeof(ElfW(Shdr)), 5541 .e_phentsize = sizeof(ElfW(Phdr)), 5542 .e_phnum = 1, 5543 .e_shentsize = sizeof(ElfW(Shdr)), 5544 .e_shnum = ARRAY_SIZE(img->shdr), 5545 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 5546 #ifdef ELF_HOST_FLAGS 5547 .e_flags = ELF_HOST_FLAGS, 5548 #endif 5549 #ifdef ELF_OSABI 5550 .e_ident[EI_OSABI] = ELF_OSABI, 5551 #endif 5552 }, 5553 .phdr = { 5554 .p_type = PT_LOAD, 5555 .p_flags = PF_X, 5556 }, 5557 
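/* A single PT_LOAD segment covers the whole code_gen_buffer; its address and size are filled in at registration time (p_vaddr/p_paddr/p_memsz below). */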
.shdr = { 5558 [0] = { .sh_type = SHT_NULL }, 5559 /* Trick: The contents of code_gen_buffer are not present in 5560 this fake ELF file; that got allocated elsewhere. Therefore 5561 we mark .text as SHT_NOBITS (similar to .bss) so that readers 5562 will not look for contents. We can record any address. */ 5563 [1] = { /* .text */ 5564 .sh_type = SHT_NOBITS, 5565 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 5566 }, 5567 [2] = { /* .debug_info */ 5568 .sh_type = SHT_PROGBITS, 5569 .sh_offset = offsetof(struct ElfImage, di), 5570 .sh_size = sizeof(struct DebugInfo), 5571 }, 5572 [3] = { /* .debug_abbrev */ 5573 .sh_type = SHT_PROGBITS, 5574 .sh_offset = offsetof(struct ElfImage, da), 5575 .sh_size = sizeof(img->da), 5576 }, 5577 [4] = { /* .debug_frame */ 5578 .sh_type = SHT_PROGBITS, 5579 .sh_offset = sizeof(struct ElfImage), 5580 }, 5581 [5] = { /* .symtab */ 5582 .sh_type = SHT_SYMTAB, 5583 .sh_offset = offsetof(struct ElfImage, sym), 5584 .sh_size = sizeof(img->sym), 5585 .sh_info = 1, 5586 .sh_link = ARRAY_SIZE(img->shdr) - 1, 5587 .sh_entsize = sizeof(ElfW(Sym)), 5588 }, 5589 [6] = { /* .strtab */ 5590 .sh_type = SHT_STRTAB, 5591 .sh_offset = offsetof(struct ElfImage, str), 5592 .sh_size = sizeof(img->str), 5593 } 5594 }, 5595 .sym = { 5596 [1] = { /* code_gen_buffer */ 5597 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5598 .st_shndx = 1, 5599 } 5600 }, 5601 .di = { 5602 .len = sizeof(struct DebugInfo) - 4, 5603 .version = 2, 5604 .ptr_size = sizeof(void *), 5605 .cu_die = 1, 5606 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5607 .fn_die = 2, 5608 .fn_name = "code_gen_buffer" 5609 }, 5610 .da = { 5611 1, /* abbrev number (the cu) */ 5612 0x11, 1, /* DW_TAG_compile_unit, has children */ 5613 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5614 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5615 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5616 0, 0, /* end of abbrev */ 5617 2, /* abbrev number (the fn) */ 5618 0x2e, 0, /* DW_TAG_subprogram, no children */ 5619 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5620 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5621 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5622 0, 0, /* end of abbrev */ 5623 0 /* no more abbrev */ 5624 }, 5625 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5626 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5627 }; 5628 5629 /* We only need a single jit entry; statically allocate it. 
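GDB sets a breakpoint on __jit_debug_register_code() and walks the descriptor's entry list; since we register exactly one ELF image, a single static entry suffices.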
*/ 5630 static struct jit_code_entry one_entry; 5631 5632 uintptr_t buf = (uintptr_t)buf_ptr; 5633 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 5634 DebugFrameHeader *dfh; 5635 5636 img = g_malloc(img_size); 5637 *img = img_template; 5638 5639 img->phdr.p_vaddr = buf; 5640 img->phdr.p_paddr = buf; 5641 img->phdr.p_memsz = buf_size; 5642 5643 img->shdr[1].sh_name = find_string(img->str, ".text"); 5644 img->shdr[1].sh_addr = buf; 5645 img->shdr[1].sh_size = buf_size; 5646 5647 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 5648 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 5649 5650 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 5651 img->shdr[4].sh_size = debug_frame_size; 5652 5653 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 5654 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 5655 5656 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 5657 img->sym[1].st_value = buf; 5658 img->sym[1].st_size = buf_size; 5659 5660 img->di.cu_low_pc = buf; 5661 img->di.cu_high_pc = buf + buf_size; 5662 img->di.fn_low_pc = buf; 5663 img->di.fn_high_pc = buf + buf_size; 5664 5665 dfh = (DebugFrameHeader *)(img + 1); 5666 memcpy(dfh, debug_frame, debug_frame_size); 5667 dfh->fde.func_start = buf; 5668 dfh->fde.func_len = buf_size; 5669 5670 #ifdef DEBUG_JIT 5671 /* Enable this block to be able to debug the ELF image file creation. 5672 One can use readelf, objdump, or other inspection utilities. */ 5673 { 5674 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 5675 FILE *f = fopen(jit, "w+b"); 5676 if (f) { 5677 if (fwrite(img, img_size, 1, f) != img_size) { 5678 /* Avoid stupid unused return value warning for fwrite. */ 5679 } 5680 fclose(f); 5681 } 5682 } 5683 #endif 5684 5685 one_entry.symfile_addr = img; 5686 one_entry.symfile_size = img_size; 5687 5688 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 5689 __jit_debug_descriptor.relevant_entry = &one_entry; 5690 __jit_debug_descriptor.first_entry = &one_entry; 5691 __jit_debug_register_code(); 5692 } 5693 #else 5694 /* No support for the feature. Provide the entry point expected by exec.c, 5695 and implement the internal function we declared earlier. */ 5696 5697 static void tcg_register_jit_int(const void *buf, size_t size, 5698 const void *debug_frame, 5699 size_t debug_frame_size) 5700 { 5701 } 5702 5703 void tcg_register_jit(const void *buf, size_t buf_size) 5704 { 5705 } 5706 #endif /* ELF_HOST_MACHINE */ 5707 5708 #if !TCG_TARGET_MAYBE_vec 5709 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 5710 { 5711 g_assert_not_reached(); 5712 } 5713 #endif 5714