1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 /* define it to use liveness analysis (better code) */ 26 #define USE_TCG_OPTIMIZATIONS 27 28 #include "qemu/osdep.h" 29 30 /* Define to jump the ELF file used to communicate with GDB. */ 31 #undef DEBUG_JIT 32 33 #include "qemu/error-report.h" 34 #include "qemu/cutils.h" 35 #include "qemu/host-utils.h" 36 #include "qemu/qemu-print.h" 37 #include "qemu/cacheflush.h" 38 #include "qemu/cacheinfo.h" 39 #include "qemu/timer.h" 40 41 /* Note: the long term plan is to reduce the dependencies on the QEMU 42 CPU definitions. 
Currently they are used for qemu_ld/st 43 instructions */ 44 #define NO_CPU_IO_DEFS 45 46 #include "exec/exec-all.h" 47 #include "tcg/tcg-op.h" 48 49 #if UINTPTR_MAX == UINT32_MAX 50 # define ELF_CLASS ELFCLASS32 51 #else 52 # define ELF_CLASS ELFCLASS64 53 #endif 54 #if HOST_BIG_ENDIAN 55 # define ELF_DATA ELFDATA2MSB 56 #else 57 # define ELF_DATA ELFDATA2LSB 58 #endif 59 60 #include "elf.h" 61 #include "exec/log.h" 62 #include "tcg/tcg-ldst.h" 63 #include "tcg/tcg-temp-internal.h" 64 #include "tcg-internal.h" 65 #include "accel/tcg/perf.h" 66 67 /* Forward declarations for functions declared in tcg-target.c.inc and 68 used here. */ 69 static void tcg_target_init(TCGContext *s); 70 static void tcg_target_qemu_prologue(TCGContext *s); 71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type, 72 intptr_t value, intptr_t addend); 73 74 /* The CIE and FDE header definitions will be common to all hosts. */ 75 typedef struct { 76 uint32_t len __attribute__((aligned((sizeof(void *))))); 77 uint32_t id; 78 uint8_t version; 79 char augmentation[1]; 80 uint8_t code_align; 81 uint8_t data_align; 82 uint8_t return_column; 83 } DebugFrameCIE; 84 85 typedef struct QEMU_PACKED { 86 uint32_t len __attribute__((aligned((sizeof(void *))))); 87 uint32_t cie_offset; 88 uintptr_t func_start; 89 uintptr_t func_len; 90 } DebugFrameFDEHeader; 91 92 typedef struct QEMU_PACKED { 93 DebugFrameCIE cie; 94 DebugFrameFDEHeader fde; 95 } DebugFrameHeader; 96 97 static void tcg_register_jit_int(const void *buf, size_t size, 98 const void *debug_frame, 99 size_t debug_frame_size) 100 __attribute__((unused)); 101 102 /* Forward declarations for functions declared and used in tcg-target.c.inc. 
*/ 103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 104 intptr_t arg2); 105 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 106 static void tcg_out_movi(TCGContext *s, TCGType type, 107 TCGReg ret, tcg_target_long arg); 108 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 109 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 110 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg); 111 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg); 112 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg); 113 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg); 114 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 115 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); 116 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg); 117 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); 118 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); 119 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); 120 static void tcg_out_goto_tb(TCGContext *s, int which); 121 static void tcg_out_op(TCGContext *s, TCGOpcode opc, 122 const TCGArg args[TCG_MAX_OP_ARGS], 123 const int const_args[TCG_MAX_OP_ARGS]); 124 #if TCG_TARGET_MAYBE_vec 125 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 126 TCGReg dst, TCGReg src); 127 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 128 TCGReg dst, TCGReg base, intptr_t offset); 129 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 130 TCGReg dst, int64_t arg); 131 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 132 unsigned vecl, unsigned vece, 133 const TCGArg args[TCG_MAX_OP_ARGS], 134 const int const_args[TCG_MAX_OP_ARGS]); 135 #else 136 static inline bool tcg_out_dup_vec(TCGContext *s, 
TCGType type, unsigned vece, 137 TCGReg dst, TCGReg src) 138 { 139 g_assert_not_reached(); 140 } 141 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 142 TCGReg dst, TCGReg base, intptr_t offset) 143 { 144 g_assert_not_reached(); 145 } 146 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 147 TCGReg dst, int64_t arg) 148 { 149 g_assert_not_reached(); 150 } 151 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 152 unsigned vecl, unsigned vece, 153 const TCGArg args[TCG_MAX_OP_ARGS], 154 const int const_args[TCG_MAX_OP_ARGS]) 155 { 156 g_assert_not_reached(); 157 } 158 #endif 159 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 160 intptr_t arg2); 161 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 162 TCGReg base, intptr_t ofs); 163 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 164 const TCGHelperInfo *info); 165 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); 166 static bool tcg_target_const_match(int64_t val, TCGType type, int ct); 167 #ifdef TCG_TARGET_NEED_LDST_LABELS 168 static int tcg_out_ldst_finalize(TCGContext *s); 169 #endif 170 171 TCGContext tcg_init_ctx; 172 __thread TCGContext *tcg_ctx; 173 174 TCGContext **tcg_ctxs; 175 unsigned int tcg_cur_ctxs; 176 unsigned int tcg_max_ctxs; 177 TCGv_env cpu_env = 0; 178 const void *tcg_code_gen_epilogue; 179 uintptr_t tcg_splitwx_diff; 180 181 #ifndef CONFIG_TCG_INTERPRETER 182 tcg_prologue_fn *tcg_qemu_tb_exec; 183 #endif 184 185 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 186 static TCGRegSet tcg_target_call_clobber_regs; 187 188 #if TCG_TARGET_INSN_UNIT_SIZE == 1 189 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 190 { 191 *s->code_ptr++ = v; 192 } 193 194 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 195 uint8_t v) 196 { 197 *p = v; 198 } 199 #endif 200 201 #if 
TCG_TARGET_INSN_UNIT_SIZE <= 2 202 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 203 { 204 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 205 *s->code_ptr++ = v; 206 } else { 207 tcg_insn_unit *p = s->code_ptr; 208 memcpy(p, &v, sizeof(v)); 209 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 210 } 211 } 212 213 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 214 uint16_t v) 215 { 216 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 217 *p = v; 218 } else { 219 memcpy(p, &v, sizeof(v)); 220 } 221 } 222 #endif 223 224 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 225 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 226 { 227 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 228 *s->code_ptr++ = v; 229 } else { 230 tcg_insn_unit *p = s->code_ptr; 231 memcpy(p, &v, sizeof(v)); 232 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 233 } 234 } 235 236 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 237 uint32_t v) 238 { 239 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 240 *p = v; 241 } else { 242 memcpy(p, &v, sizeof(v)); 243 } 244 } 245 #endif 246 247 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 248 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 249 { 250 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 251 *s->code_ptr++ = v; 252 } else { 253 tcg_insn_unit *p = s->code_ptr; 254 memcpy(p, &v, sizeof(v)); 255 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 256 } 257 } 258 259 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 260 uint64_t v) 261 { 262 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 263 *p = v; 264 } else { 265 memcpy(p, &v, sizeof(v)); 266 } 267 } 268 #endif 269 270 /* label relocation processing */ 271 272 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 273 TCGLabel *l, intptr_t addend) 274 { 275 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 276 277 r->type = type; 278 r->ptr = code_ptr; 279 r->addend = addend; 280 
QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 281 } 282 283 static void tcg_out_label(TCGContext *s, TCGLabel *l) 284 { 285 tcg_debug_assert(!l->has_value); 286 l->has_value = 1; 287 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 288 } 289 290 TCGLabel *gen_new_label(void) 291 { 292 TCGContext *s = tcg_ctx; 293 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 294 295 memset(l, 0, sizeof(TCGLabel)); 296 l->id = s->nb_labels++; 297 QSIMPLEQ_INIT(&l->branches); 298 QSIMPLEQ_INIT(&l->relocs); 299 300 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 301 302 return l; 303 } 304 305 static bool tcg_resolve_relocs(TCGContext *s) 306 { 307 TCGLabel *l; 308 309 QSIMPLEQ_FOREACH(l, &s->labels, next) { 310 TCGRelocation *r; 311 uintptr_t value = l->u.value; 312 313 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 314 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 315 return false; 316 } 317 } 318 } 319 return true; 320 } 321 322 static void set_jmp_reset_offset(TCGContext *s, int which) 323 { 324 /* 325 * We will check for overflow at the end of the opcode loop in 326 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 327 */ 328 s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s); 329 } 330 331 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) 332 { 333 /* 334 * We will check for overflow at the end of the opcode loop in 335 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 336 */ 337 s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); 338 } 339 340 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) 341 { 342 /* 343 * Return the read-execute version of the pointer, for the benefit 344 * of any pc-relative addressing mode. 345 */ 346 return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]); 347 } 348 349 /* Signal overflow, starting over with fewer guest insns. 
*/ 350 static G_NORETURN 351 void tcg_raise_tb_overflow(TCGContext *s) 352 { 353 siglongjmp(s->jmp_trans, -2); 354 } 355 356 typedef struct TCGMovExtend { 357 TCGReg dst; 358 TCGReg src; 359 TCGType dst_type; 360 TCGType src_type; 361 MemOp src_ext; 362 } TCGMovExtend; 363 364 /** 365 * tcg_out_movext -- move and extend 366 * @s: tcg context 367 * @dst_type: integral type for destination 368 * @dst: destination register 369 * @src_type: integral type for source 370 * @src_ext: extension to apply to source 371 * @src: source register 372 * 373 * Move or extend @src into @dst, depending on @src_ext and the types. 374 */ 375 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, 376 TCGType src_type, MemOp src_ext, TCGReg src) 377 { 378 switch (src_ext) { 379 case MO_UB: 380 tcg_out_ext8u(s, dst, src); 381 break; 382 case MO_SB: 383 tcg_out_ext8s(s, dst_type, dst, src); 384 break; 385 case MO_UW: 386 tcg_out_ext16u(s, dst, src); 387 break; 388 case MO_SW: 389 tcg_out_ext16s(s, dst_type, dst, src); 390 break; 391 case MO_UL: 392 case MO_SL: 393 if (dst_type == TCG_TYPE_I32) { 394 if (src_type == TCG_TYPE_I32) { 395 tcg_out_mov(s, TCG_TYPE_I32, dst, src); 396 } else { 397 tcg_out_extrl_i64_i32(s, dst, src); 398 } 399 } else if (src_type == TCG_TYPE_I32) { 400 if (src_ext & MO_SIGN) { 401 tcg_out_exts_i32_i64(s, dst, src); 402 } else { 403 tcg_out_extu_i32_i64(s, dst, src); 404 } 405 } else { 406 if (src_ext & MO_SIGN) { 407 tcg_out_ext32s(s, dst, src); 408 } else { 409 tcg_out_ext32u(s, dst, src); 410 } 411 } 412 break; 413 case MO_UQ: 414 tcg_debug_assert(TCG_TARGET_REG_BITS == 64); 415 if (dst_type == TCG_TYPE_I32) { 416 tcg_out_extrl_i64_i32(s, dst, src); 417 } else { 418 tcg_out_mov(s, TCG_TYPE_I64, dst, src); 419 } 420 break; 421 default: 422 g_assert_not_reached(); 423 } 424 } 425 426 /* Minor variations on a theme, using a structure. 
*/ 427 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i, 428 TCGReg src) 429 { 430 tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src); 431 } 432 433 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i) 434 { 435 tcg_out_movext1_new_src(s, i, i->src); 436 } 437 438 /** 439 * tcg_out_movext2 -- move and extend two pair 440 * @s: tcg context 441 * @i1: first move description 442 * @i2: second move description 443 * @scratch: temporary register, or -1 for none 444 * 445 * As tcg_out_movext, for both @i1 and @i2, caring for overlap 446 * between the sources and destinations. 447 */ 448 449 static void __attribute__((unused)) 450 tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1, 451 const TCGMovExtend *i2, int scratch) 452 { 453 TCGReg src1 = i1->src; 454 TCGReg src2 = i2->src; 455 456 if (i1->dst != src2) { 457 tcg_out_movext1(s, i1); 458 tcg_out_movext1(s, i2); 459 return; 460 } 461 if (i2->dst == src1) { 462 TCGType src1_type = i1->src_type; 463 TCGType src2_type = i2->src_type; 464 465 if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) { 466 /* The data is now in the correct registers, now extend. */ 467 src1 = i2->src; 468 src2 = i1->src; 469 } else { 470 tcg_debug_assert(scratch >= 0); 471 tcg_out_mov(s, src1_type, scratch, src1); 472 src1 = scratch; 473 } 474 } 475 tcg_out_movext1_new_src(s, i2, src2); 476 tcg_out_movext1_new_src(s, i1, src1); 477 } 478 479 #define C_PFX1(P, A) P##A 480 #define C_PFX2(P, A, B) P##A##_##B 481 #define C_PFX3(P, A, B, C) P##A##_##B##_##C 482 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D 483 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E 484 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F 485 486 /* Define an enumeration for the various combinations. 
*/ 487 488 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1), 489 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2), 490 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3), 491 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4), 492 493 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1), 494 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2), 495 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3), 496 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), 497 498 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), 499 500 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), 501 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), 502 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), 503 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), 504 505 typedef enum { 506 #include "tcg-target-con-set.h" 507 } TCGConstraintSetIndex; 508 509 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); 510 511 #undef C_O0_I1 512 #undef C_O0_I2 513 #undef C_O0_I3 514 #undef C_O0_I4 515 #undef C_O1_I1 516 #undef C_O1_I2 517 #undef C_O1_I3 518 #undef C_O1_I4 519 #undef C_N1_I2 520 #undef C_O2_I1 521 #undef C_O2_I2 522 #undef C_O2_I3 523 #undef C_O2_I4 524 525 /* Put all of the constraint sets into an array, indexed by the enum. 
*/ 526 527 #define C_O0_I1(I1) { .args_ct_str = { #I1 } }, 528 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } }, 529 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } }, 530 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } }, 531 532 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } }, 533 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } }, 534 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } }, 535 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } }, 536 537 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } }, 538 539 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } }, 540 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, 541 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } }, 542 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } }, 543 544 static const TCGTargetOpDef constraint_sets[] = { 545 #include "tcg-target-con-set.h" 546 }; 547 548 549 #undef C_O0_I1 550 #undef C_O0_I2 551 #undef C_O0_I3 552 #undef C_O0_I4 553 #undef C_O1_I1 554 #undef C_O1_I2 555 #undef C_O1_I3 556 #undef C_O1_I4 557 #undef C_N1_I2 558 #undef C_O2_I1 559 #undef C_O2_I2 560 #undef C_O2_I3 561 #undef C_O2_I4 562 563 /* Expand the enumerator to be returned from tcg_target_op_def(). 
*/ 564 565 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1) 566 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2) 567 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3) 568 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4) 569 570 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1) 571 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2) 572 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3) 573 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) 574 575 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) 576 577 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1) 578 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) 579 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) 580 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) 581 582 #include "tcg-target.c.inc" 583 584 static void alloc_tcg_plugin_context(TCGContext *s) 585 { 586 #ifdef CONFIG_PLUGIN 587 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1); 588 s->plugin_tb->insns = 589 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn); 590 #endif 591 } 592 593 /* 594 * All TCG threads except the parent (i.e. the one that called tcg_context_init 595 * and registered the target's TCG globals) must register with this function 596 * before initiating translation. 597 * 598 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 599 * of tcg_region_init() for the reasoning behind this. 600 * 601 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 602 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 603 * is not used anymore for translation once this function is called. 604 * 605 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 606 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 
607 */ 608 #ifdef CONFIG_USER_ONLY 609 void tcg_register_thread(void) 610 { 611 tcg_ctx = &tcg_init_ctx; 612 } 613 #else 614 void tcg_register_thread(void) 615 { 616 TCGContext *s = g_malloc(sizeof(*s)); 617 unsigned int i, n; 618 619 *s = tcg_init_ctx; 620 621 /* Relink mem_base. */ 622 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 623 if (tcg_init_ctx.temps[i].mem_base) { 624 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 625 tcg_debug_assert(b >= 0 && b < n); 626 s->temps[i].mem_base = &s->temps[b]; 627 } 628 } 629 630 /* Claim an entry in tcg_ctxs */ 631 n = qatomic_fetch_inc(&tcg_cur_ctxs); 632 g_assert(n < tcg_max_ctxs); 633 qatomic_set(&tcg_ctxs[n], s); 634 635 if (n > 0) { 636 alloc_tcg_plugin_context(s); 637 tcg_region_initial_alloc(s); 638 } 639 640 tcg_ctx = s; 641 } 642 #endif /* !CONFIG_USER_ONLY */ 643 644 /* pool based memory allocation */ 645 void *tcg_malloc_internal(TCGContext *s, int size) 646 { 647 TCGPool *p; 648 int pool_size; 649 650 if (size > TCG_POOL_CHUNK_SIZE) { 651 /* big malloc: insert a new pool (XXX: could optimize) */ 652 p = g_malloc(sizeof(TCGPool) + size); 653 p->size = size; 654 p->next = s->pool_first_large; 655 s->pool_first_large = p; 656 return p->data; 657 } else { 658 p = s->pool_current; 659 if (!p) { 660 p = s->pool_first; 661 if (!p) 662 goto new_pool; 663 } else { 664 if (!p->next) { 665 new_pool: 666 pool_size = TCG_POOL_CHUNK_SIZE; 667 p = g_malloc(sizeof(TCGPool) + pool_size); 668 p->size = pool_size; 669 p->next = NULL; 670 if (s->pool_current) { 671 s->pool_current->next = p; 672 } else { 673 s->pool_first = p; 674 } 675 } else { 676 p = p->next; 677 } 678 } 679 } 680 s->pool_current = p; 681 s->pool_cur = p->data + size; 682 s->pool_end = p->data + p->size; 683 return p->data; 684 } 685 686 void tcg_pool_reset(TCGContext *s) 687 { 688 TCGPool *p, *t; 689 for (p = s->pool_first_large; p; p = t) { 690 t = p->next; 691 g_free(p); 692 } 693 s->pool_first_large = NULL; 694 s->pool_cur = 
s->pool_end = NULL; 695 s->pool_current = NULL; 696 } 697 698 #include "exec/helper-proto.h" 699 700 static TCGHelperInfo all_helpers[] = { 701 #include "exec/helper-tcg.h" 702 }; 703 static GHashTable *helper_table; 704 705 #ifdef CONFIG_TCG_INTERPRETER 706 static ffi_type *typecode_to_ffi(int argmask) 707 { 708 /* 709 * libffi does not support __int128_t, so we have forced Int128 710 * to use the structure definition instead of the builtin type. 711 */ 712 static ffi_type *ffi_type_i128_elements[3] = { 713 &ffi_type_uint64, 714 &ffi_type_uint64, 715 NULL 716 }; 717 static ffi_type ffi_type_i128 = { 718 .size = 16, 719 .alignment = __alignof__(Int128), 720 .type = FFI_TYPE_STRUCT, 721 .elements = ffi_type_i128_elements, 722 }; 723 724 switch (argmask) { 725 case dh_typecode_void: 726 return &ffi_type_void; 727 case dh_typecode_i32: 728 return &ffi_type_uint32; 729 case dh_typecode_s32: 730 return &ffi_type_sint32; 731 case dh_typecode_i64: 732 return &ffi_type_uint64; 733 case dh_typecode_s64: 734 return &ffi_type_sint64; 735 case dh_typecode_ptr: 736 return &ffi_type_pointer; 737 case dh_typecode_i128: 738 return &ffi_type_i128; 739 } 740 g_assert_not_reached(); 741 } 742 743 static void init_ffi_layouts(void) 744 { 745 /* g_direct_hash/equal for direct comparisons on uint32_t. */ 746 GHashTable *ffi_table = g_hash_table_new(NULL, NULL); 747 748 for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 749 TCGHelperInfo *info = &all_helpers[i]; 750 unsigned typemask = info->typemask; 751 gpointer hash = (gpointer)(uintptr_t)typemask; 752 struct { 753 ffi_cif cif; 754 ffi_type *args[]; 755 } *ca; 756 ffi_status status; 757 int nargs; 758 ffi_cif *cif; 759 760 cif = g_hash_table_lookup(ffi_table, hash); 761 if (cif) { 762 info->cif = cif; 763 continue; 764 } 765 766 /* Ignoring the return type, find the last non-zero field. 
*/ 767 nargs = 32 - clz32(typemask >> 3); 768 nargs = DIV_ROUND_UP(nargs, 3); 769 assert(nargs <= MAX_CALL_IARGS); 770 771 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); 772 ca->cif.rtype = typecode_to_ffi(typemask & 7); 773 ca->cif.nargs = nargs; 774 775 if (nargs != 0) { 776 ca->cif.arg_types = ca->args; 777 for (int j = 0; j < nargs; ++j) { 778 int typecode = extract32(typemask, (j + 1) * 3, 3); 779 ca->args[j] = typecode_to_ffi(typecode); 780 } 781 } 782 783 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, 784 ca->cif.rtype, ca->cif.arg_types); 785 assert(status == FFI_OK); 786 787 cif = &ca->cif; 788 info->cif = cif; 789 g_hash_table_insert(ffi_table, hash, (gpointer)cif); 790 } 791 792 g_hash_table_destroy(ffi_table); 793 } 794 #endif /* CONFIG_TCG_INTERPRETER */ 795 796 typedef struct TCGCumulativeArgs { 797 int arg_idx; /* tcg_gen_callN args[] */ 798 int info_in_idx; /* TCGHelperInfo in[] */ 799 int arg_slot; /* regs+stack slot */ 800 int ref_slot; /* stack slots for references */ 801 } TCGCumulativeArgs; 802 803 static void layout_arg_even(TCGCumulativeArgs *cum) 804 { 805 cum->arg_slot += cum->arg_slot & 1; 806 } 807 808 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, 809 TCGCallArgumentKind kind) 810 { 811 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 812 813 *loc = (TCGCallArgumentLoc){ 814 .kind = kind, 815 .arg_idx = cum->arg_idx, 816 .arg_slot = cum->arg_slot, 817 }; 818 cum->info_in_idx++; 819 cum->arg_slot++; 820 } 821 822 static void layout_arg_normal_n(TCGCumulativeArgs *cum, 823 TCGHelperInfo *info, int n) 824 { 825 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 826 827 for (int i = 0; i < n; ++i) { 828 /* Layout all using the same arg_idx, adjusting the subindex. 
*/ 829 loc[i] = (TCGCallArgumentLoc){ 830 .kind = TCG_CALL_ARG_NORMAL, 831 .arg_idx = cum->arg_idx, 832 .tmp_subindex = i, 833 .arg_slot = cum->arg_slot + i, 834 }; 835 } 836 cum->info_in_idx += n; 837 cum->arg_slot += n; 838 } 839 840 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info) 841 { 842 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; 843 int n = 128 / TCG_TARGET_REG_BITS; 844 845 /* The first subindex carries the pointer. */ 846 layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF); 847 848 /* 849 * The callee is allowed to clobber memory associated with 850 * structure pass by-reference. Therefore we must make copies. 851 * Allocate space from "ref_slot", which will be adjusted to 852 * follow the parameters on the stack. 853 */ 854 loc[0].ref_slot = cum->ref_slot; 855 856 /* 857 * Subsequent words also go into the reference slot, but 858 * do not accumulate into the regular arguments. 859 */ 860 for (int i = 1; i < n; ++i) { 861 loc[i] = (TCGCallArgumentLoc){ 862 .kind = TCG_CALL_ARG_BY_REF_N, 863 .arg_idx = cum->arg_idx, 864 .tmp_subindex = i, 865 .ref_slot = cum->ref_slot + i, 866 }; 867 } 868 cum->info_in_idx += n; 869 cum->ref_slot += n; 870 } 871 872 static void init_call_layout(TCGHelperInfo *info) 873 { 874 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 875 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 876 unsigned typemask = info->typemask; 877 unsigned typecode; 878 TCGCumulativeArgs cum = { }; 879 880 /* 881 * Parse and place any function return value. 
882 */ 883 typecode = typemask & 7; 884 switch (typecode) { 885 case dh_typecode_void: 886 info->nr_out = 0; 887 break; 888 case dh_typecode_i32: 889 case dh_typecode_s32: 890 case dh_typecode_ptr: 891 info->nr_out = 1; 892 info->out_kind = TCG_CALL_RET_NORMAL; 893 break; 894 case dh_typecode_i64: 895 case dh_typecode_s64: 896 info->nr_out = 64 / TCG_TARGET_REG_BITS; 897 info->out_kind = TCG_CALL_RET_NORMAL; 898 /* Query the last register now to trigger any assert early. */ 899 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 900 break; 901 case dh_typecode_i128: 902 info->nr_out = 128 / TCG_TARGET_REG_BITS; 903 info->out_kind = TCG_TARGET_CALL_RET_I128; 904 switch (TCG_TARGET_CALL_RET_I128) { 905 case TCG_CALL_RET_NORMAL: 906 /* Query the last register now to trigger any assert early. */ 907 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 908 break; 909 case TCG_CALL_RET_BY_VEC: 910 /* Query the single register now to trigger any assert early. */ 911 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 912 break; 913 case TCG_CALL_RET_BY_REF: 914 /* 915 * Allocate the first argument to the output. 916 * We don't need to store this anywhere, just make it 917 * unavailable for use in the input loop below. 918 */ 919 cum.arg_slot = 1; 920 break; 921 default: 922 qemu_build_not_reached(); 923 } 924 break; 925 default: 926 g_assert_not_reached(); 927 } 928 929 /* 930 * Parse and place function arguments. 
 */
    /*
     * Walk the remaining argument type codes (3 bits each, low-to-high)
     * and lay out each input argument per the target's calling convention.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Signed type codes are odd: bit 0 selects EXTEND_S. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    /* A 64-bit value occupies two 32-bit host slots. */
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            /* Shift every by-reference argument by the computed base. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

/*
 * One-time initialization of the global TCG context: allocate per-op
 * argument-constraint storage, register all helpers in helper_table,
 * initialize the target backend, and create the "env" fixed global.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode constraint arrays. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/* Public entry point: initialize the context, then the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        /* Out of space in this region; try to allocate a new one. */
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

/* Generate and install the prologue/epilogue at the start of the region. */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.
 */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Flush the i/d caches over the newly written prologue. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code precedes the out-of-line data; dump each part. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

/* Reset per-translation state before generating a new TB. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

/* Allocate and zero the next TCGTemp slot; aborts the TB on overflow. */
static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

/* Allocate a global temp; globals must precede all other temps. */
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

/* Create a TEMP_FIXED global bound to a host register and reserve it. */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

/* Record the stack-frame extent and create the "_frame" base register. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

/* Create a memory-backed global at base+offset, named for debugging. */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts =
        tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* A 64-bit global on a 32-bit host is split into two I32 halves. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        /* NOTE(review): offset + 4 selects the high half here regardless
           of host endianness -- confirm against callers' offsets. */
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

/*
 * Allocate a new temporary of the given type and kind (TEMP_EBB or
 * TEMP_TB), reusing a freed EBB temp of the same type when possible.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining sub-pieces contiguously. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}

/* Allocate a new EBB-lifetime vector temporary of the given type. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Return an EBB temp to the free list; other kinds are not freed here. */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}

/* Return the interned TEMP_CONST temp for (type, val), creating it once. */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type constant table. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
 */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

/* Return a vector constant with VAL duplicated across elements of size VECE. */
TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

/* As tcg_constant_vec, taking the vector type from an existing temp. */
TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Ops always supported on every backend. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-backend TCG_TARGET_HAS_* flags. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare/branch only exist on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops require a 64-bit host. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops additionally require any vector support at all. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

/* Emit a call op for helper FUNC, with return RET and NARGS inputs ARGS. */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-piece return: emit one arg per contiguous sub-temp. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Widen a 32-bit argument to 64 bits as the ABI requires. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* Release the temporaries created for argument extension. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

/* Reset register-allocation state for every temp before codegen. */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

/* Format a human-readable name for TS into BUF, for op dumps. */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

/* As tcg_get_arg_str_ptr, taking a TCGArg rather than a TCGTemp. */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Printable names for TCG comparison conditions, indexed by TCGCond. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

/* Printable names for load/store size/sign/endian combinations. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

/* Printable prefixes for MemOp alignment bits. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* Printable names for bswap flag combinations. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

/* True if exactly zero or one register is set in D. */
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

/* Index of the lowest register set in D. */
static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

/* Dump the current op stream to F, optionally with register prefs. */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
 */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Decode opcode-specific constant arguments symbolically. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    /* Note: this inner 'op' (MemOp) shadows the outer TCGOp. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength (acquire/release/seq-cst). */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Which load/store orderings the barrier enforces. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Remaining constant arguments printed as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}

/* we give more priority to constraints with less registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct = &def->args_ct[k];
    int n = ctpop64(arg_ct->regs);

    /*
     * Sort constraints of a single register first, which includes output
     * aliases (which must exactly match the input already allocated).
     */
    if (n == 1 || arg_ct->oalias) {
        return INT_MAX;
    }

    /*
     * Sort register pairs next, first then second immediately after.
     * Arbitrarily sort multiple pairs by the index of the first reg;
     * there shouldn't be many pairs.
2437 */ 2438 switch (arg_ct->pair) { 2439 case 1: 2440 case 3: 2441 return (k + 1) * 2; 2442 case 2: 2443 return (arg_ct->pair_index + 1) * 2 - 1; 2444 } 2445 2446 /* Finally, sort by decreasing register count. */ 2447 assert(n > 1); 2448 return -n; 2449 } 2450 2451 /* sort from highest priority to lowest */ 2452 static void sort_constraints(TCGOpDef *def, int start, int n) 2453 { 2454 int i, j; 2455 TCGArgConstraint *a = def->args_ct; 2456 2457 for (i = 0; i < n; i++) { 2458 a[start + i].sort_index = start + i; 2459 } 2460 if (n <= 1) { 2461 return; 2462 } 2463 for (i = 0; i < n - 1; i++) { 2464 for (j = i + 1; j < n; j++) { 2465 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2466 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2467 if (p1 < p2) { 2468 int tmp = a[start + i].sort_index; 2469 a[start + i].sort_index = a[start + j].sort_index; 2470 a[start + j].sort_index = tmp; 2471 } 2472 } 2473 } 2474 } 2475 2476 static void process_op_defs(TCGContext *s) 2477 { 2478 TCGOpcode op; 2479 2480 for (op = 0; op < NB_OPS; op++) { 2481 TCGOpDef *def = &tcg_op_defs[op]; 2482 const TCGTargetOpDef *tdefs; 2483 bool saw_alias_pair = false; 2484 int i, o, i2, o2, nb_args; 2485 2486 if (def->flags & TCG_OPF_NOT_PRESENT) { 2487 continue; 2488 } 2489 2490 nb_args = def->nb_iargs + def->nb_oargs; 2491 if (nb_args == 0) { 2492 continue; 2493 } 2494 2495 /* 2496 * Macro magic should make it impossible, but double-check that 2497 * the array index is in range. Since the signness of an enum 2498 * is implementation defined, force the result to unsigned. 2499 */ 2500 unsigned con_set = tcg_target_op_def(op); 2501 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2502 tdefs = &constraint_sets[con_set]; 2503 2504 for (i = 0; i < nb_args; i++) { 2505 const char *ct_str = tdefs->args_ct_str[i]; 2506 bool input_p = i >= def->nb_oargs; 2507 2508 /* Incomplete TCGTargetOpDef entry. 
*/ 2509 tcg_debug_assert(ct_str != NULL); 2510 2511 switch (*ct_str) { 2512 case '0' ... '9': 2513 o = *ct_str - '0'; 2514 tcg_debug_assert(input_p); 2515 tcg_debug_assert(o < def->nb_oargs); 2516 tcg_debug_assert(def->args_ct[o].regs != 0); 2517 tcg_debug_assert(!def->args_ct[o].oalias); 2518 def->args_ct[i] = def->args_ct[o]; 2519 /* The output sets oalias. */ 2520 def->args_ct[o].oalias = 1; 2521 def->args_ct[o].alias_index = i; 2522 /* The input sets ialias. */ 2523 def->args_ct[i].ialias = 1; 2524 def->args_ct[i].alias_index = o; 2525 if (def->args_ct[i].pair) { 2526 saw_alias_pair = true; 2527 } 2528 tcg_debug_assert(ct_str[1] == '\0'); 2529 continue; 2530 2531 case '&': 2532 tcg_debug_assert(!input_p); 2533 def->args_ct[i].newreg = true; 2534 ct_str++; 2535 break; 2536 2537 case 'p': /* plus */ 2538 /* Allocate to the register after the previous. */ 2539 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2540 o = i - 1; 2541 tcg_debug_assert(!def->args_ct[o].pair); 2542 tcg_debug_assert(!def->args_ct[o].ct); 2543 def->args_ct[i] = (TCGArgConstraint){ 2544 .pair = 2, 2545 .pair_index = o, 2546 .regs = def->args_ct[o].regs << 1, 2547 }; 2548 def->args_ct[o].pair = 1; 2549 def->args_ct[o].pair_index = i; 2550 tcg_debug_assert(ct_str[1] == '\0'); 2551 continue; 2552 2553 case 'm': /* minus */ 2554 /* Allocate to the register before the previous. */ 2555 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2556 o = i - 1; 2557 tcg_debug_assert(!def->args_ct[o].pair); 2558 tcg_debug_assert(!def->args_ct[o].ct); 2559 def->args_ct[i] = (TCGArgConstraint){ 2560 .pair = 1, 2561 .pair_index = o, 2562 .regs = def->args_ct[o].regs >> 1, 2563 }; 2564 def->args_ct[o].pair = 2; 2565 def->args_ct[o].pair_index = i; 2566 tcg_debug_assert(ct_str[1] == '\0'); 2567 continue; 2568 } 2569 2570 do { 2571 switch (*ct_str) { 2572 case 'i': 2573 def->args_ct[i].ct |= TCG_CT_CONST; 2574 break; 2575 2576 /* Include all of the target-specific constraints. 
*/ 2577 2578 #undef CONST 2579 #define CONST(CASE, MASK) \ 2580 case CASE: def->args_ct[i].ct |= MASK; break; 2581 #define REGS(CASE, MASK) \ 2582 case CASE: def->args_ct[i].regs |= MASK; break; 2583 2584 #include "tcg-target-con-str.h" 2585 2586 #undef REGS 2587 #undef CONST 2588 default: 2589 case '0' ... '9': 2590 case '&': 2591 case 'p': 2592 case 'm': 2593 /* Typo in TCGTargetOpDef constraint. */ 2594 g_assert_not_reached(); 2595 } 2596 } while (*++ct_str != '\0'); 2597 } 2598 2599 /* TCGTargetOpDef entry with too much information? */ 2600 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2601 2602 /* 2603 * Fix up output pairs that are aliased with inputs. 2604 * When we created the alias, we copied pair from the output. 2605 * There are three cases: 2606 * (1a) Pairs of inputs alias pairs of outputs. 2607 * (1b) One input aliases the first of a pair of outputs. 2608 * (2) One input aliases the second of a pair of outputs. 2609 * 2610 * Case 1a is handled by making sure that the pair_index'es are 2611 * properly updated so that they appear the same as a pair of inputs. 2612 * 2613 * Case 1b is handled by setting the pair_index of the input to 2614 * itself, simply so it doesn't point to an unrelated argument. 2615 * Since we don't encounter the "second" during the input allocation 2616 * phase, nothing happens with the second half of the input pair. 2617 * 2618 * Case 2 is handled by setting the second input to pair=3, the 2619 * first output to pair=3, and the pair_index'es to match. 2620 */ 2621 if (saw_alias_pair) { 2622 for (i = def->nb_oargs; i < nb_args; i++) { 2623 /* 2624 * Since [0-9pm] must be alone in the constraint string, 2625 * the only way they can both be set is if the pair comes 2626 * from the output alias. 
2627 */ 2628 if (!def->args_ct[i].ialias) { 2629 continue; 2630 } 2631 switch (def->args_ct[i].pair) { 2632 case 0: 2633 break; 2634 case 1: 2635 o = def->args_ct[i].alias_index; 2636 o2 = def->args_ct[o].pair_index; 2637 tcg_debug_assert(def->args_ct[o].pair == 1); 2638 tcg_debug_assert(def->args_ct[o2].pair == 2); 2639 if (def->args_ct[o2].oalias) { 2640 /* Case 1a */ 2641 i2 = def->args_ct[o2].alias_index; 2642 tcg_debug_assert(def->args_ct[i2].pair == 2); 2643 def->args_ct[i2].pair_index = i; 2644 def->args_ct[i].pair_index = i2; 2645 } else { 2646 /* Case 1b */ 2647 def->args_ct[i].pair_index = i; 2648 } 2649 break; 2650 case 2: 2651 o = def->args_ct[i].alias_index; 2652 o2 = def->args_ct[o].pair_index; 2653 tcg_debug_assert(def->args_ct[o].pair == 2); 2654 tcg_debug_assert(def->args_ct[o2].pair == 1); 2655 if (def->args_ct[o2].oalias) { 2656 /* Case 1a */ 2657 i2 = def->args_ct[o2].alias_index; 2658 tcg_debug_assert(def->args_ct[i2].pair == 1); 2659 def->args_ct[i2].pair_index = i; 2660 def->args_ct[i].pair_index = i2; 2661 } else { 2662 /* Case 2 */ 2663 def->args_ct[i].pair = 3; 2664 def->args_ct[o2].pair = 3; 2665 def->args_ct[i].pair_index = o2; 2666 def->args_ct[o2].pair_index = i; 2667 } 2668 break; 2669 default: 2670 g_assert_not_reached(); 2671 } 2672 } 2673 } 2674 2675 /* sort the constraints (XXX: this is just an heuristic) */ 2676 sort_constraints(def, 0, def->nb_oargs); 2677 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2678 } 2679 } 2680 2681 static void remove_label_use(TCGOp *op, int idx) 2682 { 2683 TCGLabel *label = arg_label(op->args[idx]); 2684 TCGLabelUse *use; 2685 2686 QSIMPLEQ_FOREACH(use, &label->branches, next) { 2687 if (use->op == op) { 2688 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next); 2689 return; 2690 } 2691 } 2692 g_assert_not_reached(); 2693 } 2694 2695 void tcg_op_remove(TCGContext *s, TCGOp *op) 2696 { 2697 switch (op->opc) { 2698 case INDEX_op_br: 2699 remove_label_use(op, 0); 2700 break; 2701 case 
INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        remove_label_use(op, 3);
        break;
    case INDEX_op_brcond2_i32:
        remove_label_use(op, 5);
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}

/* Remove every op after @op, which must be on the current op list. */
void tcg_remove_ops_after(TCGOp *op)
{
    TCGContext *s = tcg_ctx;

    while (true) {
        TCGOp *last = tcg_last_op();
        if (last == op) {
            return;
        }
        tcg_op_remove(s, last);
    }
}

/*
 * Allocate a TCGOp with room for at least @nargs arguments, reusing a
 * previously-removed op from the free list when one is large enough.
 */
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
            if (nargs <= op->nargs) {
                QTAILQ_REMOVE(&s->free_ops, op, link);
                /* Keep the recycled op's (larger) capacity. */
                nargs = op->nargs;
                goto found;
            }
        }
    }

    /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
    nargs = MAX(4, nargs);
    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);

 found:
    /* Zero everything up to (not including) the list linkage. */
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    op->nargs = nargs;

    /* Check for bitfield overflow. */
    tcg_debug_assert(op->nargs == nargs);

    s->nb_ops++;
    return op;
}

/* Allocate an op and append it to the end of the current op list. */
TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
{
    TCGOp *op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    return op;
}

/* Allocate an op and insert it immediately before @old_op. */
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
                            TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}

/* Allocate an op and insert it immediately after @old_op. */
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
                           TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}

/*
 * Retarget every branch that references label @from so that it
 * references @to instead, and merge the use lists.
 */
static void move_label_uses(TCGLabel *to, TCGLabel *from)
{
    TCGLabelUse *u;

    QSIMPLEQ_FOREACH(u, &from->branches, next) {
        TCGOp *op = u->op;
        switch (op->opc) {
        case INDEX_op_br:
            op->args[0] = label_arg(to);
            break;
        case INDEX_op_brcond_i32:
        case INDEX_op_brcond_i64:
            op->args[3] = label_arg(to);
            break;
        case INDEX_op_brcond2_i32:
            op->args[5] = label_arg(to);
            break;
        default:
            g_assert_not_reached();
        }
    }

    QSIMPLEQ_CONCAT(&to->branches, &from->branches);
}

/* Reachable analysis : remove unreachable code.  */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

/* Per-temp liveness state bits used by the liveness passes. */
#define TS_DEAD  1
#define TS_MEM   2

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_FIXED:
        case TEMP_GLOBAL:
        case TEMP_TB:
            state = TS_DEAD | TS_MEM;
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            state = TS_DEAD;
            break;
        default:
            g_assert_not_reached();
        }
        ts->state = state;
        la_reset_pref(ts);
    }
}

/* liveness analysis: sync globals back to memory.
 */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs.  */
            la_reset_pref(&s->temps[i]);
        }
    }
}

/*
 * liveness analysis: conditional branch: all temps are dead unless
 * explicitly live-across-conditional-branch, globals and local temps
 * should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_TB:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live: preferences remain valid. */
                continue;
            }
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            /* Prefer call-saved registers for values live across the call. */
            set &= mask;
            /* If the combination is not possible, restart.  */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, reached via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk ops backward, propagating liveness from uses to defs. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit. */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries.  */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.
*/ 3484 its->state = TS_DEAD; 3485 } 3486 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3487 TCGTemp *its = &s->temps[i]; 3488 its->state_ptr = NULL; 3489 its->state = TS_DEAD; 3490 } 3491 3492 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3493 TCGOpcode opc = op->opc; 3494 const TCGOpDef *def = &tcg_op_defs[opc]; 3495 TCGLifeData arg_life = op->life; 3496 int nb_iargs, nb_oargs, call_flags; 3497 TCGTemp *arg_ts, *dir_ts; 3498 3499 if (opc == INDEX_op_call) { 3500 nb_oargs = TCGOP_CALLO(op); 3501 nb_iargs = TCGOP_CALLI(op); 3502 call_flags = tcg_call_flags(op); 3503 } else { 3504 nb_iargs = def->nb_iargs; 3505 nb_oargs = def->nb_oargs; 3506 3507 /* Set flags similar to how calls require. */ 3508 if (def->flags & TCG_OPF_COND_BRANCH) { 3509 /* Like reading globals: sync_globals */ 3510 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3511 } else if (def->flags & TCG_OPF_BB_END) { 3512 /* Like writing globals: save_globals */ 3513 call_flags = 0; 3514 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3515 /* Like reading globals: sync_globals */ 3516 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3517 } else { 3518 /* No effect on globals. */ 3519 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3520 TCG_CALL_NO_WRITE_GLOBALS); 3521 } 3522 } 3523 3524 /* Make sure that input arguments are available. */ 3525 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3526 arg_ts = arg_temp(op->args[i]); 3527 dir_ts = arg_ts->state_ptr; 3528 if (dir_ts && arg_ts->state == TS_DEAD) { 3529 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3530 ? INDEX_op_ld_i32 3531 : INDEX_op_ld_i64); 3532 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3533 3534 lop->args[0] = temp_arg(dir_ts); 3535 lop->args[1] = temp_arg(arg_ts->mem_base); 3536 lop->args[2] = arg_ts->mem_offset; 3537 3538 /* Loaded, but synced with memory. */ 3539 arg_ts->state = TS_MEM; 3540 } 3541 } 3542 3543 /* Perform input replacement, and mark inputs that became dead. 
3544 No action is required except keeping temp_state up to date 3545 so that we reload when needed. */ 3546 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3547 arg_ts = arg_temp(op->args[i]); 3548 dir_ts = arg_ts->state_ptr; 3549 if (dir_ts) { 3550 op->args[i] = temp_arg(dir_ts); 3551 changes = true; 3552 if (IS_DEAD_ARG(i)) { 3553 arg_ts->state = TS_DEAD; 3554 } 3555 } 3556 } 3557 3558 /* Liveness analysis should ensure that the following are 3559 all correct, for call sites and basic block end points. */ 3560 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3561 /* Nothing to do */ 3562 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3563 for (i = 0; i < nb_globals; ++i) { 3564 /* Liveness should see that globals are synced back, 3565 that is, either TS_DEAD or TS_MEM. */ 3566 arg_ts = &s->temps[i]; 3567 tcg_debug_assert(arg_ts->state_ptr == 0 3568 || arg_ts->state != 0); 3569 } 3570 } else { 3571 for (i = 0; i < nb_globals; ++i) { 3572 /* Liveness should see that globals are saved back, 3573 that is, TS_DEAD, waiting to be reloaded. */ 3574 arg_ts = &s->temps[i]; 3575 tcg_debug_assert(arg_ts->state_ptr == 0 3576 || arg_ts->state == TS_DEAD); 3577 } 3578 } 3579 3580 /* Outputs become available. */ 3581 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3582 arg_ts = arg_temp(op->args[0]); 3583 dir_ts = arg_ts->state_ptr; 3584 if (dir_ts) { 3585 op->args[0] = temp_arg(dir_ts); 3586 changes = true; 3587 3588 /* The output is now live and modified. */ 3589 arg_ts->state = 0; 3590 3591 if (NEED_SYNC_ARG(0)) { 3592 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3593 ? 
INDEX_op_st_i32 3594 : INDEX_op_st_i64); 3595 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3596 TCGTemp *out_ts = dir_ts; 3597 3598 if (IS_DEAD_ARG(0)) { 3599 out_ts = arg_temp(op->args[1]); 3600 arg_ts->state = TS_DEAD; 3601 tcg_op_remove(s, op); 3602 } else { 3603 arg_ts->state = TS_MEM; 3604 } 3605 3606 sop->args[0] = temp_arg(out_ts); 3607 sop->args[1] = temp_arg(arg_ts->mem_base); 3608 sop->args[2] = arg_ts->mem_offset; 3609 } else { 3610 tcg_debug_assert(!IS_DEAD_ARG(0)); 3611 } 3612 } 3613 } else { 3614 for (i = 0; i < nb_oargs; i++) { 3615 arg_ts = arg_temp(op->args[i]); 3616 dir_ts = arg_ts->state_ptr; 3617 if (!dir_ts) { 3618 continue; 3619 } 3620 op->args[i] = temp_arg(dir_ts); 3621 changes = true; 3622 3623 /* The output is now live and modified. */ 3624 arg_ts->state = 0; 3625 3626 /* Sync outputs upon their last write. */ 3627 if (NEED_SYNC_ARG(i)) { 3628 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3629 ? INDEX_op_st_i32 3630 : INDEX_op_st_i64); 3631 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3632 3633 sop->args[0] = temp_arg(dir_ts); 3634 sop->args[1] = temp_arg(arg_ts->mem_base); 3635 sop->args[2] = arg_ts->mem_offset; 3636 3637 arg_ts->state = TS_MEM; 3638 } 3639 /* Drop outputs that are dead. */ 3640 if (IS_DEAD_ARG(i)) { 3641 arg_ts->state = TS_DEAD; 3642 } 3643 } 3644 } 3645 } 3646 3647 return changes; 3648 } 3649 3650 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3651 { 3652 intptr_t off; 3653 int size, align; 3654 3655 /* When allocating an object, look at the full type. 
*/ 3656 size = tcg_type_size(ts->base_type); 3657 switch (ts->base_type) { 3658 case TCG_TYPE_I32: 3659 align = 4; 3660 break; 3661 case TCG_TYPE_I64: 3662 case TCG_TYPE_V64: 3663 align = 8; 3664 break; 3665 case TCG_TYPE_I128: 3666 case TCG_TYPE_V128: 3667 case TCG_TYPE_V256: 3668 /* 3669 * Note that we do not require aligned storage for V256, 3670 * and that we provide alignment for I128 to match V128, 3671 * even if that's above what the host ABI requires. 3672 */ 3673 align = 16; 3674 break; 3675 default: 3676 g_assert_not_reached(); 3677 } 3678 3679 /* 3680 * Assume the stack is sufficiently aligned. 3681 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3682 * and do not require 16 byte vector alignment. This seems slightly 3683 * easier than fully parameterizing the above switch statement. 3684 */ 3685 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3686 off = ROUND_UP(s->current_frame_offset, align); 3687 3688 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3689 if (off + size > s->frame_end) { 3690 tcg_raise_tb_overflow(s); 3691 } 3692 s->current_frame_offset = off + size; 3693 #if defined(__sparc__) 3694 off += TCG_TARGET_STACK_BIAS; 3695 #endif 3696 3697 /* If the object was subdivided, assign memory to all the parts. */ 3698 if (ts->base_type != ts->type) { 3699 int part_size = tcg_type_size(ts->type); 3700 int part_count = size / part_size; 3701 3702 /* 3703 * Each part is allocated sequentially in tcg_temp_new_internal. 3704 * Jump back to the first part by subtracting the current index. 3705 */ 3706 ts -= ts->temp_subindex; 3707 for (int i = 0; i < part_count; ++i) { 3708 ts[i].mem_offset = off + i * part_size; 3709 ts[i].mem_base = s->frame_temp; 3710 ts[i].mem_allocated = 1; 3711 } 3712 } else { 3713 ts->mem_offset = off; 3714 ts->mem_base = s->frame_temp; 3715 ts->mem_allocated = 1; 3716 } 3717 } 3718 3719 /* Assign @reg to @ts, and update reg_to_temp[]. 
*/ 3720 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 3721 { 3722 if (ts->val_type == TEMP_VAL_REG) { 3723 TCGReg old = ts->reg; 3724 tcg_debug_assert(s->reg_to_temp[old] == ts); 3725 if (old == reg) { 3726 return; 3727 } 3728 s->reg_to_temp[old] = NULL; 3729 } 3730 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3731 s->reg_to_temp[reg] = ts; 3732 ts->val_type = TEMP_VAL_REG; 3733 ts->reg = reg; 3734 } 3735 3736 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 3737 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 3738 { 3739 tcg_debug_assert(type != TEMP_VAL_REG); 3740 if (ts->val_type == TEMP_VAL_REG) { 3741 TCGReg reg = ts->reg; 3742 tcg_debug_assert(s->reg_to_temp[reg] == ts); 3743 s->reg_to_temp[reg] = NULL; 3744 } 3745 ts->val_type = type; 3746 } 3747 3748 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3749 3750 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3751 mark it free; otherwise mark it dead. */ 3752 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3753 { 3754 TCGTempVal new_type; 3755 3756 switch (ts->kind) { 3757 case TEMP_FIXED: 3758 return; 3759 case TEMP_GLOBAL: 3760 case TEMP_TB: 3761 new_type = TEMP_VAL_MEM; 3762 break; 3763 case TEMP_EBB: 3764 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3765 break; 3766 case TEMP_CONST: 3767 new_type = TEMP_VAL_CONST; 3768 break; 3769 default: 3770 g_assert_not_reached(); 3771 } 3772 set_temp_val_nonreg(s, ts, new_type); 3773 } 3774 3775 /* Mark a temporary as dead. */ 3776 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3777 { 3778 temp_free_or_dead(s, ts, 1); 3779 } 3780 3781 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3782 registers needs to be allocated to store a constant. 
If 'free_or_dead' 3783 is non-zero, subsequently release the temporary; if it is positive, the 3784 temp is dead; if it is negative, the temp is free. */ 3785 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3786 TCGRegSet preferred_regs, int free_or_dead) 3787 { 3788 if (!temp_readonly(ts) && !ts->mem_coherent) { 3789 if (!ts->mem_allocated) { 3790 temp_allocate_frame(s, ts); 3791 } 3792 switch (ts->val_type) { 3793 case TEMP_VAL_CONST: 3794 /* If we're going to free the temp immediately, then we won't 3795 require it later in a register, so attempt to store the 3796 constant to memory directly. */ 3797 if (free_or_dead 3798 && tcg_out_sti(s, ts->type, ts->val, 3799 ts->mem_base->reg, ts->mem_offset)) { 3800 break; 3801 } 3802 temp_load(s, ts, tcg_target_available_regs[ts->type], 3803 allocated_regs, preferred_regs); 3804 /* fallthrough */ 3805 3806 case TEMP_VAL_REG: 3807 tcg_out_st(s, ts->type, ts->reg, 3808 ts->mem_base->reg, ts->mem_offset); 3809 break; 3810 3811 case TEMP_VAL_MEM: 3812 break; 3813 3814 case TEMP_VAL_DEAD: 3815 default: 3816 g_assert_not_reached(); 3817 } 3818 ts->mem_coherent = 1; 3819 } 3820 if (free_or_dead) { 3821 temp_free_or_dead(s, ts, free_or_dead); 3822 } 3823 } 3824 3825 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3826 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3827 { 3828 TCGTemp *ts = s->reg_to_temp[reg]; 3829 if (ts != NULL) { 3830 temp_sync(s, ts, allocated_regs, 0, -1); 3831 } 3832 } 3833 3834 /** 3835 * tcg_reg_alloc: 3836 * @required_regs: Set of registers in which we must allocate. 3837 * @allocated_regs: Set of registers which must be avoided. 3838 * @preferred_regs: Set of registers we should prefer. 3839 * @rev: True if we search the registers in "indirect" order. 3840 * 3841 * The allocated register must be in @required_regs & ~@allocated_regs, 3842 * but if we can put it in @preferred_regs we may save a move later. 
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1] is every acceptable register, reg_ct[0] the preferred
       subset thereof. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference.  */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    g_assert_not_reached();
}

/*
 * As tcg_reg_alloc, but allocate an adjacent pair of registers
 * (reg, reg + 1); the lower register is returned.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either.  */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.
*/ 3954 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3955 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3956 { 3957 TCGReg reg; 3958 3959 switch (ts->val_type) { 3960 case TEMP_VAL_REG: 3961 return; 3962 case TEMP_VAL_CONST: 3963 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3964 preferred_regs, ts->indirect_base); 3965 if (ts->type <= TCG_TYPE_I64) { 3966 tcg_out_movi(s, ts->type, reg, ts->val); 3967 } else { 3968 uint64_t val = ts->val; 3969 MemOp vece = MO_64; 3970 3971 /* 3972 * Find the minimal vector element that matches the constant. 3973 * The targets will, in general, have to do this search anyway, 3974 * do this generically. 3975 */ 3976 if (val == dup_const(MO_8, val)) { 3977 vece = MO_8; 3978 } else if (val == dup_const(MO_16, val)) { 3979 vece = MO_16; 3980 } else if (val == dup_const(MO_32, val)) { 3981 vece = MO_32; 3982 } 3983 3984 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 3985 } 3986 ts->mem_coherent = 0; 3987 break; 3988 case TEMP_VAL_MEM: 3989 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3990 preferred_regs, ts->indirect_base); 3991 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3992 ts->mem_coherent = 1; 3993 break; 3994 case TEMP_VAL_DEAD: 3995 default: 3996 g_assert_not_reached(); 3997 } 3998 set_temp_val_reg(s, ts, reg); 3999 } 4000 4001 /* Save a temporary to memory. 'allocated_regs' is used in case a 4002 temporary registers needs to be allocated to store a constant. */ 4003 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 4004 { 4005 /* The liveness analysis already ensures that globals are back 4006 in memory. Keep an tcg_debug_assert for safety. */ 4007 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 4008 } 4009 4010 /* save globals to their canonical location and assume they can be 4011 modified be the following code. 
'allocated_regs' is used in case a 4012 temporary registers needs to be allocated to store a constant. */ 4013 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 4014 { 4015 int i, n; 4016 4017 for (i = 0, n = s->nb_globals; i < n; i++) { 4018 temp_save(s, &s->temps[i], allocated_regs); 4019 } 4020 } 4021 4022 /* sync globals to their canonical location and assume they can be 4023 read by the following code. 'allocated_regs' is used in case a 4024 temporary registers needs to be allocated to store a constant. */ 4025 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 4026 { 4027 int i, n; 4028 4029 for (i = 0, n = s->nb_globals; i < n; i++) { 4030 TCGTemp *ts = &s->temps[i]; 4031 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 4032 || ts->kind == TEMP_FIXED 4033 || ts->mem_coherent); 4034 } 4035 } 4036 4037 /* at the end of a basic block, we assume all temporaries are dead and 4038 all globals are stored at their canonical location. */ 4039 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 4040 { 4041 int i; 4042 4043 for (i = s->nb_globals; i < s->nb_temps; i++) { 4044 TCGTemp *ts = &s->temps[i]; 4045 4046 switch (ts->kind) { 4047 case TEMP_TB: 4048 temp_save(s, ts, allocated_regs); 4049 break; 4050 case TEMP_EBB: 4051 /* The liveness analysis already ensures that temps are dead. 4052 Keep an tcg_debug_assert for safety. */ 4053 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 4054 break; 4055 case TEMP_CONST: 4056 /* Similarly, we should have freed any allocated register. */ 4057 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 4058 break; 4059 default: 4060 g_assert_not_reached(); 4061 } 4062 } 4063 4064 save_globals(s, allocated_regs); 4065 } 4066 4067 /* 4068 * At a conditional branch, we assume all temporaries are dead unless 4069 * explicitly live-across-conditional-branch; all globals and local 4070 * temps are synced to their location. 
4071 */ 4072 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 4073 { 4074 sync_globals(s, allocated_regs); 4075 4076 for (int i = s->nb_globals; i < s->nb_temps; i++) { 4077 TCGTemp *ts = &s->temps[i]; 4078 /* 4079 * The liveness analysis already ensures that temps are dead. 4080 * Keep tcg_debug_asserts for safety. 4081 */ 4082 switch (ts->kind) { 4083 case TEMP_TB: 4084 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 4085 break; 4086 case TEMP_EBB: 4087 case TEMP_CONST: 4088 break; 4089 default: 4090 g_assert_not_reached(); 4091 } 4092 } 4093 } 4094 4095 /* 4096 * Specialized code generation for INDEX_op_mov_* with a constant. 4097 */ 4098 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 4099 tcg_target_ulong val, TCGLifeData arg_life, 4100 TCGRegSet preferred_regs) 4101 { 4102 /* ENV should not be modified. */ 4103 tcg_debug_assert(!temp_readonly(ots)); 4104 4105 /* The movi is not explicitly generated here. */ 4106 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 4107 ots->val = val; 4108 ots->mem_coherent = 0; 4109 if (NEED_SYNC_ARG(0)) { 4110 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 4111 } else if (IS_DEAD_ARG(0)) { 4112 temp_dead(s, ots); 4113 } 4114 } 4115 4116 /* 4117 * Specialized code generation for INDEX_op_mov_*. 4118 */ 4119 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 4120 { 4121 const TCGLifeData arg_life = op->life; 4122 TCGRegSet allocated_regs, preferred_regs; 4123 TCGTemp *ts, *ots; 4124 TCGType otype, itype; 4125 TCGReg oreg, ireg; 4126 4127 allocated_regs = s->reserved_regs; 4128 preferred_regs = output_pref(op, 0); 4129 ots = arg_temp(op->args[0]); 4130 ts = arg_temp(op->args[1]); 4131 4132 /* ENV should not be modified. */ 4133 tcg_debug_assert(!temp_readonly(ots)); 4134 4135 /* Note that otype != itype for no-op truncation. 
*/ 4136 otype = ots->type; 4137 itype = ts->type; 4138 4139 if (ts->val_type == TEMP_VAL_CONST) { 4140 /* propagate constant or generate sti */ 4141 tcg_target_ulong val = ts->val; 4142 if (IS_DEAD_ARG(1)) { 4143 temp_dead(s, ts); 4144 } 4145 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4146 return; 4147 } 4148 4149 /* If the source value is in memory we're going to be forced 4150 to have it in a register in order to perform the copy. Copy 4151 the SOURCE value into its own register first, that way we 4152 don't have to reload SOURCE the next time it is used. */ 4153 if (ts->val_type == TEMP_VAL_MEM) { 4154 temp_load(s, ts, tcg_target_available_regs[itype], 4155 allocated_regs, preferred_regs); 4156 } 4157 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4158 ireg = ts->reg; 4159 4160 if (IS_DEAD_ARG(0)) { 4161 /* mov to a non-saved dead register makes no sense (even with 4162 liveness analysis disabled). */ 4163 tcg_debug_assert(NEED_SYNC_ARG(0)); 4164 if (!ots->mem_allocated) { 4165 temp_allocate_frame(s, ots); 4166 } 4167 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4168 if (IS_DEAD_ARG(1)) { 4169 temp_dead(s, ts); 4170 } 4171 temp_dead(s, ots); 4172 return; 4173 } 4174 4175 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4176 /* 4177 * The mov can be suppressed. Kill input first, so that it 4178 * is unlinked from reg_to_temp, then set the output to the 4179 * reg that we saved from the input. 4180 */ 4181 temp_dead(s, ts); 4182 oreg = ireg; 4183 } else { 4184 if (ots->val_type == TEMP_VAL_REG) { 4185 oreg = ots->reg; 4186 } else { 4187 /* Make sure to not spill the input register during allocation. */ 4188 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4189 allocated_regs | ((TCGRegSet)1 << ireg), 4190 preferred_regs, ots->indirect_base); 4191 } 4192 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4193 /* 4194 * Cross register class move not supported. 
4195 * Store the source register into the destination slot 4196 * and leave the destination temp as TEMP_VAL_MEM. 4197 */ 4198 assert(!temp_readonly(ots)); 4199 if (!ts->mem_allocated) { 4200 temp_allocate_frame(s, ots); 4201 } 4202 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4203 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4204 ots->mem_coherent = 1; 4205 return; 4206 } 4207 } 4208 set_temp_val_reg(s, ots, oreg); 4209 ots->mem_coherent = 0; 4210 4211 if (NEED_SYNC_ARG(0)) { 4212 temp_sync(s, ots, allocated_regs, 0, 0); 4213 } 4214 } 4215 4216 /* 4217 * Specialized code generation for INDEX_op_dup_vec. 4218 */ 4219 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4220 { 4221 const TCGLifeData arg_life = op->life; 4222 TCGRegSet dup_out_regs, dup_in_regs; 4223 TCGTemp *its, *ots; 4224 TCGType itype, vtype; 4225 unsigned vece; 4226 int lowpart_ofs; 4227 bool ok; 4228 4229 ots = arg_temp(op->args[0]); 4230 its = arg_temp(op->args[1]); 4231 4232 /* ENV should not be modified. */ 4233 tcg_debug_assert(!temp_readonly(ots)); 4234 4235 itype = its->type; 4236 vece = TCGOP_VECE(op); 4237 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4238 4239 if (its->val_type == TEMP_VAL_CONST) { 4240 /* Propagate constant via movi -> dupi. */ 4241 tcg_target_ulong val = its->val; 4242 if (IS_DEAD_ARG(1)) { 4243 temp_dead(s, its); 4244 } 4245 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 4246 return; 4247 } 4248 4249 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4250 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 4251 4252 /* Allocate the output register now. */ 4253 if (ots->val_type != TEMP_VAL_REG) { 4254 TCGRegSet allocated_regs = s->reserved_regs; 4255 TCGReg oreg; 4256 4257 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 4258 /* Make sure to not spill the input register. 
*/ 4259 tcg_regset_set_reg(allocated_regs, its->reg); 4260 } 4261 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4262 output_pref(op, 0), ots->indirect_base); 4263 set_temp_val_reg(s, ots, oreg); 4264 } 4265 4266 switch (its->val_type) { 4267 case TEMP_VAL_REG: 4268 /* 4269 * The dup constriaints must be broad, covering all possible VECE. 4270 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 4271 * to fail, indicating that extra moves are required for that case. 4272 */ 4273 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4274 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4275 goto done; 4276 } 4277 /* Try again from memory or a vector input register. */ 4278 } 4279 if (!its->mem_coherent) { 4280 /* 4281 * The input register is not synced, and so an extra store 4282 * would be required to use memory. Attempt an integer-vector 4283 * register move first. We do not have a TCGRegSet for this. 4284 */ 4285 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4286 break; 4287 } 4288 /* Sync the temp back to its slot and load from there. */ 4289 temp_sync(s, its, s->reserved_regs, 0, 0); 4290 } 4291 /* fall through */ 4292 4293 case TEMP_VAL_MEM: 4294 lowpart_ofs = 0; 4295 if (HOST_BIG_ENDIAN) { 4296 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 4297 } 4298 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 4299 its->mem_offset + lowpart_ofs)) { 4300 goto done; 4301 } 4302 /* Load the input into the destination vector register. */ 4303 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 4304 break; 4305 4306 default: 4307 g_assert_not_reached(); 4308 } 4309 4310 /* We now have a vector input register, so dup must succeed. 
*/ 4311 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4312 tcg_debug_assert(ok); 4313 4314 done: 4315 ots->mem_coherent = 0; 4316 if (IS_DEAD_ARG(1)) { 4317 temp_dead(s, its); 4318 } 4319 if (NEED_SYNC_ARG(0)) { 4320 temp_sync(s, ots, s->reserved_regs, 0, 0); 4321 } 4322 if (IS_DEAD_ARG(0)) { 4323 temp_dead(s, ots); 4324 } 4325 } 4326 4327 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4328 { 4329 const TCGLifeData arg_life = op->life; 4330 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4331 TCGRegSet i_allocated_regs; 4332 TCGRegSet o_allocated_regs; 4333 int i, k, nb_iargs, nb_oargs; 4334 TCGReg reg; 4335 TCGArg arg; 4336 const TCGArgConstraint *arg_ct; 4337 TCGTemp *ts; 4338 TCGArg new_args[TCG_MAX_OP_ARGS]; 4339 int const_args[TCG_MAX_OP_ARGS]; 4340 4341 nb_oargs = def->nb_oargs; 4342 nb_iargs = def->nb_iargs; 4343 4344 /* copy constants */ 4345 memcpy(new_args + nb_oargs + nb_iargs, 4346 op->args + nb_oargs + nb_iargs, 4347 sizeof(TCGArg) * def->nb_cargs); 4348 4349 i_allocated_regs = s->reserved_regs; 4350 o_allocated_regs = s->reserved_regs; 4351 4352 /* satisfy input constraints */ 4353 for (k = 0; k < nb_iargs; k++) { 4354 TCGRegSet i_preferred_regs, i_required_regs; 4355 bool allocate_new_reg, copyto_new_reg; 4356 TCGTemp *ts2; 4357 int i1, i2; 4358 4359 i = def->args_ct[nb_oargs + k].sort_index; 4360 arg = op->args[i]; 4361 arg_ct = &def->args_ct[i]; 4362 ts = arg_temp(arg); 4363 4364 if (ts->val_type == TEMP_VAL_CONST 4365 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4366 /* constant is OK for instruction */ 4367 const_args[i] = 1; 4368 new_args[i] = ts->val; 4369 continue; 4370 } 4371 4372 reg = ts->reg; 4373 i_preferred_regs = 0; 4374 i_required_regs = arg_ct->regs; 4375 allocate_new_reg = false; 4376 copyto_new_reg = false; 4377 4378 switch (arg_ct->pair) { 4379 case 0: /* not paired */ 4380 if (arg_ct->ialias) { 4381 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4382 4383 /* 4384 * If the 
input is readonly, then it cannot also be an 4385 * output and aliased to itself. If the input is not 4386 * dead after the instruction, we must allocate a new 4387 * register and move it. 4388 */ 4389 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4390 allocate_new_reg = true; 4391 } else if (ts->val_type == TEMP_VAL_REG) { 4392 /* 4393 * Check if the current register has already been 4394 * allocated for another input. 4395 */ 4396 allocate_new_reg = 4397 tcg_regset_test_reg(i_allocated_regs, reg); 4398 } 4399 } 4400 if (!allocate_new_reg) { 4401 temp_load(s, ts, i_required_regs, i_allocated_regs, 4402 i_preferred_regs); 4403 reg = ts->reg; 4404 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4405 } 4406 if (allocate_new_reg) { 4407 /* 4408 * Allocate a new register matching the constraint 4409 * and move the temporary register into it. 4410 */ 4411 temp_load(s, ts, tcg_target_available_regs[ts->type], 4412 i_allocated_regs, 0); 4413 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4414 i_preferred_regs, ts->indirect_base); 4415 copyto_new_reg = true; 4416 } 4417 break; 4418 4419 case 1: 4420 /* First of an input pair; if i1 == i2, the second is an output. */ 4421 i1 = i; 4422 i2 = arg_ct->pair_index; 4423 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4424 4425 /* 4426 * It is easier to default to allocating a new pair 4427 * and to identify a few cases where it's not required. 4428 */ 4429 if (arg_ct->ialias) { 4430 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4431 if (IS_DEAD_ARG(i1) && 4432 IS_DEAD_ARG(i2) && 4433 !temp_readonly(ts) && 4434 ts->val_type == TEMP_VAL_REG && 4435 ts->reg < TCG_TARGET_NB_REGS - 1 && 4436 tcg_regset_test_reg(i_required_regs, reg) && 4437 !tcg_regset_test_reg(i_allocated_regs, reg) && 4438 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4439 (ts2 4440 ? 
ts2->val_type == TEMP_VAL_REG && 4441 ts2->reg == reg + 1 && 4442 !temp_readonly(ts2) 4443 : s->reg_to_temp[reg + 1] == NULL)) { 4444 break; 4445 } 4446 } else { 4447 /* Without aliasing, the pair must also be an input. */ 4448 tcg_debug_assert(ts2); 4449 if (ts->val_type == TEMP_VAL_REG && 4450 ts2->val_type == TEMP_VAL_REG && 4451 ts2->reg == reg + 1 && 4452 tcg_regset_test_reg(i_required_regs, reg)) { 4453 break; 4454 } 4455 } 4456 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4457 0, ts->indirect_base); 4458 goto do_pair; 4459 4460 case 2: /* pair second */ 4461 reg = new_args[arg_ct->pair_index] + 1; 4462 goto do_pair; 4463 4464 case 3: /* ialias with second output, no first input */ 4465 tcg_debug_assert(arg_ct->ialias); 4466 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4467 4468 if (IS_DEAD_ARG(i) && 4469 !temp_readonly(ts) && 4470 ts->val_type == TEMP_VAL_REG && 4471 reg > 0 && 4472 s->reg_to_temp[reg - 1] == NULL && 4473 tcg_regset_test_reg(i_required_regs, reg) && 4474 !tcg_regset_test_reg(i_allocated_regs, reg) && 4475 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4476 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4477 break; 4478 } 4479 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4480 i_allocated_regs, 0, 4481 ts->indirect_base); 4482 tcg_regset_set_reg(i_allocated_regs, reg); 4483 reg += 1; 4484 goto do_pair; 4485 4486 do_pair: 4487 /* 4488 * If an aliased input is not dead after the instruction, 4489 * we must allocate a new register and move it. 4490 */ 4491 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4492 TCGRegSet t_allocated_regs = i_allocated_regs; 4493 4494 /* 4495 * Because of the alias, and the continued life, make sure 4496 * that the temp is somewhere *other* than the reg pair, 4497 * and we get a copy in reg. 
4498 */ 4499 tcg_regset_set_reg(t_allocated_regs, reg); 4500 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4501 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4502 /* If ts was already in reg, copy it somewhere else. */ 4503 TCGReg nr; 4504 bool ok; 4505 4506 tcg_debug_assert(ts->kind != TEMP_FIXED); 4507 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4508 t_allocated_regs, 0, ts->indirect_base); 4509 ok = tcg_out_mov(s, ts->type, nr, reg); 4510 tcg_debug_assert(ok); 4511 4512 set_temp_val_reg(s, ts, nr); 4513 } else { 4514 temp_load(s, ts, tcg_target_available_regs[ts->type], 4515 t_allocated_regs, 0); 4516 copyto_new_reg = true; 4517 } 4518 } else { 4519 /* Preferably allocate to reg, otherwise copy. */ 4520 i_required_regs = (TCGRegSet)1 << reg; 4521 temp_load(s, ts, i_required_regs, i_allocated_regs, 4522 i_preferred_regs); 4523 copyto_new_reg = ts->reg != reg; 4524 } 4525 break; 4526 4527 default: 4528 g_assert_not_reached(); 4529 } 4530 4531 if (copyto_new_reg) { 4532 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4533 /* 4534 * Cross register class move not supported. Sync the 4535 * temp back to its slot and load from there. 4536 */ 4537 temp_sync(s, ts, i_allocated_regs, 0, 0); 4538 tcg_out_ld(s, ts->type, reg, 4539 ts->mem_base->reg, ts->mem_offset); 4540 } 4541 } 4542 new_args[i] = reg; 4543 const_args[i] = 0; 4544 tcg_regset_set_reg(i_allocated_regs, reg); 4545 } 4546 4547 /* mark dead temporaries and free the associated registers */ 4548 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4549 if (IS_DEAD_ARG(i)) { 4550 temp_dead(s, arg_temp(op->args[i])); 4551 } 4552 } 4553 4554 if (def->flags & TCG_OPF_COND_BRANCH) { 4555 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4556 } else if (def->flags & TCG_OPF_BB_END) { 4557 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4558 } else { 4559 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4560 /* XXX: permit generic clobber register list ? 
*/ 4561 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4562 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4563 tcg_reg_free(s, i, i_allocated_regs); 4564 } 4565 } 4566 } 4567 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4568 /* sync globals if the op has side effects and might trigger 4569 an exception. */ 4570 sync_globals(s, i_allocated_regs); 4571 } 4572 4573 /* satisfy the output constraints */ 4574 for(k = 0; k < nb_oargs; k++) { 4575 i = def->args_ct[k].sort_index; 4576 arg = op->args[i]; 4577 arg_ct = &def->args_ct[i]; 4578 ts = arg_temp(arg); 4579 4580 /* ENV should not be modified. */ 4581 tcg_debug_assert(!temp_readonly(ts)); 4582 4583 switch (arg_ct->pair) { 4584 case 0: /* not paired */ 4585 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4586 reg = new_args[arg_ct->alias_index]; 4587 } else if (arg_ct->newreg) { 4588 reg = tcg_reg_alloc(s, arg_ct->regs, 4589 i_allocated_regs | o_allocated_regs, 4590 output_pref(op, k), ts->indirect_base); 4591 } else { 4592 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4593 output_pref(op, k), ts->indirect_base); 4594 } 4595 break; 4596 4597 case 1: /* first of pair */ 4598 tcg_debug_assert(!arg_ct->newreg); 4599 if (arg_ct->oalias) { 4600 reg = new_args[arg_ct->alias_index]; 4601 break; 4602 } 4603 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 4604 output_pref(op, k), ts->indirect_base); 4605 break; 4606 4607 case 2: /* second of pair */ 4608 tcg_debug_assert(!arg_ct->newreg); 4609 if (arg_ct->oalias) { 4610 reg = new_args[arg_ct->alias_index]; 4611 } else { 4612 reg = new_args[arg_ct->pair_index] + 1; 4613 } 4614 break; 4615 4616 case 3: /* first of pair, aliasing with a second input */ 4617 tcg_debug_assert(!arg_ct->newreg); 4618 reg = new_args[arg_ct->pair_index] - 1; 4619 break; 4620 4621 default: 4622 g_assert_not_reached(); 4623 } 4624 tcg_regset_set_reg(o_allocated_regs, reg); 4625 set_temp_val_reg(s, ts, reg); 4626 ts->mem_coherent = 0; 4627 new_args[i] = reg; 4628 } 4629 
} 4630 4631 /* emit instruction */ 4632 switch (op->opc) { 4633 case INDEX_op_ext8s_i32: 4634 tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4635 break; 4636 case INDEX_op_ext8s_i64: 4637 tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4638 break; 4639 case INDEX_op_ext8u_i32: 4640 case INDEX_op_ext8u_i64: 4641 tcg_out_ext8u(s, new_args[0], new_args[1]); 4642 break; 4643 case INDEX_op_ext16s_i32: 4644 tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); 4645 break; 4646 case INDEX_op_ext16s_i64: 4647 tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); 4648 break; 4649 case INDEX_op_ext16u_i32: 4650 case INDEX_op_ext16u_i64: 4651 tcg_out_ext16u(s, new_args[0], new_args[1]); 4652 break; 4653 case INDEX_op_ext32s_i64: 4654 tcg_out_ext32s(s, new_args[0], new_args[1]); 4655 break; 4656 case INDEX_op_ext32u_i64: 4657 tcg_out_ext32u(s, new_args[0], new_args[1]); 4658 break; 4659 case INDEX_op_ext_i32_i64: 4660 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); 4661 break; 4662 case INDEX_op_extu_i32_i64: 4663 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); 4664 break; 4665 case INDEX_op_extrl_i64_i32: 4666 tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); 4667 break; 4668 default: 4669 if (def->flags & TCG_OPF_VECTOR) { 4670 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4671 new_args, const_args); 4672 } else { 4673 tcg_out_op(s, op->opc, new_args, const_args); 4674 } 4675 break; 4676 } 4677 4678 /* move the outputs in the correct register if needed */ 4679 for(i = 0; i < nb_oargs; i++) { 4680 ts = arg_temp(op->args[i]); 4681 4682 /* ENV should not be modified. 
 */
    tcg_debug_assert(!temp_readonly(ts));

    /* Sync or discard each output as the liveness data directs. */
    if (NEED_SYNC_ARG(i)) {
        temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
    } else if (IS_DEAD_ARG(i)) {
        temp_dead(s, ts);
    }
    }
}

/*
 * Allocate registers for INDEX_op_dup2_vec, which builds a vector of
 * 64-bit elements from two 32-bit inputs on a 32-bit host.
 * Returns true if the operation was emitted here (immediate or
 * load-from-memory forms); false to let the caller fall back to the
 * generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);   /* output vector */
    itsl = arg_temp(op->args[1]);  /* low 32 bits of the element */
    itsh = arg_temp(op->args[2]);  /* high 32 bits of the element */

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that can replicate the value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Both halves must be in their canonical memory slots. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

/*
 * Load call argument @ts into the fixed register @reg,
 * evicting any temp currently occupying @reg.
 */
static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}

/*
 * Store call argument @ts into outgoing stack slot @stk_slot,
 * relative to TCG_REG_CALL_STACK.
 */
static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               TCG_TARGET_CALL_STACK_OFFSET +
               stk_slot * sizeof(tcg_target_long));
}

/*
 * Place call argument @ts per location @l: either a fixed argument
 * register or a stack slot beyond the register-passed arguments.
 */
static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
                            TCGTemp *ts, TCGRegSet *allocated_regs)
{
    if (REG_P(l)) {
        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
        load_arg_reg(s, reg, ts, *allocated_regs);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
                     ts, *allocated_regs);
    }
}

/*
 * Materialize the address @ref_base + @ref_off into argument slot
 * @arg_slot, either directly into an argument register or via a
 * scratch register into the outgoing stack area.
 */
static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
                         intptr_t ref_off, TCGRegSet *allocated_regs)
{
    TCGReg reg;
    int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    if (stk_slot < 0) {
        /* Slot is passed in a register. */
        reg = tcg_target_call_iarg_regs[arg_slot];
        tcg_reg_free(s, reg, *allocated_regs);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        /* Slot is passed on the stack; build the address in a scratch. */
        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
                            *allocated_regs, 0, false);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
                   TCG_TARGET_CALL_STACK_OFFSET
                   + stk_slot * sizeof(tcg_target_long));
    }
}

/*
 * Register allocation for a call op: load the inputs into their ABI
 * locations, free the call-clobbered registers, save/sync globals as
 * the helper's flags require, emit the call, then bind the outputs
 * to the ABI return locations.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its reference slot, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference argument: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers. */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers. */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed. */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Value returned in a vector register; spill it to the home slot. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/*
 * Accumulate per-context profile data into @prof.
 * Pass in a zero'ed @prof; @counters selects the scalar counters,
 * @table selects the per-opcode counts.
 */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

/* Snapshot only the scalar counters into @prof. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

/* Snapshot only the per-opcode counts into @prof. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

/* Append the per-opcode execution counts to @buf, one line per opcode. */
void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}

/* Sum cpu_exec_time over all TCG contexts. */
int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


/*
 * Translate the op list of @s into host code for @tb.
 * Returns the size of the generated code on success, -1 if the code
 * buffer high-water mark was exceeded (caller restarts with a flushed
 * buffer), or -2 if the TB grew too large for gen_insn_end_off or
 * relocations could not be resolved (caller restarts with a smaller TB).
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef CONFIG_PROFILER
/* Append a human-readable summary of the TCG profile counters to @buf. */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Guard all per-TB averages against division by zero. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ?
                              s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, " gen_code time %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, " avg cycles %0.1f\n",
                           s->restore_count ?
5399 (double)s->restore_time / s->restore_count : 0); 5400 } 5401 #else 5402 void tcg_dump_info(GString *buf) 5403 { 5404 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 5405 } 5406 #endif 5407 5408 #ifdef ELF_HOST_MACHINE 5409 /* In order to use this feature, the backend needs to do three things: 5410 5411 (1) Define ELF_HOST_MACHINE to indicate both what value to 5412 put into the ELF image and to indicate support for the feature. 5413 5414 (2) Define tcg_register_jit. This should create a buffer containing 5415 the contents of a .debug_frame section that describes the post- 5416 prologue unwind info for the tcg machine. 5417 5418 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 5419 */ 5420 5421 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 5422 typedef enum { 5423 JIT_NOACTION = 0, 5424 JIT_REGISTER_FN, 5425 JIT_UNREGISTER_FN 5426 } jit_actions_t; 5427 5428 struct jit_code_entry { 5429 struct jit_code_entry *next_entry; 5430 struct jit_code_entry *prev_entry; 5431 const void *symfile_addr; 5432 uint64_t symfile_size; 5433 }; 5434 5435 struct jit_descriptor { 5436 uint32_t version; 5437 uint32_t action_flag; 5438 struct jit_code_entry *relevant_entry; 5439 struct jit_code_entry *first_entry; 5440 }; 5441 5442 void __jit_debug_register_code(void) __attribute__((noinline)); 5443 void __jit_debug_register_code(void) 5444 { 5445 asm(""); 5446 } 5447 5448 /* Must statically initialize the version, because GDB may check 5449 the version before we can set it. */ 5450 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 5451 5452 /* End GDB interface. 
*/ 5453 5454 static int find_string(const char *strtab, const char *str) 5455 { 5456 const char *p = strtab + 1; 5457 5458 while (1) { 5459 if (strcmp(p, str) == 0) { 5460 return p - strtab; 5461 } 5462 p += strlen(p) + 1; 5463 } 5464 } 5465 5466 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 5467 const void *debug_frame, 5468 size_t debug_frame_size) 5469 { 5470 struct __attribute__((packed)) DebugInfo { 5471 uint32_t len; 5472 uint16_t version; 5473 uint32_t abbrev; 5474 uint8_t ptr_size; 5475 uint8_t cu_die; 5476 uint16_t cu_lang; 5477 uintptr_t cu_low_pc; 5478 uintptr_t cu_high_pc; 5479 uint8_t fn_die; 5480 char fn_name[16]; 5481 uintptr_t fn_low_pc; 5482 uintptr_t fn_high_pc; 5483 uint8_t cu_eoc; 5484 }; 5485 5486 struct ElfImage { 5487 ElfW(Ehdr) ehdr; 5488 ElfW(Phdr) phdr; 5489 ElfW(Shdr) shdr[7]; 5490 ElfW(Sym) sym[2]; 5491 struct DebugInfo di; 5492 uint8_t da[24]; 5493 char str[80]; 5494 }; 5495 5496 struct ElfImage *img; 5497 5498 static const struct ElfImage img_template = { 5499 .ehdr = { 5500 .e_ident[EI_MAG0] = ELFMAG0, 5501 .e_ident[EI_MAG1] = ELFMAG1, 5502 .e_ident[EI_MAG2] = ELFMAG2, 5503 .e_ident[EI_MAG3] = ELFMAG3, 5504 .e_ident[EI_CLASS] = ELF_CLASS, 5505 .e_ident[EI_DATA] = ELF_DATA, 5506 .e_ident[EI_VERSION] = EV_CURRENT, 5507 .e_type = ET_EXEC, 5508 .e_machine = ELF_HOST_MACHINE, 5509 .e_version = EV_CURRENT, 5510 .e_phoff = offsetof(struct ElfImage, phdr), 5511 .e_shoff = offsetof(struct ElfImage, shdr), 5512 .e_ehsize = sizeof(ElfW(Shdr)), 5513 .e_phentsize = sizeof(ElfW(Phdr)), 5514 .e_phnum = 1, 5515 .e_shentsize = sizeof(ElfW(Shdr)), 5516 .e_shnum = ARRAY_SIZE(img->shdr), 5517 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 5518 #ifdef ELF_HOST_FLAGS 5519 .e_flags = ELF_HOST_FLAGS, 5520 #endif 5521 #ifdef ELF_OSABI 5522 .e_ident[EI_OSABI] = ELF_OSABI, 5523 #endif 5524 }, 5525 .phdr = { 5526 .p_type = PT_LOAD, 5527 .p_flags = PF_X, 5528 }, 5529 .shdr = { 5530 [0] = { .sh_type = SHT_NULL }, 5531 /* Trick: The contents of 
code_gen_buffer are not present in 5532 this fake ELF file; that got allocated elsewhere. Therefore 5533 we mark .text as SHT_NOBITS (similar to .bss) so that readers 5534 will not look for contents. We can record any address. */ 5535 [1] = { /* .text */ 5536 .sh_type = SHT_NOBITS, 5537 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 5538 }, 5539 [2] = { /* .debug_info */ 5540 .sh_type = SHT_PROGBITS, 5541 .sh_offset = offsetof(struct ElfImage, di), 5542 .sh_size = sizeof(struct DebugInfo), 5543 }, 5544 [3] = { /* .debug_abbrev */ 5545 .sh_type = SHT_PROGBITS, 5546 .sh_offset = offsetof(struct ElfImage, da), 5547 .sh_size = sizeof(img->da), 5548 }, 5549 [4] = { /* .debug_frame */ 5550 .sh_type = SHT_PROGBITS, 5551 .sh_offset = sizeof(struct ElfImage), 5552 }, 5553 [5] = { /* .symtab */ 5554 .sh_type = SHT_SYMTAB, 5555 .sh_offset = offsetof(struct ElfImage, sym), 5556 .sh_size = sizeof(img->sym), 5557 .sh_info = 1, 5558 .sh_link = ARRAY_SIZE(img->shdr) - 1, 5559 .sh_entsize = sizeof(ElfW(Sym)), 5560 }, 5561 [6] = { /* .strtab */ 5562 .sh_type = SHT_STRTAB, 5563 .sh_offset = offsetof(struct ElfImage, str), 5564 .sh_size = sizeof(img->str), 5565 } 5566 }, 5567 .sym = { 5568 [1] = { /* code_gen_buffer */ 5569 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5570 .st_shndx = 1, 5571 } 5572 }, 5573 .di = { 5574 .len = sizeof(struct DebugInfo) - 4, 5575 .version = 2, 5576 .ptr_size = sizeof(void *), 5577 .cu_die = 1, 5578 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5579 .fn_die = 2, 5580 .fn_name = "code_gen_buffer" 5581 }, 5582 .da = { 5583 1, /* abbrev number (the cu) */ 5584 0x11, 1, /* DW_TAG_compile_unit, has children */ 5585 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5586 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5587 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5588 0, 0, /* end of abbrev */ 5589 2, /* abbrev number (the fn) */ 5590 0x2e, 0, /* DW_TAG_subprogram, no children */ 5591 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5592 0x11, 0x1, /* DW_AT_low_pc, 
DW_FORM_addr */ 5593 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5594 0, 0, /* end of abbrev */ 5595 0 /* no more abbrev */ 5596 }, 5597 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5598 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5599 }; 5600 5601 /* We only need a single jit entry; statically allocate it. */ 5602 static struct jit_code_entry one_entry; 5603 5604 uintptr_t buf = (uintptr_t)buf_ptr; 5605 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 5606 DebugFrameHeader *dfh; 5607 5608 img = g_malloc(img_size); 5609 *img = img_template; 5610 5611 img->phdr.p_vaddr = buf; 5612 img->phdr.p_paddr = buf; 5613 img->phdr.p_memsz = buf_size; 5614 5615 img->shdr[1].sh_name = find_string(img->str, ".text"); 5616 img->shdr[1].sh_addr = buf; 5617 img->shdr[1].sh_size = buf_size; 5618 5619 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 5620 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 5621 5622 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 5623 img->shdr[4].sh_size = debug_frame_size; 5624 5625 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 5626 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 5627 5628 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 5629 img->sym[1].st_value = buf; 5630 img->sym[1].st_size = buf_size; 5631 5632 img->di.cu_low_pc = buf; 5633 img->di.cu_high_pc = buf + buf_size; 5634 img->di.fn_low_pc = buf; 5635 img->di.fn_high_pc = buf + buf_size; 5636 5637 dfh = (DebugFrameHeader *)(img + 1); 5638 memcpy(dfh, debug_frame, debug_frame_size); 5639 dfh->fde.func_start = buf; 5640 dfh->fde.func_len = buf_size; 5641 5642 #ifdef DEBUG_JIT 5643 /* Enable this block to be able to debug the ELF image file creation. 5644 One can use readelf, objdump, or other inspection utilities. 
*/ 5645 { 5646 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 5647 FILE *f = fopen(jit, "w+b"); 5648 if (f) { 5649 if (fwrite(img, img_size, 1, f) != img_size) { 5650 /* Avoid stupid unused return value warning for fwrite. */ 5651 } 5652 fclose(f); 5653 } 5654 } 5655 #endif 5656 5657 one_entry.symfile_addr = img; 5658 one_entry.symfile_size = img_size; 5659 5660 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 5661 __jit_debug_descriptor.relevant_entry = &one_entry; 5662 __jit_debug_descriptor.first_entry = &one_entry; 5663 __jit_debug_register_code(); 5664 } 5665 #else 5666 /* No support for the feature. Provide the entry point expected by exec.c, 5667 and implement the internal function we declared earlier. */ 5668 5669 static void tcg_register_jit_int(const void *buf, size_t size, 5670 const void *debug_frame, 5671 size_t debug_frame_size) 5672 { 5673 } 5674 5675 void tcg_register_jit(const void *buf, size_t buf_size) 5676 { 5677 } 5678 #endif /* ELF_HOST_MACHINE */ 5679 5680 #if !TCG_TARGET_MAYBE_vec 5681 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 5682 { 5683 g_assert_not_reached(); 5684 } 5685 #endif 5686