/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
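
/*
 * Illustration (editor's note, not part of the build): a backend's
 * tcg-target.c.inc typically instantiates these headers as a static
 * constant describing its prologue, roughly:
 *
 *     static const DebugFrameHeader debug_frame = {
 *         .cie.len = ...,            // per DWARF .debug_frame rules
 *         .cie.augmentation = "",
 *         .cie.code_align = 1,
 *         .fde.cie_offset = ...,     // offset back to the CIE
 *     };
 *
 * which eventually reaches tcg_register_jit_int() so that GDB can unwind
 * through JIT-generated frames.  The field values above are placeholders,
 * not copied from any real backend.
 */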

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}
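
/*
 * Illustration (editor's note, not part of the build): on a host with
 * 1-byte instruction units such as x86, the tcg_outN helpers append to
 * the code buffer while the tcg_patchN helpers rewrite an already
 * emitted value in place, e.g. once a forward branch target is known:
 *
 *     tcg_out8(s, OPC_JMP_long);      // hypothetical opcode constant
 *     tcg_insn_unit *disp = s->code_ptr;
 *     tcg_out32(s, 0);                // placeholder displacement
 *     ...
 *     tcg_patch32(disp, actual_disp); // backpatch once resolved
 */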

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
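
/*
 * Illustration (editor's note, not part of the build): for a branch to a
 * label that has not been bound yet, the pieces above cooperate like so:
 *
 *     TCGLabel *l = gen_new_label();
 *     // The backend emits the branch with an unknown target and records
 *     // the site; R_EXAMPLE stands in for a target-specific reloc type.
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);
 *     ...
 *     tcg_out_label(s, l);   // bind the label to the current point
 *
 * tcg_resolve_relocs() then walks every recorded site and lets the
 * backend's patch_reloc() write the now-known value.
 */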

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
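
/*
 * Editor's note: a given tcg-target-con-set.h entry is thus expanded
 * three times.  For a backend line C_O1_I2(r, r, ri), the first
 * inclusion above produced the enumerator c_o1_i2_r_r_ri, the second
 * produced the array element { .args_ct_str = { "r", "r", "ri" } } at
 * the same index, and the third expansion (below) lets
 * tcg_target_op_def() return the matching enumerator.
 */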

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static void init_ffi_layouts(void)
{
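    /*
     * Editor's note on the typemask encoding consumed below: each
     * dh_typecode_* value occupies 3 bits, the return type in bits 0..2
     * and argument N in bits 3N+3 .. 3N+5.  A hypothetical helper
     * returning i32 and taking two i64 arguments would have
     * (dh_typecode_i32 << 0) | (dh_typecode_i64 << 3) | (dh_typecode_i64 << 6).
     */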
    /* g_direct_hash/equal for direct comparisons on uint32_t. */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }
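
    /*
     * Editor's worked example (illustration, not from the source): on a
     * 32-bit host with TCG_TARGET_CALL_ARG_I32 == TCG_CALL_ARG_NORMAL and
     * TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN, a helper taking
     * (i32, i64) lays out as: arg_slot 0 for the i32, slot 1 skipped by
     * layout_arg_even(), and slots 2-3 holding the two halves of the i64
     * via layout_arg_normal_n(..., 2).
     */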

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region.  See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type. */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            goto done;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }

 done:
#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
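
/*
 * Editor's worked example (not from the source): in tcg_temp_new_internal()
 * above, a TCG_TYPE_I128 temp on a 64-bit host has n == 2 and so occupies
 * two adjacent TCGTemps, both with base_type TCG_TYPE_I128 and type
 * TCG_TYPE_REG, distinguished only by temp_subindex 0 and 1.
 */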

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_EBB:
    case TEMP_TB:
        break;
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

#if defined(CONFIG_DEBUG_TCG)
    assert(s->temps_in_use > 0);
    s->temps_in_use--;
#endif

    if (ts->kind == TEMP_EBB) {
        int idx = temp_idx(ts);
        set_bit(idx, s->free_temps[ts->base_type].l);
    }
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
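
/*
 * Editor's note (illustration, not part of the build): generic code can
 * consult tcg_op_supported() to choose between emitting an opcode and
 * open-coding a fallback, along the lines of:
 *
 *     if (tcg_op_supported(INDEX_op_rotl_i32)) {
 *         // emit the rotate directly
 *     } else {
 *         // expand as (x << n) | (x >> (32 - n))
 *     }
 */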

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }
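
    /*
     * Editor's note (illustration): at this point op->args[] holds the
     * output temps.  The loop below appends one entry per input location,
     * so the finished call op is laid out as:
     *     args[0 .. nr_out-1]   outputs
     *     args[nr_out .. pi-3]  inputs (extended copies where needed)
     *     args[pi-2]            the raw function pointer
     *     args[pi-1]            the TCGHelperInfo pointer
     */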

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
[MO_LEUQ] = "leq", 2002 [MO_BEUW] = "beuw", 2003 [MO_BESW] = "besw", 2004 [MO_BEUL] = "beul", 2005 [MO_BESL] = "besl", 2006 [MO_BEUQ] = "beq", 2007 }; 2008 2009 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2010 #ifdef TARGET_ALIGNED_ONLY 2011 [MO_UNALN >> MO_ASHIFT] = "un+", 2012 [MO_ALIGN >> MO_ASHIFT] = "", 2013 #else 2014 [MO_UNALN >> MO_ASHIFT] = "", 2015 [MO_ALIGN >> MO_ASHIFT] = "al+", 2016 #endif 2017 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2018 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2019 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2020 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2021 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2022 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2023 }; 2024 2025 static const char bswap_flag_name[][6] = { 2026 [TCG_BSWAP_IZ] = "iz", 2027 [TCG_BSWAP_OZ] = "oz", 2028 [TCG_BSWAP_OS] = "os", 2029 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2030 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2031 }; 2032 2033 static inline bool tcg_regset_single(TCGRegSet d) 2034 { 2035 return (d & (d - 1)) == 0; 2036 } 2037 2038 static inline TCGReg tcg_regset_first(TCGRegSet d) 2039 { 2040 if (TCG_TARGET_NB_REGS <= 32) { 2041 return ctz32(d); 2042 } else { 2043 return ctz64(d); 2044 } 2045 } 2046 2047 /* Return only the number of characters output -- no error return. */ 2048 #define ne_fprintf(...) \ 2049 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2050 2051 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2052 { 2053 char buf[128]; 2054 TCGOp *op; 2055 2056 QTAILQ_FOREACH(op, &s->ops, link) { 2057 int i, k, nb_oargs, nb_iargs, nb_cargs; 2058 const TCGOpDef *def; 2059 TCGOpcode c; 2060 int col = 0; 2061 2062 c = op->opc; 2063 def = &tcg_op_defs[c]; 2064 2065 if (c == INDEX_op_insn_start) { 2066 nb_oargs = 0; 2067 col += ne_fprintf(f, "\n ----"); 2068 2069 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2070 target_ulong a; 2071 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2072 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 2073 #else 2074 a = op->args[i]; 2075 #endif 2076 col += ne_fprintf(f, " " TARGET_FMT_lx, a); 2077 } 2078 } else if (c == INDEX_op_call) { 2079 const TCGHelperInfo *info = tcg_call_info(op); 2080 void *func = tcg_call_func(op); 2081 2082 /* variable number of arguments */ 2083 nb_oargs = TCGOP_CALLO(op); 2084 nb_iargs = TCGOP_CALLI(op); 2085 nb_cargs = def->nb_cargs; 2086 2087 col += ne_fprintf(f, " %s ", def->name); 2088 2089 /* 2090 * Print the function name from TCGHelperInfo, if available. 2091 * Note that plugins have a template function for the info, 2092 * but the actual function pointer comes from the plugin. 
2093 */ 2094 if (func == info->func) { 2095 col += ne_fprintf(f, "%s", info->name); 2096 } else { 2097 col += ne_fprintf(f, "plugin(%p)", func); 2098 } 2099 2100 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2101 for (i = 0; i < nb_oargs; i++) { 2102 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2103 op->args[i])); 2104 } 2105 for (i = 0; i < nb_iargs; i++) { 2106 TCGArg arg = op->args[nb_oargs + i]; 2107 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2108 col += ne_fprintf(f, ",%s", t); 2109 } 2110 } else { 2111 col += ne_fprintf(f, " %s ", def->name); 2112 2113 nb_oargs = def->nb_oargs; 2114 nb_iargs = def->nb_iargs; 2115 nb_cargs = def->nb_cargs; 2116 2117 if (def->flags & TCG_OPF_VECTOR) { 2118 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2119 8 << TCGOP_VECE(op)); 2120 } 2121 2122 k = 0; 2123 for (i = 0; i < nb_oargs; i++) { 2124 const char *sep = k ? "," : ""; 2125 col += ne_fprintf(f, "%s%s", sep, 2126 tcg_get_arg_str(s, buf, sizeof(buf), 2127 op->args[k++])); 2128 } 2129 for (i = 0; i < nb_iargs; i++) { 2130 const char *sep = k ? "," : ""; 2131 col += ne_fprintf(f, "%s%s", sep, 2132 tcg_get_arg_str(s, buf, sizeof(buf), 2133 op->args[k++])); 2134 } 2135 switch (c) { 2136 case INDEX_op_brcond_i32: 2137 case INDEX_op_setcond_i32: 2138 case INDEX_op_movcond_i32: 2139 case INDEX_op_brcond2_i32: 2140 case INDEX_op_setcond2_i32: 2141 case INDEX_op_brcond_i64: 2142 case INDEX_op_setcond_i64: 2143 case INDEX_op_movcond_i64: 2144 case INDEX_op_cmp_vec: 2145 case INDEX_op_cmpsel_vec: 2146 if (op->args[k] < ARRAY_SIZE(cond_name) 2147 && cond_name[op->args[k]]) { 2148 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2149 } else { 2150 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2151 } 2152 i = 1; 2153 break; 2154 case INDEX_op_qemu_ld_i32: 2155 case INDEX_op_qemu_st_i32: 2156 case INDEX_op_qemu_st8_i32: 2157 case INDEX_op_qemu_ld_i64: 2158 case INDEX_op_qemu_st_i64: 2159 { 2160 MemOpIdx oi = op->args[k++]; 2161 MemOp op = get_memop(oi); 2162 unsigned ix = get_mmuidx(oi); 2163 2164 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2165 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2166 } else { 2167 const char *s_al, *s_op; 2168 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2169 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2170 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 2171 } 2172 i = 1; 2173 } 2174 break; 2175 case INDEX_op_bswap16_i32: 2176 case INDEX_op_bswap16_i64: 2177 case INDEX_op_bswap32_i32: 2178 case INDEX_op_bswap32_i64: 2179 case INDEX_op_bswap64_i64: 2180 { 2181 TCGArg flags = op->args[k]; 2182 const char *name = NULL; 2183 2184 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2185 name = bswap_flag_name[flags]; 2186 } 2187 if (name) { 2188 col += ne_fprintf(f, ",%s", name); 2189 } else { 2190 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2191 } 2192 i = k = 1; 2193 } 2194 break; 2195 default: 2196 i = 0; 2197 break; 2198 } 2199 switch (c) { 2200 case INDEX_op_set_label: 2201 case INDEX_op_br: 2202 case INDEX_op_brcond_i32: 2203 case INDEX_op_brcond_i64: 2204 case INDEX_op_brcond2_i32: 2205 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2206 arg_label(op->args[k])->id); 2207 i++, k++; 2208 break; 2209 default: 2210 break; 2211 } 2212 for (; i < nb_cargs; i++, k++) { 2213 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2214 op->args[k]); 2215 } 2216 } 2217 2218 if (have_prefs || op->life) { 2219 for (; col < 40; ++col) { 2220 putc(' ', f); 2221 } 2222 } 2223 2224 if (op->life) { 2225 unsigned life = op->life; 2226 2227 if (life & (SYNC_ARG * 3)) { 2228 ne_fprintf(f, " sync:"); 2229 for (i = 0; i < 2; ++i) { 2230 if (life & (SYNC_ARG << i)) { 2231 ne_fprintf(f, " %d", i); 2232 } 2233 } 2234 } 2235 life /= DEAD_ARG; 2236 if (life) { 2237 ne_fprintf(f, " dead:"); 2238 for (i = 0; life; ++i, life >>= 1) { 2239 if (life & 1) { 2240 ne_fprintf(f, " %d", i); 2241 } 2242 } 2243 } 2244 } 2245 2246 if (have_prefs) { 2247 for (i = 0; i < nb_oargs; ++i) { 2248 TCGRegSet set = output_pref(op, i); 2249 2250 if (i == 0) { 2251 ne_fprintf(f, " pref="); 2252 } else { 2253 ne_fprintf(f, ","); 2254 } 2255 if (set == 0) { 2256 ne_fprintf(f, "none"); 2257 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2258 ne_fprintf(f, "all"); 2259 #ifdef CONFIG_DEBUG_TCG 2260 } else if (tcg_regset_single(set)) { 2261 TCGReg reg = tcg_regset_first(set); 2262 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2263 #endif 2264 } else if (TCG_TARGET_NB_REGS <= 32) { 2265 ne_fprintf(f, "0x%x", (uint32_t)set); 2266 } else { 2267 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2268 } 2269 } 2270 } 2271 2272 putc('\n', f); 2273 } 2274 } 2275 2276 /* we give more priority to constraints with less registers */ 2277 static int get_constraint_priority(const TCGOpDef *def, int k) 2278 { 2279 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2280 int n = ctpop64(arg_ct->regs); 2281 2282 /* 2283 * Sort constraints of a single register first, which includes output 2284 * aliases (which must exactly match the input already allocated). 2285 */ 2286 if (n == 1 || arg_ct->oalias) { 2287 return INT_MAX; 2288 } 2289 2290 /* 2291 * Sort register pairs next, first then second immediately after. 2292 * Arbitrarily sort multiple pairs by the index of the first reg; 2293 * there shouldn't be many pairs. 2294 */ 2295 switch (arg_ct->pair) { 2296 case 1: 2297 case 3: 2298 return (k + 1) * 2; 2299 case 2: 2300 return (arg_ct->pair_index + 1) * 2 - 1; 2301 } 2302 2303 /* Finally, sort by decreasing register count. */ 2304 assert(n > 1); 2305 return -n; 2306 } 2307 2308 /* sort from highest priority to lowest */ 2309 static void sort_constraints(TCGOpDef *def, int start, int n) 2310 { 2311 int i, j; 2312 TCGArgConstraint *a = def->args_ct; 2313 2314 for (i = 0; i < n; i++) { 2315 a[start + i].sort_index = start + i; 2316 } 2317 if (n <= 1) { 2318 return; 2319 } 2320 for (i = 0; i < n - 1; i++) { 2321 for (j = i + 1; j < n; j++) { 2322 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2323 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2324 if (p1 < p2) { 2325 int tmp = a[start + i].sort_index; 2326 a[start + i].sort_index = a[start + j].sort_index; 2327 a[start + j].sort_index = tmp; 2328 } 2329 } 2330 } 2331 } 2332 2333 static void process_op_defs(TCGContext *s) 2334 { 2335 TCGOpcode op; 2336 2337 for (op = 0; op < NB_OPS; op++) { 2338 TCGOpDef *def = &tcg_op_defs[op]; 2339 const TCGTargetOpDef *tdefs; 2340 bool saw_alias_pair = false; 2341 int i, o, i2, o2, nb_args; 2342 2343 if (def->flags & TCG_OPF_NOT_PRESENT) { 2344 continue; 2345 } 2346 2347 nb_args = def->nb_iargs + def->nb_oargs; 2348 if (nb_args == 0) { 2349 continue; 2350 } 2351 2352 /* 2353 * Macro magic should make it impossible, but double-check that 2354 * the array index is in range. 
Since the signness of an enum 2355 * is implementation defined, force the result to unsigned. 2356 */ 2357 unsigned con_set = tcg_target_op_def(op); 2358 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2359 tdefs = &constraint_sets[con_set]; 2360 2361 for (i = 0; i < nb_args; i++) { 2362 const char *ct_str = tdefs->args_ct_str[i]; 2363 bool input_p = i >= def->nb_oargs; 2364 2365 /* Incomplete TCGTargetOpDef entry. */ 2366 tcg_debug_assert(ct_str != NULL); 2367 2368 switch (*ct_str) { 2369 case '0' ... '9': 2370 o = *ct_str - '0'; 2371 tcg_debug_assert(input_p); 2372 tcg_debug_assert(o < def->nb_oargs); 2373 tcg_debug_assert(def->args_ct[o].regs != 0); 2374 tcg_debug_assert(!def->args_ct[o].oalias); 2375 def->args_ct[i] = def->args_ct[o]; 2376 /* The output sets oalias. */ 2377 def->args_ct[o].oalias = 1; 2378 def->args_ct[o].alias_index = i; 2379 /* The input sets ialias. */ 2380 def->args_ct[i].ialias = 1; 2381 def->args_ct[i].alias_index = o; 2382 if (def->args_ct[i].pair) { 2383 saw_alias_pair = true; 2384 } 2385 tcg_debug_assert(ct_str[1] == '\0'); 2386 continue; 2387 2388 case '&': 2389 tcg_debug_assert(!input_p); 2390 def->args_ct[i].newreg = true; 2391 ct_str++; 2392 break; 2393 2394 case 'p': /* plus */ 2395 /* Allocate to the register after the previous. */ 2396 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2397 o = i - 1; 2398 tcg_debug_assert(!def->args_ct[o].pair); 2399 tcg_debug_assert(!def->args_ct[o].ct); 2400 def->args_ct[i] = (TCGArgConstraint){ 2401 .pair = 2, 2402 .pair_index = o, 2403 .regs = def->args_ct[o].regs << 1, 2404 }; 2405 def->args_ct[o].pair = 1; 2406 def->args_ct[o].pair_index = i; 2407 tcg_debug_assert(ct_str[1] == '\0'); 2408 continue; 2409 2410 case 'm': /* minus */ 2411 /* Allocate to the register before the previous. */ 2412 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); 2413 o = i - 1; 2414 tcg_debug_assert(!def->args_ct[o].pair); 2415 tcg_debug_assert(!def->args_ct[o].ct); 2416 def->args_ct[i] = (TCGArgConstraint){ 2417 .pair = 1, 2418 .pair_index = o, 2419 .regs = def->args_ct[o].regs >> 1, 2420 }; 2421 def->args_ct[o].pair = 2; 2422 def->args_ct[o].pair_index = i; 2423 tcg_debug_assert(ct_str[1] == '\0'); 2424 continue; 2425 } 2426 2427 do { 2428 switch (*ct_str) { 2429 case 'i': 2430 def->args_ct[i].ct |= TCG_CT_CONST; 2431 break; 2432 2433 /* Include all of the target-specific constraints. */ 2434 2435 #undef CONST 2436 #define CONST(CASE, MASK) \ 2437 case CASE: def->args_ct[i].ct |= MASK; break; 2438 #define REGS(CASE, MASK) \ 2439 case CASE: def->args_ct[i].regs |= MASK; break; 2440 2441 #include "tcg-target-con-str.h" 2442 2443 #undef REGS 2444 #undef CONST 2445 default: 2446 case '0' ... '9': 2447 case '&': 2448 case 'p': 2449 case 'm': 2450 /* Typo in TCGTargetOpDef constraint. */ 2451 g_assert_not_reached(); 2452 } 2453 } while (*++ct_str != '\0'); 2454 } 2455 2456 /* TCGTargetOpDef entry with too much information? */ 2457 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2458 2459 /* 2460 * Fix up output pairs that are aliased with inputs. 2461 * When we created the alias, we copied pair from the output. 2462 * There are three cases: 2463 * (1a) Pairs of inputs alias pairs of outputs. 2464 * (1b) One input aliases the first of a pair of outputs. 2465 * (2) One input aliases the second of a pair of outputs. 2466 * 2467 * Case 1a is handled by making sure that the pair_index'es are 2468 * properly updated so that they appear the same as a pair of inputs. 
2469 * 2470 * Case 1b is handled by setting the pair_index of the input to 2471 * itself, simply so it doesn't point to an unrelated argument. 2472 * Since we don't encounter the "second" during the input allocation 2473 * phase, nothing happens with the second half of the input pair. 2474 * 2475 * Case 2 is handled by setting the second input to pair=3, the 2476 * first output to pair=3, and the pair_index'es to match. 2477 */ 2478 if (saw_alias_pair) { 2479 for (i = def->nb_oargs; i < nb_args; i++) { 2480 /* 2481 * Since [0-9pm] must be alone in the constraint string, 2482 * the only way they can both be set is if the pair comes 2483 * from the output alias. 2484 */ 2485 if (!def->args_ct[i].ialias) { 2486 continue; 2487 } 2488 switch (def->args_ct[i].pair) { 2489 case 0: 2490 break; 2491 case 1: 2492 o = def->args_ct[i].alias_index; 2493 o2 = def->args_ct[o].pair_index; 2494 tcg_debug_assert(def->args_ct[o].pair == 1); 2495 tcg_debug_assert(def->args_ct[o2].pair == 2); 2496 if (def->args_ct[o2].oalias) { 2497 /* Case 1a */ 2498 i2 = def->args_ct[o2].alias_index; 2499 tcg_debug_assert(def->args_ct[i2].pair == 2); 2500 def->args_ct[i2].pair_index = i; 2501 def->args_ct[i].pair_index = i2; 2502 } else { 2503 /* Case 1b */ 2504 def->args_ct[i].pair_index = i; 2505 } 2506 break; 2507 case 2: 2508 o = def->args_ct[i].alias_index; 2509 o2 = def->args_ct[o].pair_index; 2510 tcg_debug_assert(def->args_ct[o].pair == 2); 2511 tcg_debug_assert(def->args_ct[o2].pair == 1); 2512 if (def->args_ct[o2].oalias) { 2513 /* Case 1a */ 2514 i2 = def->args_ct[o2].alias_index; 2515 tcg_debug_assert(def->args_ct[i2].pair == 1); 2516 def->args_ct[i2].pair_index = i; 2517 def->args_ct[i].pair_index = i2; 2518 } else { 2519 /* Case 2 */ 2520 def->args_ct[i].pair = 3; 2521 def->args_ct[o2].pair = 3; 2522 def->args_ct[i].pair_index = o2; 2523 def->args_ct[o2].pair_index = i; 2524 } 2525 break; 2526 default: 2527 g_assert_not_reached(); 2528 } 2529 } 2530 } 2531 2532 /* sort the constraints (XXX: this is just an heuristic) */ 2533 sort_constraints(def, 0, def->nb_oargs); 2534 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2535 } 2536 } 2537 2538 void tcg_op_remove(TCGContext *s, TCGOp *op) 2539 { 2540 TCGLabel *label; 2541 2542 switch (op->opc) { 2543 case INDEX_op_br: 2544 label = arg_label(op->args[0]); 2545 label->refs--; 2546 break; 2547 case INDEX_op_brcond_i32: 2548 case INDEX_op_brcond_i64: 2549 label = arg_label(op->args[3]); 2550 label->refs--; 2551 break; 2552 case INDEX_op_brcond2_i32: 2553 label = arg_label(op->args[5]); 2554 label->refs--; 2555 break; 2556 default: 2557 break; 2558 } 2559 2560 QTAILQ_REMOVE(&s->ops, op, link); 2561 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2562 s->nb_ops--; 2563 2564 #ifdef CONFIG_PROFILER 2565 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2566 #endif 2567 } 2568 2569 void tcg_remove_ops_after(TCGOp *op) 2570 { 2571 TCGContext *s = tcg_ctx; 2572 2573 while (true) { 2574 TCGOp *last = tcg_last_op(); 2575 if (last == op) { 2576 return; 2577 } 2578 tcg_op_remove(s, last); 2579 } 2580 } 2581 2582 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 2583 { 2584 TCGContext *s = tcg_ctx; 2585 TCGOp *op = NULL; 2586 2587 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 2588 QTAILQ_FOREACH(op, &s->free_ops, link) { 2589 if (nargs <= op->nargs) { 2590 QTAILQ_REMOVE(&s->free_ops, op, link); 2591 nargs = op->nargs; 2592 goto found; 2593 } 2594 } 2595 } 2596 2597 /* Most opcodes have 3 or 4 operands: reduce fragmentation. 
*/ 2598 nargs = MAX(4, nargs); 2599 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 2600 2601 found: 2602 memset(op, 0, offsetof(TCGOp, link)); 2603 op->opc = opc; 2604 op->nargs = nargs; 2605 2606 /* Check for bitfield overflow. */ 2607 tcg_debug_assert(op->nargs == nargs); 2608 2609 s->nb_ops++; 2610 return op; 2611 } 2612 2613 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 2614 { 2615 TCGOp *op = tcg_op_alloc(opc, nargs); 2616 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2617 return op; 2618 } 2619 2620 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 2621 TCGOpcode opc, unsigned nargs) 2622 { 2623 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2624 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2625 return new_op; 2626 } 2627 2628 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 2629 TCGOpcode opc, unsigned nargs) 2630 { 2631 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2632 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2633 return new_op; 2634 } 2635 2636 /* Reachable analysis : remove unreachable code. */ 2637 static void __attribute__((noinline)) 2638 reachable_code_pass(TCGContext *s) 2639 { 2640 TCGOp *op, *op_next, *op_prev; 2641 bool dead = false; 2642 2643 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2644 bool remove = dead; 2645 TCGLabel *label; 2646 2647 switch (op->opc) { 2648 case INDEX_op_set_label: 2649 label = arg_label(op->args[0]); 2650 2651 /* 2652 * Optimization can fold conditional branches to unconditional. 2653 * If we find a label which is preceded by an unconditional 2654 * branch to next, remove the branch. We couldn't do this when 2655 * processing the branch because any dead code between the branch 2656 * and label had not yet been removed. 2657 */ 2658 op_prev = QTAILQ_PREV(op, link); 2659 if (op_prev->opc == INDEX_op_br && 2660 label == arg_label(op_prev->args[0])) { 2661 tcg_op_remove(s, op_prev); 2662 /* Fall through means insns become live again. */ 2663 dead = false; 2664 } 2665 2666 if (label->refs == 0) { 2667 /* 2668 * While there is an occasional backward branch, virtually 2669 * all branches generated by the translators are forward. 2670 * Which means that generally we will have already removed 2671 * all references to the label that will be, and there is 2672 * little to be gained by iterating. 2673 */ 2674 remove = true; 2675 } else { 2676 /* Once we see a label, insns become live again. */ 2677 dead = false; 2678 remove = false; 2679 } 2680 break; 2681 2682 case INDEX_op_br: 2683 case INDEX_op_exit_tb: 2684 case INDEX_op_goto_ptr: 2685 /* Unconditional branches; everything following is dead. */ 2686 dead = true; 2687 break; 2688 2689 case INDEX_op_call: 2690 /* Notice noreturn helper calls, raising exceptions. */ 2691 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2692 dead = true; 2693 } 2694 break; 2695 2696 case INDEX_op_insn_start: 2697 /* Never remove -- we need to keep these for unwind. */ 2698 remove = false; 2699 break; 2700 2701 default: 2702 break; 2703 } 2704 2705 if (remove) { 2706 tcg_op_remove(s, op); 2707 } 2708 } 2709 } 2710 2711 #define TS_DEAD 1 2712 #define TS_MEM 2 2713 2714 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2715 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2716 2717 /* For liveness_pass_1, the register preferences for a given temp. */ 2718 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2719 { 2720 return ts->state_ptr; 2721 } 2722 2723 /* For liveness_pass_1, reset the preferences for a given temp to the 2724 * maximal regset for its type. 
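 * E.g. a temp whose state is TS_DEAD is reset to the empty set, while
 * a live TCG_TYPE_I32 temp is reset to the full
 * tcg_target_available_regs[TCG_TYPE_I32] mask.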
2725 */ 2726 static inline void la_reset_pref(TCGTemp *ts) 2727 { 2728 *la_temp_pref(ts) 2729 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2730 } 2731 2732 /* liveness analysis: end of function: all temps are dead, and globals 2733 should be in memory. */ 2734 static void la_func_end(TCGContext *s, int ng, int nt) 2735 { 2736 int i; 2737 2738 for (i = 0; i < ng; ++i) { 2739 s->temps[i].state = TS_DEAD | TS_MEM; 2740 la_reset_pref(&s->temps[i]); 2741 } 2742 for (i = ng; i < nt; ++i) { 2743 s->temps[i].state = TS_DEAD; 2744 la_reset_pref(&s->temps[i]); 2745 } 2746 } 2747 2748 /* liveness analysis: end of basic block: all temps are dead, globals 2749 and local temps should be in memory. */ 2750 static void la_bb_end(TCGContext *s, int ng, int nt) 2751 { 2752 int i; 2753 2754 for (i = 0; i < nt; ++i) { 2755 TCGTemp *ts = &s->temps[i]; 2756 int state; 2757 2758 switch (ts->kind) { 2759 case TEMP_FIXED: 2760 case TEMP_GLOBAL: 2761 case TEMP_TB: 2762 state = TS_DEAD | TS_MEM; 2763 break; 2764 case TEMP_EBB: 2765 case TEMP_CONST: 2766 state = TS_DEAD; 2767 break; 2768 default: 2769 g_assert_not_reached(); 2770 } 2771 ts->state = state; 2772 la_reset_pref(ts); 2773 } 2774 } 2775 2776 /* liveness analysis: sync globals back to memory. */ 2777 static void la_global_sync(TCGContext *s, int ng) 2778 { 2779 int i; 2780 2781 for (i = 0; i < ng; ++i) { 2782 int state = s->temps[i].state; 2783 s->temps[i].state = state | TS_MEM; 2784 if (state == TS_DEAD) { 2785 /* If the global was previously dead, reset prefs. */ 2786 la_reset_pref(&s->temps[i]); 2787 } 2788 } 2789 } 2790 2791 /* 2792 * liveness analysis: conditional branch: all temps are dead unless 2793 * explicitly live-across-conditional-branch, globals and local temps 2794 * should be synced. 2795 */ 2796 static void la_bb_sync(TCGContext *s, int ng, int nt) 2797 { 2798 la_global_sync(s, ng); 2799 2800 for (int i = ng; i < nt; ++i) { 2801 TCGTemp *ts = &s->temps[i]; 2802 int state; 2803 2804 switch (ts->kind) { 2805 case TEMP_TB: 2806 state = ts->state; 2807 ts->state = state | TS_MEM; 2808 if (state != TS_DEAD) { 2809 continue; 2810 } 2811 break; 2812 case TEMP_EBB: 2813 case TEMP_CONST: 2814 continue; 2815 default: 2816 g_assert_not_reached(); 2817 } 2818 la_reset_pref(&s->temps[i]); 2819 } 2820 } 2821 2822 /* liveness analysis: sync globals back to memory and kill. */ 2823 static void la_global_kill(TCGContext *s, int ng) 2824 { 2825 int i; 2826 2827 for (i = 0; i < ng; i++) { 2828 s->temps[i].state = TS_DEAD | TS_MEM; 2829 la_reset_pref(&s->temps[i]); 2830 } 2831 } 2832 2833 /* liveness analysis: note live globals crossing calls. */ 2834 static void la_cross_call(TCGContext *s, int nt) 2835 { 2836 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2837 int i; 2838 2839 for (i = 0; i < nt; i++) { 2840 TCGTemp *ts = &s->temps[i]; 2841 if (!(ts->state & TS_DEAD)) { 2842 TCGRegSet *pset = la_temp_pref(ts); 2843 TCGRegSet set = *pset; 2844 2845 set &= mask; 2846 /* If the combination is not possible, restart. */ 2847 if (set == 0) { 2848 set = tcg_target_available_regs[ts->type] & mask; 2849 } 2850 *pset = set; 2851 } 2852 } 2853 } 2854 2855 /* 2856 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 2857 * to TEMP_EBB, if possible. 
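 *
 * For example, a temp that is written and read only between one label
 * and the next is live within a single EBB; demoting it to TEMP_EBB
 * spares later passes from keeping it synced to memory across branches.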
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.
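                   In arg_life we also record, per output index i,
                   whether the old value needed a sync to memory
                   (SYNC_ARG << i) or was simply dead (DEAD_ARG << i).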
*/ 2975 for (i = 0; i < nb_oargs; i++) { 2976 ts = arg_temp(op->args[i]); 2977 if (ts->state & TS_DEAD) { 2978 arg_life |= DEAD_ARG << i; 2979 } 2980 if (ts->state & TS_MEM) { 2981 arg_life |= SYNC_ARG << i; 2982 } 2983 ts->state = TS_DEAD; 2984 la_reset_pref(ts); 2985 } 2986 2987 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 2988 memset(op->output_pref, 0, sizeof(op->output_pref)); 2989 2990 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2991 TCG_CALL_NO_READ_GLOBALS))) { 2992 la_global_kill(s, nb_globals); 2993 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2994 la_global_sync(s, nb_globals); 2995 } 2996 2997 /* Record arguments that die in this helper. */ 2998 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2999 ts = arg_temp(op->args[i]); 3000 if (ts->state & TS_DEAD) { 3001 arg_life |= DEAD_ARG << i; 3002 } 3003 } 3004 3005 /* For all live registers, remove call-clobbered prefs. */ 3006 la_cross_call(s, nb_temps); 3007 3008 /* 3009 * Input arguments are live for preceding opcodes. 3010 * 3011 * For those arguments that die, and will be allocated in 3012 * registers, clear the register set for that arg, to be 3013 * filled in below. For args that will be on the stack, 3014 * reset to any available reg. Process arguments in reverse 3015 * order so that if a temp is used more than once, the stack 3016 * reset to max happens before the register reset to 0. 3017 */ 3018 for (i = nb_iargs - 1; i >= 0; i--) { 3019 const TCGCallArgumentLoc *loc = &info->in[i]; 3020 ts = arg_temp(op->args[nb_oargs + i]); 3021 3022 if (ts->state & TS_DEAD) { 3023 switch (loc->kind) { 3024 case TCG_CALL_ARG_NORMAL: 3025 case TCG_CALL_ARG_EXTEND_U: 3026 case TCG_CALL_ARG_EXTEND_S: 3027 if (REG_P(loc)) { 3028 *la_temp_pref(ts) = 0; 3029 break; 3030 } 3031 /* fall through */ 3032 default: 3033 *la_temp_pref(ts) = 3034 tcg_target_available_regs[ts->type]; 3035 break; 3036 } 3037 ts->state &= ~TS_DEAD; 3038 } 3039 } 3040 3041 /* 3042 * For each input argument, add its input register to prefs. 3043 * If a temp is used once, this produces a single set bit; 3044 * if a temp is used multiple times, this produces a set. 3045 */ 3046 for (i = 0; i < nb_iargs; i++) { 3047 const TCGCallArgumentLoc *loc = &info->in[i]; 3048 ts = arg_temp(op->args[nb_oargs + i]); 3049 3050 switch (loc->kind) { 3051 case TCG_CALL_ARG_NORMAL: 3052 case TCG_CALL_ARG_EXTEND_U: 3053 case TCG_CALL_ARG_EXTEND_S: 3054 if (REG_P(loc)) { 3055 tcg_regset_set_reg(*la_temp_pref(ts), 3056 tcg_target_call_iarg_regs[loc->arg_slot]); 3057 } 3058 break; 3059 default: 3060 break; 3061 } 3062 } 3063 } 3064 break; 3065 case INDEX_op_insn_start: 3066 break; 3067 case INDEX_op_discard: 3068 /* mark the temporary as dead */ 3069 ts = arg_temp(op->args[0]); 3070 ts->state = TS_DEAD; 3071 la_reset_pref(ts); 3072 break; 3073 3074 case INDEX_op_add2_i32: 3075 opc_new = INDEX_op_add_i32; 3076 goto do_addsub2; 3077 case INDEX_op_sub2_i32: 3078 opc_new = INDEX_op_sub_i32; 3079 goto do_addsub2; 3080 case INDEX_op_add2_i64: 3081 opc_new = INDEX_op_add_i64; 3082 goto do_addsub2; 3083 case INDEX_op_sub2_i64: 3084 opc_new = INDEX_op_sub_i64; 3085 do_addsub2: 3086 nb_iargs = 4; 3087 nb_oargs = 2; 3088 /* Test if the high part of the operation is dead, but not 3089 the low part. The result can be optimized to a simple 3090 add or sub. This happens often for x86_64 guest when the 3091 cpu mode is set to 32 bit. 
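               E.g. add2_i32 rl,rh,al,ah,bl,bh with rh dead but rl live
               is rewritten below to add_i32 rl,al,bl, dropping the
               high-half arguments.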
*/ 3092 if (arg_temp(op->args[1])->state == TS_DEAD) { 3093 if (arg_temp(op->args[0])->state == TS_DEAD) { 3094 goto do_remove; 3095 } 3096 /* Replace the opcode and adjust the args in place, 3097 leaving 3 unused args at the end. */ 3098 op->opc = opc = opc_new; 3099 op->args[1] = op->args[2]; 3100 op->args[2] = op->args[4]; 3101 /* Fall through and mark the single-word operation live. */ 3102 nb_iargs = 2; 3103 nb_oargs = 1; 3104 } 3105 goto do_not_remove; 3106 3107 case INDEX_op_mulu2_i32: 3108 opc_new = INDEX_op_mul_i32; 3109 opc_new2 = INDEX_op_muluh_i32; 3110 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3111 goto do_mul2; 3112 case INDEX_op_muls2_i32: 3113 opc_new = INDEX_op_mul_i32; 3114 opc_new2 = INDEX_op_mulsh_i32; 3115 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3116 goto do_mul2; 3117 case INDEX_op_mulu2_i64: 3118 opc_new = INDEX_op_mul_i64; 3119 opc_new2 = INDEX_op_muluh_i64; 3120 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3121 goto do_mul2; 3122 case INDEX_op_muls2_i64: 3123 opc_new = INDEX_op_mul_i64; 3124 opc_new2 = INDEX_op_mulsh_i64; 3125 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3126 goto do_mul2; 3127 do_mul2: 3128 nb_iargs = 2; 3129 nb_oargs = 2; 3130 if (arg_temp(op->args[1])->state == TS_DEAD) { 3131 if (arg_temp(op->args[0])->state == TS_DEAD) { 3132 /* Both parts of the operation are dead. */ 3133 goto do_remove; 3134 } 3135 /* The high part of the operation is dead; generate the low. */ 3136 op->opc = opc = opc_new; 3137 op->args[1] = op->args[2]; 3138 op->args[2] = op->args[3]; 3139 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3140 /* The low part of the operation is dead; generate the high. */ 3141 op->opc = opc = opc_new2; 3142 op->args[0] = op->args[1]; 3143 op->args[1] = op->args[2]; 3144 op->args[2] = op->args[3]; 3145 } else { 3146 goto do_not_remove; 3147 } 3148 /* Mark the single-word operation live. */ 3149 nb_oargs = 1; 3150 goto do_not_remove; 3151 3152 default: 3153 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3154 nb_iargs = def->nb_iargs; 3155 nb_oargs = def->nb_oargs; 3156 3157 /* Test if the operation can be removed because all 3158 its outputs are dead. We assume that nb_oargs == 0 3159 implies side effects */ 3160 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3161 for (i = 0; i < nb_oargs; i++) { 3162 if (arg_temp(op->args[i])->state != TS_DEAD) { 3163 goto do_not_remove; 3164 } 3165 } 3166 goto do_remove; 3167 } 3168 goto do_not_remove; 3169 3170 do_remove: 3171 tcg_op_remove(s, op); 3172 break; 3173 3174 do_not_remove: 3175 for (i = 0; i < nb_oargs; i++) { 3176 ts = arg_temp(op->args[i]); 3177 3178 /* Remember the preference of the uses that followed. */ 3179 if (i < ARRAY_SIZE(op->output_pref)) { 3180 op->output_pref[i] = *la_temp_pref(ts); 3181 } 3182 3183 /* Output args are dead. */ 3184 if (ts->state & TS_DEAD) { 3185 arg_life |= DEAD_ARG << i; 3186 } 3187 if (ts->state & TS_MEM) { 3188 arg_life |= SYNC_ARG << i; 3189 } 3190 ts->state = TS_DEAD; 3191 la_reset_pref(ts); 3192 } 3193 3194 /* If end of basic block, update. 
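               The tests below go from strongest to weakest effect:
               BB_EXIT ends the whole function, COND_BRANCH and BB_END
               end the block, while SIDE_EFFECTS alone only forces
               globals to be synced (and call-clobbering ops to drop
               clobbered registers from the preferences).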
*/ 3195 if (def->flags & TCG_OPF_BB_EXIT) { 3196 la_func_end(s, nb_globals, nb_temps); 3197 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3198 la_bb_sync(s, nb_globals, nb_temps); 3199 } else if (def->flags & TCG_OPF_BB_END) { 3200 la_bb_end(s, nb_globals, nb_temps); 3201 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3202 la_global_sync(s, nb_globals); 3203 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3204 la_cross_call(s, nb_temps); 3205 } 3206 } 3207 3208 /* Record arguments that die in this opcode. */ 3209 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3210 ts = arg_temp(op->args[i]); 3211 if (ts->state & TS_DEAD) { 3212 arg_life |= DEAD_ARG << i; 3213 } 3214 } 3215 3216 /* Input arguments are live for preceding opcodes. */ 3217 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3218 ts = arg_temp(op->args[i]); 3219 if (ts->state & TS_DEAD) { 3220 /* For operands that were dead, initially allow 3221 all regs for the type. */ 3222 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3223 ts->state &= ~TS_DEAD; 3224 } 3225 } 3226 3227 /* Incorporate constraints for this operand. */ 3228 switch (opc) { 3229 case INDEX_op_mov_i32: 3230 case INDEX_op_mov_i64: 3231 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3232 have proper constraints. That said, special case 3233 moves to propagate preferences backward. */ 3234 if (IS_DEAD_ARG(1)) { 3235 *la_temp_pref(arg_temp(op->args[0])) 3236 = *la_temp_pref(arg_temp(op->args[1])); 3237 } 3238 break; 3239 3240 default: 3241 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3242 const TCGArgConstraint *ct = &def->args_ct[i]; 3243 TCGRegSet set, *pset; 3244 3245 ts = arg_temp(op->args[i]); 3246 pset = la_temp_pref(ts); 3247 set = *pset; 3248 3249 set &= ct->regs; 3250 if (ct->ialias) { 3251 set &= output_pref(op, ct->alias_index); 3252 } 3253 /* If the combination is not possible, restart. */ 3254 if (set == 0) { 3255 set = ct->regs; 3256 } 3257 *pset = set; 3258 } 3259 break; 3260 } 3261 break; 3262 } 3263 op->life = arg_life; 3264 } 3265 } 3266 3267 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3268 static bool __attribute__((noinline)) 3269 liveness_pass_2(TCGContext *s) 3270 { 3271 int nb_globals = s->nb_globals; 3272 int nb_temps, i; 3273 bool changes = false; 3274 TCGOp *op, *op_next; 3275 3276 /* Create a temporary for each indirect global. */ 3277 for (i = 0; i < nb_globals; ++i) { 3278 TCGTemp *its = &s->temps[i]; 3279 if (its->indirect_reg) { 3280 TCGTemp *dts = tcg_temp_alloc(s); 3281 dts->type = its->type; 3282 dts->base_type = its->base_type; 3283 dts->temp_subindex = its->temp_subindex; 3284 dts->kind = TEMP_EBB; 3285 its->state_ptr = dts; 3286 } else { 3287 its->state_ptr = NULL; 3288 } 3289 /* All globals begin dead. */ 3290 its->state = TS_DEAD; 3291 } 3292 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3293 TCGTemp *its = &s->temps[i]; 3294 its->state_ptr = NULL; 3295 its->state = TS_DEAD; 3296 } 3297 3298 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3299 TCGOpcode opc = op->opc; 3300 const TCGOpDef *def = &tcg_op_defs[opc]; 3301 TCGLifeData arg_life = op->life; 3302 int nb_iargs, nb_oargs, call_flags; 3303 TCGTemp *arg_ts, *dir_ts; 3304 3305 if (opc == INDEX_op_call) { 3306 nb_oargs = TCGOP_CALLO(op); 3307 nb_iargs = TCGOP_CALLI(op); 3308 call_flags = tcg_call_flags(op); 3309 } else { 3310 nb_iargs = def->nb_iargs; 3311 nb_oargs = def->nb_oargs; 3312 3313 /* Set flags similar to how calls require. 
*/ 3314 if (def->flags & TCG_OPF_COND_BRANCH) { 3315 /* Like reading globals: sync_globals */ 3316 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3317 } else if (def->flags & TCG_OPF_BB_END) { 3318 /* Like writing globals: save_globals */ 3319 call_flags = 0; 3320 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3321 /* Like reading globals: sync_globals */ 3322 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3323 } else { 3324 /* No effect on globals. */ 3325 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3326 TCG_CALL_NO_WRITE_GLOBALS); 3327 } 3328 } 3329 3330 /* Make sure that input arguments are available. */ 3331 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3332 arg_ts = arg_temp(op->args[i]); 3333 dir_ts = arg_ts->state_ptr; 3334 if (dir_ts && arg_ts->state == TS_DEAD) { 3335 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3336 ? INDEX_op_ld_i32 3337 : INDEX_op_ld_i64); 3338 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3339 3340 lop->args[0] = temp_arg(dir_ts); 3341 lop->args[1] = temp_arg(arg_ts->mem_base); 3342 lop->args[2] = arg_ts->mem_offset; 3343 3344 /* Loaded, but synced with memory. */ 3345 arg_ts->state = TS_MEM; 3346 } 3347 } 3348 3349 /* Perform input replacement, and mark inputs that became dead. 3350 No action is required except keeping temp_state up to date 3351 so that we reload when needed. */ 3352 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3353 arg_ts = arg_temp(op->args[i]); 3354 dir_ts = arg_ts->state_ptr; 3355 if (dir_ts) { 3356 op->args[i] = temp_arg(dir_ts); 3357 changes = true; 3358 if (IS_DEAD_ARG(i)) { 3359 arg_ts->state = TS_DEAD; 3360 } 3361 } 3362 } 3363 3364 /* Liveness analysis should ensure that the following are 3365 all correct, for call sites and basic block end points. */ 3366 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3367 /* Nothing to do */ 3368 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3369 for (i = 0; i < nb_globals; ++i) { 3370 /* Liveness should see that globals are synced back, 3371 that is, either TS_DEAD or TS_MEM. */ 3372 arg_ts = &s->temps[i]; 3373 tcg_debug_assert(arg_ts->state_ptr == 0 3374 || arg_ts->state != 0); 3375 } 3376 } else { 3377 for (i = 0; i < nb_globals; ++i) { 3378 /* Liveness should see that globals are saved back, 3379 that is, TS_DEAD, waiting to be reloaded. */ 3380 arg_ts = &s->temps[i]; 3381 tcg_debug_assert(arg_ts->state_ptr == 0 3382 || arg_ts->state == TS_DEAD); 3383 } 3384 } 3385 3386 /* Outputs become available. */ 3387 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3388 arg_ts = arg_temp(op->args[0]); 3389 dir_ts = arg_ts->state_ptr; 3390 if (dir_ts) { 3391 op->args[0] = temp_arg(dir_ts); 3392 changes = true; 3393 3394 /* The output is now live and modified. */ 3395 arg_ts->state = 0; 3396 3397 if (NEED_SYNC_ARG(0)) { 3398 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3399 ? 
INDEX_op_st_i32 3400 : INDEX_op_st_i64); 3401 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3402 TCGTemp *out_ts = dir_ts; 3403 3404 if (IS_DEAD_ARG(0)) { 3405 out_ts = arg_temp(op->args[1]); 3406 arg_ts->state = TS_DEAD; 3407 tcg_op_remove(s, op); 3408 } else { 3409 arg_ts->state = TS_MEM; 3410 } 3411 3412 sop->args[0] = temp_arg(out_ts); 3413 sop->args[1] = temp_arg(arg_ts->mem_base); 3414 sop->args[2] = arg_ts->mem_offset; 3415 } else { 3416 tcg_debug_assert(!IS_DEAD_ARG(0)); 3417 } 3418 } 3419 } else { 3420 for (i = 0; i < nb_oargs; i++) { 3421 arg_ts = arg_temp(op->args[i]); 3422 dir_ts = arg_ts->state_ptr; 3423 if (!dir_ts) { 3424 continue; 3425 } 3426 op->args[i] = temp_arg(dir_ts); 3427 changes = true; 3428 3429 /* The output is now live and modified. */ 3430 arg_ts->state = 0; 3431 3432 /* Sync outputs upon their last write. */ 3433 if (NEED_SYNC_ARG(i)) { 3434 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3435 ? INDEX_op_st_i32 3436 : INDEX_op_st_i64); 3437 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3438 3439 sop->args[0] = temp_arg(dir_ts); 3440 sop->args[1] = temp_arg(arg_ts->mem_base); 3441 sop->args[2] = arg_ts->mem_offset; 3442 3443 arg_ts->state = TS_MEM; 3444 } 3445 /* Drop outputs that are dead. */ 3446 if (IS_DEAD_ARG(i)) { 3447 arg_ts->state = TS_DEAD; 3448 } 3449 } 3450 } 3451 } 3452 3453 return changes; 3454 } 3455 3456 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3457 { 3458 intptr_t off; 3459 int size, align; 3460 3461 /* When allocating an object, look at the full type. */ 3462 size = tcg_type_size(ts->base_type); 3463 switch (ts->base_type) { 3464 case TCG_TYPE_I32: 3465 align = 4; 3466 break; 3467 case TCG_TYPE_I64: 3468 case TCG_TYPE_V64: 3469 align = 8; 3470 break; 3471 case TCG_TYPE_I128: 3472 case TCG_TYPE_V128: 3473 case TCG_TYPE_V256: 3474 /* 3475 * Note that we do not require aligned storage for V256, 3476 * and that we provide alignment for I128 to match V128, 3477 * even if that's above what the host ABI requires. 3478 */ 3479 align = 16; 3480 break; 3481 default: 3482 g_assert_not_reached(); 3483 } 3484 3485 /* 3486 * Assume the stack is sufficiently aligned. 3487 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3488 * and do not require 16 byte vector alignment. This seems slightly 3489 * easier than fully parameterizing the above switch statement. 3490 */ 3491 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3492 off = ROUND_UP(s->current_frame_offset, align); 3493 3494 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3495 if (off + size > s->frame_end) { 3496 tcg_raise_tb_overflow(s); 3497 } 3498 s->current_frame_offset = off + size; 3499 #if defined(__sparc__) 3500 off += TCG_TARGET_STACK_BIAS; 3501 #endif 3502 3503 /* If the object was subdivided, assign memory to all the parts. */ 3504 if (ts->base_type != ts->type) { 3505 int part_size = tcg_type_size(ts->type); 3506 int part_count = size / part_size; 3507 3508 /* 3509 * Each part is allocated sequentially in tcg_temp_new_internal. 3510 * Jump back to the first part by subtracting the current index. 3511 */ 3512 ts -= ts->temp_subindex; 3513 for (int i = 0; i < part_count; ++i) { 3514 ts[i].mem_offset = off + i * part_size; 3515 ts[i].mem_base = s->frame_temp; 3516 ts[i].mem_allocated = 1; 3517 } 3518 } else { 3519 ts->mem_offset = off; 3520 ts->mem_base = s->frame_temp; 3521 ts->mem_allocated = 1; 3522 } 3523 } 3524 3525 /* Assign @reg to @ts, and update reg_to_temp[]. 
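   If @ts currently occupies a different register, that mapping is
   cleared first, preserving the invariant that each host register
   refers back to at most one temporary.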
 */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_TB:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory.  'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}

/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
3643 * @allocated_regs: Set of registers which must be avoided. 3644 * @preferred_regs: Set of registers we should prefer. 3645 * @rev: True if we search the registers in "indirect" order. 3646 * 3647 * The allocated register must be in @required_regs & ~@allocated_regs, 3648 * but if we can put it in @preferred_regs we may save a move later. 3649 */ 3650 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3651 TCGRegSet allocated_regs, 3652 TCGRegSet preferred_regs, bool rev) 3653 { 3654 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3655 TCGRegSet reg_ct[2]; 3656 const int *order; 3657 3658 reg_ct[1] = required_regs & ~allocated_regs; 3659 tcg_debug_assert(reg_ct[1] != 0); 3660 reg_ct[0] = reg_ct[1] & preferred_regs; 3661 3662 /* Skip the preferred_regs option if it cannot be satisfied, 3663 or if the preference made no difference. */ 3664 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3665 3666 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3667 3668 /* Try free registers, preferences first. */ 3669 for (j = f; j < 2; j++) { 3670 TCGRegSet set = reg_ct[j]; 3671 3672 if (tcg_regset_single(set)) { 3673 /* One register in the set. */ 3674 TCGReg reg = tcg_regset_first(set); 3675 if (s->reg_to_temp[reg] == NULL) { 3676 return reg; 3677 } 3678 } else { 3679 for (i = 0; i < n; i++) { 3680 TCGReg reg = order[i]; 3681 if (s->reg_to_temp[reg] == NULL && 3682 tcg_regset_test_reg(set, reg)) { 3683 return reg; 3684 } 3685 } 3686 } 3687 } 3688 3689 /* We must spill something. */ 3690 for (j = f; j < 2; j++) { 3691 TCGRegSet set = reg_ct[j]; 3692 3693 if (tcg_regset_single(set)) { 3694 /* One register in the set. */ 3695 TCGReg reg = tcg_regset_first(set); 3696 tcg_reg_free(s, reg, allocated_regs); 3697 return reg; 3698 } else { 3699 for (i = 0; i < n; i++) { 3700 TCGReg reg = order[i]; 3701 if (tcg_regset_test_reg(set, reg)) { 3702 tcg_reg_free(s, reg, allocated_regs); 3703 return reg; 3704 } 3705 } 3706 } 3707 } 3708 3709 tcg_abort(); 3710 } 3711 3712 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 3713 TCGRegSet allocated_regs, 3714 TCGRegSet preferred_regs, bool rev) 3715 { 3716 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3717 TCGRegSet reg_ct[2]; 3718 const int *order; 3719 3720 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 3721 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 3722 tcg_debug_assert(reg_ct[1] != 0); 3723 reg_ct[0] = reg_ct[1] & preferred_regs; 3724 3725 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3726 3727 /* 3728 * Skip the preferred_regs option if it cannot be satisfied, 3729 * or if the preference made no difference. 3730 */ 3731 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3732 3733 /* 3734 * Minimize the number of flushes by looking for 2 free registers first, 3735 * then a single flush, then two flushes. 3736 */ 3737 for (fmin = 2; fmin >= 0; fmin--) { 3738 for (j = k; j < 2; j++) { 3739 TCGRegSet set = reg_ct[j]; 3740 3741 for (i = 0; i < n; i++) { 3742 TCGReg reg = order[i]; 3743 3744 if (tcg_regset_test_reg(set, reg)) { 3745 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 3746 if (f >= fmin) { 3747 tcg_reg_free(s, reg, allocated_regs); 3748 tcg_reg_free(s, reg + 1, allocated_regs); 3749 return reg; 3750 } 3751 } 3752 } 3753 } 3754 } 3755 tcg_abort(); 3756 } 3757 3758 /* Make sure the temporary is in a register. If needed, allocate the register 3759 from DESIRED while avoiding ALLOCATED. 
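   For a vector constant, also search for the smallest element size
   that replicates to the full value: e.g. 0x6464646464646464 is
   loaded as a dup of the MO_8 element 0x64.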
   */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety.  */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location.  */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register.
*/ 3863 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3864 break; 3865 default: 3866 g_assert_not_reached(); 3867 } 3868 } 3869 3870 save_globals(s, allocated_regs); 3871 } 3872 3873 /* 3874 * At a conditional branch, we assume all temporaries are dead unless 3875 * explicitly live-across-conditional-branch; all globals and local 3876 * temps are synced to their location. 3877 */ 3878 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3879 { 3880 sync_globals(s, allocated_regs); 3881 3882 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3883 TCGTemp *ts = &s->temps[i]; 3884 /* 3885 * The liveness analysis already ensures that temps are dead. 3886 * Keep tcg_debug_asserts for safety. 3887 */ 3888 switch (ts->kind) { 3889 case TEMP_TB: 3890 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3891 break; 3892 case TEMP_EBB: 3893 case TEMP_CONST: 3894 break; 3895 default: 3896 g_assert_not_reached(); 3897 } 3898 } 3899 } 3900 3901 /* 3902 * Specialized code generation for INDEX_op_mov_* with a constant. 3903 */ 3904 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3905 tcg_target_ulong val, TCGLifeData arg_life, 3906 TCGRegSet preferred_regs) 3907 { 3908 /* ENV should not be modified. */ 3909 tcg_debug_assert(!temp_readonly(ots)); 3910 3911 /* The movi is not explicitly generated here. */ 3912 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 3913 ots->val = val; 3914 ots->mem_coherent = 0; 3915 if (NEED_SYNC_ARG(0)) { 3916 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3917 } else if (IS_DEAD_ARG(0)) { 3918 temp_dead(s, ots); 3919 } 3920 } 3921 3922 /* 3923 * Specialized code generation for INDEX_op_mov_*. 3924 */ 3925 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3926 { 3927 const TCGLifeData arg_life = op->life; 3928 TCGRegSet allocated_regs, preferred_regs; 3929 TCGTemp *ts, *ots; 3930 TCGType otype, itype; 3931 TCGReg oreg, ireg; 3932 3933 allocated_regs = s->reserved_regs; 3934 preferred_regs = output_pref(op, 0); 3935 ots = arg_temp(op->args[0]); 3936 ts = arg_temp(op->args[1]); 3937 3938 /* ENV should not be modified. */ 3939 tcg_debug_assert(!temp_readonly(ots)); 3940 3941 /* Note that otype != itype for no-op truncation. */ 3942 otype = ots->type; 3943 itype = ts->type; 3944 3945 if (ts->val_type == TEMP_VAL_CONST) { 3946 /* propagate constant or generate sti */ 3947 tcg_target_ulong val = ts->val; 3948 if (IS_DEAD_ARG(1)) { 3949 temp_dead(s, ts); 3950 } 3951 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3952 return; 3953 } 3954 3955 /* If the source value is in memory we're going to be forced 3956 to have it in a register in order to perform the copy. Copy 3957 the SOURCE value into its own register first, that way we 3958 don't have to reload SOURCE the next time it is used. */ 3959 if (ts->val_type == TEMP_VAL_MEM) { 3960 temp_load(s, ts, tcg_target_available_regs[itype], 3961 allocated_regs, preferred_regs); 3962 } 3963 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3964 ireg = ts->reg; 3965 3966 if (IS_DEAD_ARG(0)) { 3967 /* mov to a non-saved dead register makes no sense (even with 3968 liveness analysis disabled). 
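           The only reason such a mov survives liveness is a required
           sync, so below we store the input register straight into
           the output's canonical memory slot.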
         */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
        return;
    }

    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
        /*
         * The mov can be suppressed.  Kill input first, so that it
         * is unlinked from reg_to_temp, then set the output to the
         * reg that we saved from the input.
         */
        temp_dead(s, ts);
        oreg = ireg;
    } else {
        if (ots->val_type == TEMP_VAL_REG) {
            oreg = ots->reg;
        } else {
            /* Make sure to not spill the input register during allocation. */
            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                 allocated_regs | ((TCGRegSet)1 << ireg),
                                 preferred_regs, ots->indirect_base);
        }
        if (!tcg_out_mov(s, otype, oreg, ireg)) {
            /*
             * Cross register class move not supported.
             * Store the source register into the destination slot
             * and leave the destination temp as TEMP_VAL_MEM.
             */
            assert(!temp_readonly(ots));
            if (!ots->mem_allocated) {
                temp_allocate_frame(s, ots);
            }
            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
            set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
            ots->mem_coherent = 1;
            return;
        }
    }
    set_temp_val_reg(s, ots, oreg);
    ots->mem_coherent = 0;

    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, allocated_regs, 0, 0);
    }
}

/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
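         *
         * For instance, a host might handle dup of 32-bit elements
         * directly but want a scratch move for 8-bit ones; a false
         * return here routes us to the memory and integer-register
         * fallbacks below.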
4078 */ 4079 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 4080 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 4081 goto done; 4082 } 4083 /* Try again from memory or a vector input register. */ 4084 } 4085 if (!its->mem_coherent) { 4086 /* 4087 * The input register is not synced, and so an extra store 4088 * would be required to use memory. Attempt an integer-vector 4089 * register move first. We do not have a TCGRegSet for this. 4090 */ 4091 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4092 break; 4093 } 4094 /* Sync the temp back to its slot and load from there. */ 4095 temp_sync(s, its, s->reserved_regs, 0, 0); 4096 } 4097 /* fall through */ 4098 4099 case TEMP_VAL_MEM: 4100 lowpart_ofs = 0; 4101 if (HOST_BIG_ENDIAN) { 4102 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 4103 } 4104 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 4105 its->mem_offset + lowpart_ofs)) { 4106 goto done; 4107 } 4108 /* Load the input into the destination vector register. */ 4109 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 4110 break; 4111 4112 default: 4113 g_assert_not_reached(); 4114 } 4115 4116 /* We now have a vector input register, so dup must succeed. */ 4117 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4118 tcg_debug_assert(ok); 4119 4120 done: 4121 ots->mem_coherent = 0; 4122 if (IS_DEAD_ARG(1)) { 4123 temp_dead(s, its); 4124 } 4125 if (NEED_SYNC_ARG(0)) { 4126 temp_sync(s, ots, s->reserved_regs, 0, 0); 4127 } 4128 if (IS_DEAD_ARG(0)) { 4129 temp_dead(s, ots); 4130 } 4131 } 4132 4133 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4134 { 4135 const TCGLifeData arg_life = op->life; 4136 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4137 TCGRegSet i_allocated_regs; 4138 TCGRegSet o_allocated_regs; 4139 int i, k, nb_iargs, nb_oargs; 4140 TCGReg reg; 4141 TCGArg arg; 4142 const TCGArgConstraint *arg_ct; 4143 TCGTemp *ts; 4144 TCGArg new_args[TCG_MAX_OP_ARGS]; 4145 int const_args[TCG_MAX_OP_ARGS]; 4146 4147 nb_oargs = def->nb_oargs; 4148 nb_iargs = def->nb_iargs; 4149 4150 /* copy constants */ 4151 memcpy(new_args + nb_oargs + nb_iargs, 4152 op->args + nb_oargs + nb_iargs, 4153 sizeof(TCGArg) * def->nb_cargs); 4154 4155 i_allocated_regs = s->reserved_regs; 4156 o_allocated_regs = s->reserved_regs; 4157 4158 /* satisfy input constraints */ 4159 for (k = 0; k < nb_iargs; k++) { 4160 TCGRegSet i_preferred_regs, i_required_regs; 4161 bool allocate_new_reg, copyto_new_reg; 4162 TCGTemp *ts2; 4163 int i1, i2; 4164 4165 i = def->args_ct[nb_oargs + k].sort_index; 4166 arg = op->args[i]; 4167 arg_ct = &def->args_ct[i]; 4168 ts = arg_temp(arg); 4169 4170 if (ts->val_type == TEMP_VAL_CONST 4171 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4172 /* constant is OK for instruction */ 4173 const_args[i] = 1; 4174 new_args[i] = ts->val; 4175 continue; 4176 } 4177 4178 reg = ts->reg; 4179 i_preferred_regs = 0; 4180 i_required_regs = arg_ct->regs; 4181 allocate_new_reg = false; 4182 copyto_new_reg = false; 4183 4184 switch (arg_ct->pair) { 4185 case 0: /* not paired */ 4186 if (arg_ct->ialias) { 4187 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4188 4189 /* 4190 * If the input is readonly, then it cannot also be an 4191 * output and aliased to itself. If the input is not 4192 * dead after the instruction, we must allocate a new 4193 * register and move it. 
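 * For example, with a two-address constraint that ties an input to
 * the output register (an ialias), the instruction destroys that
 * register; an input that is still needed afterwards must first be
 * copied into a register the instruction is free to clobber.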
4194 */ 4195 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4196 allocate_new_reg = true; 4197 } else if (ts->val_type == TEMP_VAL_REG) { 4198 /* 4199 * Check if the current register has already been 4200 * allocated for another input. 4201 */ 4202 allocate_new_reg = 4203 tcg_regset_test_reg(i_allocated_regs, reg); 4204 } 4205 } 4206 if (!allocate_new_reg) { 4207 temp_load(s, ts, i_required_regs, i_allocated_regs, 4208 i_preferred_regs); 4209 reg = ts->reg; 4210 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4211 } 4212 if (allocate_new_reg) { 4213 /* 4214 * Allocate a new register matching the constraint 4215 * and move the temporary register into it. 4216 */ 4217 temp_load(s, ts, tcg_target_available_regs[ts->type], 4218 i_allocated_regs, 0); 4219 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4220 i_preferred_regs, ts->indirect_base); 4221 copyto_new_reg = true; 4222 } 4223 break; 4224 4225 case 1: 4226 /* First of an input pair; if i1 == i2, the second is an output. */ 4227 i1 = i; 4228 i2 = arg_ct->pair_index; 4229 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4230 4231 /* 4232 * It is easier to default to allocating a new pair 4233 * and to identify a few cases where it's not required. 4234 */ 4235 if (arg_ct->ialias) { 4236 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4237 if (IS_DEAD_ARG(i1) && 4238 IS_DEAD_ARG(i2) && 4239 !temp_readonly(ts) && 4240 ts->val_type == TEMP_VAL_REG && 4241 ts->reg < TCG_TARGET_NB_REGS - 1 && 4242 tcg_regset_test_reg(i_required_regs, reg) && 4243 !tcg_regset_test_reg(i_allocated_regs, reg) && 4244 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4245 (ts2 4246 ? ts2->val_type == TEMP_VAL_REG && 4247 ts2->reg == reg + 1 && 4248 !temp_readonly(ts2) 4249 : s->reg_to_temp[reg + 1] == NULL)) { 4250 break; 4251 } 4252 } else { 4253 /* Without aliasing, the pair must also be an input. */ 4254 tcg_debug_assert(ts2); 4255 if (ts->val_type == TEMP_VAL_REG && 4256 ts2->val_type == TEMP_VAL_REG && 4257 ts2->reg == reg + 1 && 4258 tcg_regset_test_reg(i_required_regs, reg)) { 4259 break; 4260 } 4261 } 4262 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4263 0, ts->indirect_base); 4264 goto do_pair; 4265 4266 case 2: /* pair second */ 4267 reg = new_args[arg_ct->pair_index] + 1; 4268 goto do_pair; 4269 4270 case 3: /* ialias with second output, no first input */ 4271 tcg_debug_assert(arg_ct->ialias); 4272 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4273 4274 if (IS_DEAD_ARG(i) && 4275 !temp_readonly(ts) && 4276 ts->val_type == TEMP_VAL_REG && 4277 reg > 0 && 4278 s->reg_to_temp[reg - 1] == NULL && 4279 tcg_regset_test_reg(i_required_regs, reg) && 4280 !tcg_regset_test_reg(i_allocated_regs, reg) && 4281 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4282 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4283 break; 4284 } 4285 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4286 i_allocated_regs, 0, 4287 ts->indirect_base); 4288 tcg_regset_set_reg(i_allocated_regs, reg); 4289 reg += 1; 4290 goto do_pair; 4291 4292 do_pair: 4293 /* 4294 * If an aliased input is not dead after the instruction, 4295 * we must allocate a new register and move it. 4296 */ 4297 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4298 TCGRegSet t_allocated_regs = i_allocated_regs; 4299 4300 /* 4301 * Because of the alias, and the continued life, make sure 4302 * that the temp is somewhere *other* than the reg pair, 4303 * and we get a copy in reg. 
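 * Two cases follow: if the temp already occupies reg, move it aside
 * to a scratch register and the value left behind in reg serves as
 * the input copy; otherwise load the temp somewhere outside the pair
 * and copy it into reg afterwards.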
4304 */ 4305 tcg_regset_set_reg(t_allocated_regs, reg); 4306 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4307 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4308 /* If ts was already in reg, copy it somewhere else. */ 4309 TCGReg nr; 4310 bool ok; 4311 4312 tcg_debug_assert(ts->kind != TEMP_FIXED); 4313 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4314 t_allocated_regs, 0, ts->indirect_base); 4315 ok = tcg_out_mov(s, ts->type, nr, reg); 4316 tcg_debug_assert(ok); 4317 4318 set_temp_val_reg(s, ts, nr); 4319 } else { 4320 temp_load(s, ts, tcg_target_available_regs[ts->type], 4321 t_allocated_regs, 0); 4322 copyto_new_reg = true; 4323 } 4324 } else { 4325 /* Preferably allocate to reg, otherwise copy. */ 4326 i_required_regs = (TCGRegSet)1 << reg; 4327 temp_load(s, ts, i_required_regs, i_allocated_regs, 4328 i_preferred_regs); 4329 copyto_new_reg = ts->reg != reg; 4330 } 4331 break; 4332 4333 default: 4334 g_assert_not_reached(); 4335 } 4336 4337 if (copyto_new_reg) { 4338 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4339 /* 4340 * Cross register class move not supported. Sync the 4341 * temp back to its slot and load from there. 4342 */ 4343 temp_sync(s, ts, i_allocated_regs, 0, 0); 4344 tcg_out_ld(s, ts->type, reg, 4345 ts->mem_base->reg, ts->mem_offset); 4346 } 4347 } 4348 new_args[i] = reg; 4349 const_args[i] = 0; 4350 tcg_regset_set_reg(i_allocated_regs, reg); 4351 } 4352 4353 /* mark dead temporaries and free the associated registers */ 4354 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4355 if (IS_DEAD_ARG(i)) { 4356 temp_dead(s, arg_temp(op->args[i])); 4357 } 4358 } 4359 4360 if (def->flags & TCG_OPF_COND_BRANCH) { 4361 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4362 } else if (def->flags & TCG_OPF_BB_END) { 4363 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4364 } else { 4365 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4366 /* XXX: permit generic clobber register list ? */ 4367 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4368 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4369 tcg_reg_free(s, i, i_allocated_regs); 4370 } 4371 } 4372 } 4373 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4374 /* sync globals if the op has side effects and might trigger 4375 an exception. */ 4376 sync_globals(s, i_allocated_regs); 4377 } 4378 4379 /* satisfy the output constraints */ 4380 for(k = 0; k < nb_oargs; k++) { 4381 i = def->args_ct[k].sort_index; 4382 arg = op->args[i]; 4383 arg_ct = &def->args_ct[i]; 4384 ts = arg_temp(arg); 4385 4386 /* ENV should not be modified. 
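(As a TEMP_FIXED global it is readonly, so no opcode may name
it as an output.)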
*/ 4387 tcg_debug_assert(!temp_readonly(ts)); 4388 4389 switch (arg_ct->pair) { 4390 case 0: /* not paired */ 4391 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4392 reg = new_args[arg_ct->alias_index]; 4393 } else if (arg_ct->newreg) { 4394 reg = tcg_reg_alloc(s, arg_ct->regs, 4395 i_allocated_regs | o_allocated_regs, 4396 output_pref(op, k), ts->indirect_base); 4397 } else { 4398 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4399 output_pref(op, k), ts->indirect_base); 4400 } 4401 break; 4402 4403 case 1: /* first of pair */ 4404 tcg_debug_assert(!arg_ct->newreg); 4405 if (arg_ct->oalias) { 4406 reg = new_args[arg_ct->alias_index]; 4407 break; 4408 } 4409 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 4410 output_pref(op, k), ts->indirect_base); 4411 break; 4412 4413 case 2: /* second of pair */ 4414 tcg_debug_assert(!arg_ct->newreg); 4415 if (arg_ct->oalias) { 4416 reg = new_args[arg_ct->alias_index]; 4417 } else { 4418 reg = new_args[arg_ct->pair_index] + 1; 4419 } 4420 break; 4421 4422 case 3: /* first of pair, aliasing with a second input */ 4423 tcg_debug_assert(!arg_ct->newreg); 4424 reg = new_args[arg_ct->pair_index] - 1; 4425 break; 4426 4427 default: 4428 g_assert_not_reached(); 4429 } 4430 tcg_regset_set_reg(o_allocated_regs, reg); 4431 set_temp_val_reg(s, ts, reg); 4432 ts->mem_coherent = 0; 4433 new_args[i] = reg; 4434 } 4435 } 4436 4437 /* emit instruction */ 4438 if (def->flags & TCG_OPF_VECTOR) { 4439 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4440 new_args, const_args); 4441 } else { 4442 tcg_out_op(s, op->opc, new_args, const_args); 4443 } 4444 4445 /* move the outputs in the correct register if needed */ 4446 for(i = 0; i < nb_oargs; i++) { 4447 ts = arg_temp(op->args[i]); 4448 4449 /* ENV should not be modified. */ 4450 tcg_debug_assert(!temp_readonly(ts)); 4451 4452 if (NEED_SYNC_ARG(i)) { 4453 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4454 } else if (IS_DEAD_ARG(i)) { 4455 temp_dead(s, ts); 4456 } 4457 } 4458 } 4459 4460 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4461 { 4462 const TCGLifeData arg_life = op->life; 4463 TCGTemp *ots, *itsl, *itsh; 4464 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4465 4466 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4467 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4468 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4469 4470 ots = arg_temp(op->args[0]); 4471 itsl = arg_temp(op->args[1]); 4472 itsh = arg_temp(op->args[2]); 4473 4474 /* ENV should not be modified. */ 4475 tcg_debug_assert(!temp_readonly(ots)); 4476 4477 /* Allocate the output register now. */ 4478 if (ots->val_type != TEMP_VAL_REG) { 4479 TCGRegSet allocated_regs = s->reserved_regs; 4480 TCGRegSet dup_out_regs = 4481 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4482 TCGReg oreg; 4483 4484 /* Make sure to not spill the input registers. */ 4485 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 4486 tcg_regset_set_reg(allocated_regs, itsl->reg); 4487 } 4488 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 4489 tcg_regset_set_reg(allocated_regs, itsh->reg); 4490 } 4491 4492 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4493 output_pref(op, 0), ots->indirect_base); 4494 set_temp_val_reg(s, ots, oreg); 4495 } 4496 4497 /* Promote dup2 of immediates to dupi_vec. 
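For example, low part 0x01010101 and high part 0x01010101 combine
to val 0x0101010101010101; dup_const(MO_8, val) reproduces val
exactly, so the broadcast is narrowed all the way down to an MO_8
dup of 0x01.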
*/ 4498 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 4499 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 4500 MemOp vece = MO_64; 4501 4502 if (val == dup_const(MO_8, val)) { 4503 vece = MO_8; 4504 } else if (val == dup_const(MO_16, val)) { 4505 vece = MO_16; 4506 } else if (val == dup_const(MO_32, val)) { 4507 vece = MO_32; 4508 } 4509 4510 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 4511 goto done; 4512 } 4513 4514 /* If the two inputs form one 64-bit value, try dupm_vec. */ 4515 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 4516 itsh->temp_subindex == !HOST_BIG_ENDIAN && 4517 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 4518 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 4519 4520 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 4521 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 4522 4523 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 4524 its->mem_base->reg, its->mem_offset)) { 4525 goto done; 4526 } 4527 } 4528 4529 /* Fall back to generic expansion. */ 4530 return false; 4531 4532 done: 4533 ots->mem_coherent = 0; 4534 if (IS_DEAD_ARG(1)) { 4535 temp_dead(s, itsl); 4536 } 4537 if (IS_DEAD_ARG(2)) { 4538 temp_dead(s, itsh); 4539 } 4540 if (NEED_SYNC_ARG(0)) { 4541 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 4542 } else if (IS_DEAD_ARG(0)) { 4543 temp_dead(s, ots); 4544 } 4545 return true; 4546 } 4547 4548 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 4549 TCGRegSet allocated_regs) 4550 { 4551 if (ts->val_type == TEMP_VAL_REG) { 4552 if (ts->reg != reg) { 4553 tcg_reg_free(s, reg, allocated_regs); 4554 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4555 /* 4556 * Cross register class move not supported. Sync the 4557 * temp back to its slot and load from there. 4558 */ 4559 temp_sync(s, ts, allocated_regs, 0, 0); 4560 tcg_out_ld(s, ts->type, reg, 4561 ts->mem_base->reg, ts->mem_offset); 4562 } 4563 } 4564 } else { 4565 TCGRegSet arg_set = 0; 4566 4567 tcg_reg_free(s, reg, allocated_regs); 4568 tcg_regset_set_reg(arg_set, reg); 4569 temp_load(s, ts, arg_set, allocated_regs, 0); 4570 } 4571 } 4572 4573 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts, 4574 TCGRegSet allocated_regs) 4575 { 4576 /* 4577 * When the destination is on the stack, load up the temp and store. 4578 * If there are many call-saved registers, the temp might live to 4579 * see another use; otherwise it'll be discarded. 
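 * The slot address computed below is TCG_REG_CALL_STACK plus
 * TCG_TARGET_CALL_STACK_OFFSET plus stk_slot words; e.g. the third
 * stacked word lives at that offset plus 2 * sizeof(tcg_target_long).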
4580 */ 4581 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 4582 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 4583 TCG_TARGET_CALL_STACK_OFFSET + 4584 stk_slot * sizeof(tcg_target_long)); 4585 } 4586 4587 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 4588 TCGTemp *ts, TCGRegSet *allocated_regs) 4589 { 4590 if (REG_P(l)) { 4591 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 4592 load_arg_reg(s, reg, ts, *allocated_regs); 4593 tcg_regset_set_reg(*allocated_regs, reg); 4594 } else { 4595 load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs), 4596 ts, *allocated_regs); 4597 } 4598 } 4599 4600 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base, 4601 intptr_t ref_off, TCGRegSet *allocated_regs) 4602 { 4603 TCGReg reg; 4604 int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 4605 4606 if (stk_slot < 0) { 4607 reg = tcg_target_call_iarg_regs[arg_slot]; 4608 tcg_reg_free(s, reg, *allocated_regs); 4609 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4610 tcg_regset_set_reg(*allocated_regs, reg); 4611 } else { 4612 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 4613 *allocated_regs, 0, false); 4614 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4615 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 4616 TCG_TARGET_CALL_STACK_OFFSET 4617 + stk_slot * sizeof(tcg_target_long)); 4618 } 4619 } 4620 4621 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 4622 { 4623 const int nb_oargs = TCGOP_CALLO(op); 4624 const int nb_iargs = TCGOP_CALLI(op); 4625 const TCGLifeData arg_life = op->life; 4626 const TCGHelperInfo *info = tcg_call_info(op); 4627 TCGRegSet allocated_regs = s->reserved_regs; 4628 int i; 4629 4630 /* 4631 * Move inputs into place in reverse order, 4632 * so that we place stacked arguments first. 4633 */ 4634 for (i = nb_iargs - 1; i >= 0; --i) { 4635 const TCGCallArgumentLoc *loc = &info->in[i]; 4636 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 4637 4638 switch (loc->kind) { 4639 case TCG_CALL_ARG_NORMAL: 4640 case TCG_CALL_ARG_EXTEND_U: 4641 case TCG_CALL_ARG_EXTEND_S: 4642 load_arg_normal(s, loc, ts, &allocated_regs); 4643 break; 4644 case TCG_CALL_ARG_BY_REF: 4645 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4646 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 4647 TCG_TARGET_CALL_STACK_OFFSET 4648 + loc->ref_slot * sizeof(tcg_target_long), 4649 &allocated_regs); 4650 break; 4651 case TCG_CALL_ARG_BY_REF_N: 4652 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4653 break; 4654 default: 4655 g_assert_not_reached(); 4656 } 4657 } 4658 4659 /* Mark dead temporaries and free the associated registers. */ 4660 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4661 if (IS_DEAD_ARG(i)) { 4662 temp_dead(s, arg_temp(op->args[i])); 4663 } 4664 } 4665 4666 /* Clobber call registers. */ 4667 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4668 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4669 tcg_reg_free(s, i, allocated_regs); 4670 } 4671 } 4672 4673 /* 4674 * Save globals if they might be written by the helper, 4675 * sync them if they might be read. 4676 */ 4677 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 4678 /* Nothing to do */ 4679 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 4680 sync_globals(s, allocated_regs); 4681 } else { 4682 save_globals(s, allocated_regs); 4683 } 4684 4685 /* 4686 * If the ABI passes a pointer to the returned struct as the first 4687 * argument, load that now. Pass a pointer to the output home slot. 
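 * A sketch of the sequence for this case, assuming the hidden
 * pointer travels in the first integer argument register:
 *     addi arg0, frame_base, out_slot_ofs    (load_arg_ref below)
 *     call helper
 * The callee writes the result through arg0, and the outputs are
 * marked TEMP_VAL_MEM once the call returns.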
4688 */ 4689 if (info->out_kind == TCG_CALL_RET_BY_REF) { 4690 TCGTemp *ts = arg_temp(op->args[0]); 4691 4692 if (!ts->mem_allocated) { 4693 temp_allocate_frame(s, ts); 4694 } 4695 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 4696 } 4697 4698 tcg_out_call(s, tcg_call_func(op), info); 4699 4700 /* Assign output registers and emit moves if needed. */ 4701 switch (info->out_kind) { 4702 case TCG_CALL_RET_NORMAL: 4703 for (i = 0; i < nb_oargs; i++) { 4704 TCGTemp *ts = arg_temp(op->args[i]); 4705 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 4706 4707 /* ENV should not be modified. */ 4708 tcg_debug_assert(!temp_readonly(ts)); 4709 4710 set_temp_val_reg(s, ts, reg); 4711 ts->mem_coherent = 0; 4712 } 4713 break; 4714 4715 case TCG_CALL_RET_BY_VEC: 4716 { 4717 TCGTemp *ts = arg_temp(op->args[0]); 4718 4719 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 4720 tcg_debug_assert(ts->temp_subindex == 0); 4721 if (!ts->mem_allocated) { 4722 temp_allocate_frame(s, ts); 4723 } 4724 tcg_out_st(s, TCG_TYPE_V128, 4725 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 4726 ts->mem_base->reg, ts->mem_offset); 4727 } 4728 /* fall through to mark all parts in memory */ 4729 4730 case TCG_CALL_RET_BY_REF: 4731 /* The callee has performed a write through the reference. */ 4732 for (i = 0; i < nb_oargs; i++) { 4733 TCGTemp *ts = arg_temp(op->args[i]); 4734 ts->val_type = TEMP_VAL_MEM; 4735 } 4736 break; 4737 4738 default: 4739 g_assert_not_reached(); 4740 } 4741 4742 /* Flush or discard output registers as needed. */ 4743 for (i = 0; i < nb_oargs; i++) { 4744 TCGTemp *ts = arg_temp(op->args[i]); 4745 if (NEED_SYNC_ARG(i)) { 4746 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 4747 } else if (IS_DEAD_ARG(i)) { 4748 temp_dead(s, ts); 4749 } 4750 } 4751 } 4752 4753 #ifdef CONFIG_PROFILER 4754 4755 /* avoid copy/paste errors */ 4756 #define PROF_ADD(to, from, field) \ 4757 do { \ 4758 (to)->field += qatomic_read(&((from)->field)); \ 4759 } while (0) 4760 4761 #define PROF_MAX(to, from, field) \ 4762 do { \ 4763 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 4764 if (val__ > (to)->field) { \ 4765 (to)->field = val__; \ 4766 } \ 4767 } while (0) 4768 4769 /* Pass in a zero'ed @prof */ 4770 static inline 4771 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 4772 { 4773 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4774 unsigned int i; 4775 4776 for (i = 0; i < n_ctxs; i++) { 4777 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4778 const TCGProfile *orig = &s->prof; 4779 4780 if (counters) { 4781 PROF_ADD(prof, orig, cpu_exec_time); 4782 PROF_ADD(prof, orig, tb_count1); 4783 PROF_ADD(prof, orig, tb_count); 4784 PROF_ADD(prof, orig, op_count); 4785 PROF_MAX(prof, orig, op_count_max); 4786 PROF_ADD(prof, orig, temp_count); 4787 PROF_MAX(prof, orig, temp_count_max); 4788 PROF_ADD(prof, orig, del_op_count); 4789 PROF_ADD(prof, orig, code_in_len); 4790 PROF_ADD(prof, orig, code_out_len); 4791 PROF_ADD(prof, orig, search_out_len); 4792 PROF_ADD(prof, orig, interm_time); 4793 PROF_ADD(prof, orig, code_time); 4794 PROF_ADD(prof, orig, la_time); 4795 PROF_ADD(prof, orig, opt_time); 4796 PROF_ADD(prof, orig, restore_count); 4797 PROF_ADD(prof, orig, restore_time); 4798 } 4799 if (table) { 4800 int i; 4801 4802 for (i = 0; i < NB_OPS; i++) { 4803 PROF_ADD(prof, orig, table_op_count[i]); 4804 } 4805 } 4806 } 4807 } 4808 4809 #undef PROF_ADD 4810 #undef PROF_MAX 4811 4812 static void tcg_profile_snapshot_counters(TCGProfile *prof) 4813 { 
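/* Counters only; the per-opcode table is snapshotted separately below. */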
4814 tcg_profile_snapshot(prof, true, false); 4815 } 4816 4817 static void tcg_profile_snapshot_table(TCGProfile *prof) 4818 { 4819 tcg_profile_snapshot(prof, false, true); 4820 } 4821 4822 void tcg_dump_op_count(GString *buf) 4823 { 4824 TCGProfile prof = {}; 4825 int i; 4826 4827 tcg_profile_snapshot_table(&prof); 4828 for (i = 0; i < NB_OPS; i++) { 4829 g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, 4830 prof.table_op_count[i]); 4831 } 4832 } 4833 4834 int64_t tcg_cpu_exec_time(void) 4835 { 4836 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4837 unsigned int i; 4838 int64_t ret = 0; 4839 4840 for (i = 0; i < n_ctxs; i++) { 4841 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4842 const TCGProfile *prof = &s->prof; 4843 4844 ret += qatomic_read(&prof->cpu_exec_time); 4845 } 4846 return ret; 4847 } 4848 #else 4849 void tcg_dump_op_count(GString *buf) 4850 { 4851 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 4852 } 4853 4854 int64_t tcg_cpu_exec_time(void) 4855 { 4856 error_report("%s: TCG profiler not compiled", __func__); 4857 exit(EXIT_FAILURE); 4858 } 4859 #endif 4860 4861 4862 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) 4863 { 4864 #ifdef CONFIG_PROFILER 4865 TCGProfile *prof = &s->prof; 4866 #endif 4867 int i, num_insns; 4868 TCGOp *op; 4869 4870 #ifdef CONFIG_PROFILER 4871 { 4872 int n = 0; 4873 4874 QTAILQ_FOREACH(op, &s->ops, link) { 4875 n++; 4876 } 4877 qatomic_set(&prof->op_count, prof->op_count + n); 4878 if (n > prof->op_count_max) { 4879 qatomic_set(&prof->op_count_max, n); 4880 } 4881 4882 n = s->nb_temps; 4883 qatomic_set(&prof->temp_count, prof->temp_count + n); 4884 if (n > prof->temp_count_max) { 4885 qatomic_set(&prof->temp_count_max, n); 4886 } 4887 } 4888 #endif 4889 4890 #ifdef DEBUG_DISAS 4891 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4892 && qemu_log_in_addr_range(pc_start))) { 4893 FILE *logfile = qemu_log_trylock(); 4894 if (logfile) { 4895 fprintf(logfile, "OP:\n"); 4896 tcg_dump_ops(s, logfile, false); 4897 fprintf(logfile, "\n"); 4898 qemu_log_unlock(logfile); 4899 } 4900 } 4901 #endif 4902 4903 #ifdef CONFIG_DEBUG_TCG 4904 /* Ensure all labels referenced have been emitted. */ 4905 { 4906 TCGLabel *l; 4907 bool error = false; 4908 4909 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4910 if (unlikely(!l->present) && l->refs) { 4911 qemu_log_mask(CPU_LOG_TB_OP, 4912 "$L%d referenced but not present.\n", l->id); 4913 error = true; 4914 } 4915 } 4916 assert(!error); 4917 } 4918 #endif 4919 4920 #ifdef CONFIG_PROFILER 4921 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4922 #endif 4923 4924 #ifdef USE_TCG_OPTIMIZATIONS 4925 tcg_optimize(s); 4926 #endif 4927 4928 #ifdef CONFIG_PROFILER 4929 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4930 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4931 #endif 4932 4933 reachable_code_pass(s); 4934 liveness_pass_0(s); 4935 liveness_pass_1(s); 4936 4937 if (s->nb_indirects > 0) { 4938 #ifdef DEBUG_DISAS 4939 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4940 && qemu_log_in_addr_range(pc_start))) { 4941 FILE *logfile = qemu_log_trylock(); 4942 if (logfile) { 4943 fprintf(logfile, "OP before indirect lowering:\n"); 4944 tcg_dump_ops(s, logfile, false); 4945 fprintf(logfile, "\n"); 4946 qemu_log_unlock(logfile); 4947 } 4948 } 4949 #endif 4950 /* Replace indirect temps with direct temps. */ 4951 if (liveness_pass_2(s)) { 4952 /* If changes were made, re-run liveness. 
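Pass 2 inserted new loads and stores for the indirect temps it
lowered, so the life data attached to each op is stale.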
*/ 4953 liveness_pass_1(s); 4954 } 4955 } 4956 4957 #ifdef CONFIG_PROFILER 4958 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 4959 #endif 4960 4961 #ifdef DEBUG_DISAS 4962 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 4963 && qemu_log_in_addr_range(pc_start))) { 4964 FILE *logfile = qemu_log_trylock(); 4965 if (logfile) { 4966 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 4967 tcg_dump_ops(s, logfile, true); 4968 fprintf(logfile, "\n"); 4969 qemu_log_unlock(logfile); 4970 } 4971 } 4972 #endif 4973 4974 /* Initialize goto_tb jump offsets. */ 4975 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 4976 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 4977 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 4978 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 4979 4980 tcg_reg_alloc_start(s); 4981 4982 /* 4983 * Reset the buffer pointers when restarting after overflow. 4984 * TODO: Move this into translate-all.c with the rest of the 4985 * buffer management. Having only this done here is confusing. 4986 */ 4987 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 4988 s->code_ptr = s->code_buf; 4989 4990 #ifdef TCG_TARGET_NEED_LDST_LABELS 4991 QSIMPLEQ_INIT(&s->ldst_labels); 4992 #endif 4993 #ifdef TCG_TARGET_NEED_POOL_LABELS 4994 s->pool_labels = NULL; 4995 #endif 4996 4997 num_insns = -1; 4998 QTAILQ_FOREACH(op, &s->ops, link) { 4999 TCGOpcode opc = op->opc; 5000 5001 #ifdef CONFIG_PROFILER 5002 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 5003 #endif 5004 5005 switch (opc) { 5006 case INDEX_op_mov_i32: 5007 case INDEX_op_mov_i64: 5008 case INDEX_op_mov_vec: 5009 tcg_reg_alloc_mov(s, op); 5010 break; 5011 case INDEX_op_dup_vec: 5012 tcg_reg_alloc_dup(s, op); 5013 break; 5014 case INDEX_op_insn_start: 5015 if (num_insns >= 0) { 5016 size_t off = tcg_current_code_size(s); 5017 s->gen_insn_end_off[num_insns] = off; 5018 /* Assert that we do not overflow our stored offset. */ 5019 assert(s->gen_insn_end_off[num_insns] == off); 5020 } 5021 num_insns++; 5022 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 5023 target_ulong a; 5024 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 5025 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 5026 #else 5027 a = op->args[i]; 5028 #endif 5029 s->gen_insn_data[num_insns][i] = a; 5030 } 5031 break; 5032 case INDEX_op_discard: 5033 temp_dead(s, arg_temp(op->args[0])); 5034 break; 5035 case INDEX_op_set_label: 5036 tcg_reg_alloc_bb_end(s, s->reserved_regs); 5037 tcg_out_label(s, arg_label(op->args[0])); 5038 break; 5039 case INDEX_op_call: 5040 tcg_reg_alloc_call(s, op); 5041 break; 5042 case INDEX_op_exit_tb: 5043 tcg_out_exit_tb(s, op->args[0]); 5044 break; 5045 case INDEX_op_goto_tb: 5046 tcg_out_goto_tb(s, op->args[0]); 5047 break; 5048 case INDEX_op_dup2_vec: 5049 if (tcg_reg_alloc_dup2(s, op)) { 5050 break; 5051 } 5052 /* fall through */ 5053 default: 5054 /* Sanity check that we've not introduced any unhandled opcodes. */ 5055 tcg_debug_assert(tcg_op_supported(opc)); 5056 /* Note: in order to speed up the code, it would be much 5057 faster to have specialized register allocator functions for 5058 some common argument patterns */ 5059 tcg_reg_alloc_op(s, op); 5060 break; 5061 } 5062 /* Test for (pending) buffer overflow. The assumption is that any 5063 one operation beginning below the high water mark cannot overrun 5064 the buffer completely. Thus we can test for overflow after 5065 generating code without having to check during generation. 
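The mark sits a fixed margin (TCG_HIGHWATER bytes) below the
true end of the buffer, sized so that no single op's output can
start below the mark and still run past the end.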
*/ 5066 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 5067 return -1; 5068 } 5069 /* Test for TB overflow, as seen by gen_insn_end_off. */ 5070 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 5071 return -2; 5072 } 5073 } 5074 tcg_debug_assert(num_insns >= 0); 5075 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 5076 5077 /* Generate TB finalization at the end of block */ 5078 #ifdef TCG_TARGET_NEED_LDST_LABELS 5079 i = tcg_out_ldst_finalize(s); 5080 if (i < 0) { 5081 return i; 5082 } 5083 #endif 5084 #ifdef TCG_TARGET_NEED_POOL_LABELS 5085 i = tcg_out_pool_finalize(s); 5086 if (i < 0) { 5087 return i; 5088 } 5089 #endif 5090 if (!tcg_resolve_relocs(s)) { 5091 return -2; 5092 } 5093 5094 #ifndef CONFIG_TCG_INTERPRETER 5095 /* flush instruction cache */ 5096 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 5097 (uintptr_t)s->code_buf, 5098 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 5099 #endif 5100 5101 return tcg_current_code_size(s); 5102 } 5103 5104 #ifdef CONFIG_PROFILER 5105 void tcg_dump_info(GString *buf) 5106 { 5107 TCGProfile prof = {}; 5108 const TCGProfile *s; 5109 int64_t tb_count; 5110 int64_t tb_div_count; 5111 int64_t tot; 5112 5113 tcg_profile_snapshot_counters(&prof); 5114 s = &prof; 5115 tb_count = s->tb_count; 5116 tb_div_count = tb_count ? tb_count : 1; 5117 tot = s->interm_time + s->code_time; 5118 5119 g_string_append_printf(buf, "JIT cycles %" PRId64 5120 " (%0.3f s at 2.4 GHz)\n", 5121 tot, tot / 2.4e9); 5122 g_string_append_printf(buf, "translated TBs %" PRId64 5123 " (aborted=%" PRId64 " %0.1f%%)\n", 5124 tb_count, s->tb_count1 - tb_count, 5125 (double)(s->tb_count1 - s->tb_count) 5126 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 5127 g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", 5128 (double)s->op_count / tb_div_count, s->op_count_max); 5129 g_string_append_printf(buf, "deleted ops/TB %0.2f\n", 5130 (double)s->del_op_count / tb_div_count); 5131 g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", 5132 (double)s->temp_count / tb_div_count, 5133 s->temp_count_max); 5134 g_string_append_printf(buf, "avg host code/TB %0.1f\n", 5135 (double)s->code_out_len / tb_div_count); 5136 g_string_append_printf(buf, "avg search data/TB %0.1f\n", 5137 (double)s->search_out_len / tb_div_count); 5138 5139 g_string_append_printf(buf, "cycles/op %0.1f\n", 5140 s->op_count ? (double)tot / s->op_count : 0); 5141 g_string_append_printf(buf, "cycles/in byte %0.1f\n", 5142 s->code_in_len ? (double)tot / s->code_in_len : 0); 5143 g_string_append_printf(buf, "cycles/out byte %0.1f\n", 5144 s->code_out_len ? (double)tot / s->code_out_len : 0); 5145 g_string_append_printf(buf, "cycles/search byte %0.1f\n", 5146 s->search_out_len ? 5147 (double)tot / s->search_out_len : 0); 5148 if (tot == 0) { 5149 tot = 1; 5150 } 5151 g_string_append_printf(buf, " gen_interm time %0.1f%%\n", 5152 (double)s->interm_time / tot * 100.0); 5153 g_string_append_printf(buf, " gen_code time %0.1f%%\n", 5154 (double)s->code_time / tot * 100.0); 5155 g_string_append_printf(buf, "optim./code time %0.1f%%\n", 5156 (double)s->opt_time / (s->code_time ? 5157 s->code_time : 1) 5158 * 100.0); 5159 g_string_append_printf(buf, "liveness/code time %0.1f%%\n", 5160 (double)s->la_time / (s->code_time ? 5161 s->code_time : 1) * 100.0); 5162 g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", 5163 s->restore_count); 5164 g_string_append_printf(buf, " avg cycles %0.1f\n", 5165 s->restore_count ? 
5166 (double)s->restore_time / s->restore_count : 0); 5167 } 5168 #else 5169 void tcg_dump_info(GString *buf) 5170 { 5171 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 5172 } 5173 #endif 5174 5175 #ifdef ELF_HOST_MACHINE 5176 /* In order to use this feature, the backend needs to do three things: 5177 5178 (1) Define ELF_HOST_MACHINE to indicate both what value to 5179 put into the ELF image and to indicate support for the feature. 5180 5181 (2) Define tcg_register_jit. This should create a buffer containing 5182 the contents of a .debug_frame section that describes the post- 5183 prologue unwind info for the tcg machine. 5184 5185 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 5186 */ 5187 5188 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 5189 typedef enum { 5190 JIT_NOACTION = 0, 5191 JIT_REGISTER_FN, 5192 JIT_UNREGISTER_FN 5193 } jit_actions_t; 5194 5195 struct jit_code_entry { 5196 struct jit_code_entry *next_entry; 5197 struct jit_code_entry *prev_entry; 5198 const void *symfile_addr; 5199 uint64_t symfile_size; 5200 }; 5201 5202 struct jit_descriptor { 5203 uint32_t version; 5204 uint32_t action_flag; 5205 struct jit_code_entry *relevant_entry; 5206 struct jit_code_entry *first_entry; 5207 }; 5208 5209 void __jit_debug_register_code(void) __attribute__((noinline)); 5210 void __jit_debug_register_code(void) 5211 { 5212 asm(""); 5213 } 5214 5215 /* Must statically initialize the version, because GDB may check 5216 the version before we can set it. */ 5217 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 5218 5219 /* End GDB interface. */ 5220 5221 static int find_string(const char *strtab, const char *str) 5222 { 5223 const char *p = strtab + 1; 5224 5225 while (1) { 5226 if (strcmp(p, str) == 0) { 5227 return p - strtab; 5228 } 5229 p += strlen(p) + 1; 5230 } 5231 } 5232 5233 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 5234 const void *debug_frame, 5235 size_t debug_frame_size) 5236 { 5237 struct __attribute__((packed)) DebugInfo { 5238 uint32_t len; 5239 uint16_t version; 5240 uint32_t abbrev; 5241 uint8_t ptr_size; 5242 uint8_t cu_die; 5243 uint16_t cu_lang; 5244 uintptr_t cu_low_pc; 5245 uintptr_t cu_high_pc; 5246 uint8_t fn_die; 5247 char fn_name[16]; 5248 uintptr_t fn_low_pc; 5249 uintptr_t fn_high_pc; 5250 uint8_t cu_eoc; 5251 }; 5252 5253 struct ElfImage { 5254 ElfW(Ehdr) ehdr; 5255 ElfW(Phdr) phdr; 5256 ElfW(Shdr) shdr[7]; 5257 ElfW(Sym) sym[2]; 5258 struct DebugInfo di; 5259 uint8_t da[24]; 5260 char str[80]; 5261 }; 5262 5263 struct ElfImage *img; 5264 5265 static const struct ElfImage img_template = { 5266 .ehdr = { 5267 .e_ident[EI_MAG0] = ELFMAG0, 5268 .e_ident[EI_MAG1] = ELFMAG1, 5269 .e_ident[EI_MAG2] = ELFMAG2, 5270 .e_ident[EI_MAG3] = ELFMAG3, 5271 .e_ident[EI_CLASS] = ELF_CLASS, 5272 .e_ident[EI_DATA] = ELF_DATA, 5273 .e_ident[EI_VERSION] = EV_CURRENT, 5274 .e_type = ET_EXEC, 5275 .e_machine = ELF_HOST_MACHINE, 5276 .e_version = EV_CURRENT, 5277 .e_phoff = offsetof(struct ElfImage, phdr), 5278 .e_shoff = offsetof(struct ElfImage, shdr), 5279 .e_ehsize = sizeof(ElfW(Shdr)), 5280 .e_phentsize = sizeof(ElfW(Phdr)), 5281 .e_phnum = 1, 5282 .e_shentsize = sizeof(ElfW(Shdr)), 5283 .e_shnum = ARRAY_SIZE(img->shdr), 5284 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 5285 #ifdef ELF_HOST_FLAGS 5286 .e_flags = ELF_HOST_FLAGS, 5287 #endif 5288 #ifdef ELF_OSABI 5289 .e_ident[EI_OSABI] = ELF_OSABI, 5290 #endif 5291 }, 5292 .phdr = { 5293 .p_type = PT_LOAD, 5294 .p_flags = PF_X, 5295 }, 5296 
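/* The segment's address and size are filled in at runtime below,
   once the code_gen_buffer address and length are known. */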
.shdr = { 5297 [0] = { .sh_type = SHT_NULL }, 5298 /* Trick: The contents of code_gen_buffer are not present in 5299 this fake ELF file; that got allocated elsewhere. Therefore 5300 we mark .text as SHT_NOBITS (similar to .bss) so that readers 5301 will not look for contents. We can record any address. */ 5302 [1] = { /* .text */ 5303 .sh_type = SHT_NOBITS, 5304 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 5305 }, 5306 [2] = { /* .debug_info */ 5307 .sh_type = SHT_PROGBITS, 5308 .sh_offset = offsetof(struct ElfImage, di), 5309 .sh_size = sizeof(struct DebugInfo), 5310 }, 5311 [3] = { /* .debug_abbrev */ 5312 .sh_type = SHT_PROGBITS, 5313 .sh_offset = offsetof(struct ElfImage, da), 5314 .sh_size = sizeof(img->da), 5315 }, 5316 [4] = { /* .debug_frame */ 5317 .sh_type = SHT_PROGBITS, 5318 .sh_offset = sizeof(struct ElfImage), 5319 }, 5320 [5] = { /* .symtab */ 5321 .sh_type = SHT_SYMTAB, 5322 .sh_offset = offsetof(struct ElfImage, sym), 5323 .sh_size = sizeof(img->sym), 5324 .sh_info = 1, 5325 .sh_link = ARRAY_SIZE(img->shdr) - 1, 5326 .sh_entsize = sizeof(ElfW(Sym)), 5327 }, 5328 [6] = { /* .strtab */ 5329 .sh_type = SHT_STRTAB, 5330 .sh_offset = offsetof(struct ElfImage, str), 5331 .sh_size = sizeof(img->str), 5332 } 5333 }, 5334 .sym = { 5335 [1] = { /* code_gen_buffer */ 5336 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5337 .st_shndx = 1, 5338 } 5339 }, 5340 .di = { 5341 .len = sizeof(struct DebugInfo) - 4, 5342 .version = 2, 5343 .ptr_size = sizeof(void *), 5344 .cu_die = 1, 5345 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5346 .fn_die = 2, 5347 .fn_name = "code_gen_buffer" 5348 }, 5349 .da = { 5350 1, /* abbrev number (the cu) */ 5351 0x11, 1, /* DW_TAG_compile_unit, has children */ 5352 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5353 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5354 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5355 0, 0, /* end of abbrev */ 5356 2, /* abbrev number (the fn) */ 5357 0x2e, 0, /* DW_TAG_subprogram, no children */ 5358 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5359 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5360 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5361 0, 0, /* end of abbrev */ 5362 0 /* no more abbrev */ 5363 }, 5364 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5365 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5366 }; 5367 5368 /* We only need a single jit entry; statically allocate it. 
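GDB's descriptor supports a doubly linked list of entries, but one
symfile image describes the whole code_gen_buffer, so a single
statically allocated entry is enough.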
*/ 5369 static struct jit_code_entry one_entry; 5370 5371 uintptr_t buf = (uintptr_t)buf_ptr; 5372 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 5373 DebugFrameHeader *dfh; 5374 5375 img = g_malloc(img_size); 5376 *img = img_template; 5377 5378 img->phdr.p_vaddr = buf; 5379 img->phdr.p_paddr = buf; 5380 img->phdr.p_memsz = buf_size; 5381 5382 img->shdr[1].sh_name = find_string(img->str, ".text"); 5383 img->shdr[1].sh_addr = buf; 5384 img->shdr[1].sh_size = buf_size; 5385 5386 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 5387 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 5388 5389 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 5390 img->shdr[4].sh_size = debug_frame_size; 5391 5392 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 5393 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 5394 5395 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 5396 img->sym[1].st_value = buf; 5397 img->sym[1].st_size = buf_size; 5398 5399 img->di.cu_low_pc = buf; 5400 img->di.cu_high_pc = buf + buf_size; 5401 img->di.fn_low_pc = buf; 5402 img->di.fn_high_pc = buf + buf_size; 5403 5404 dfh = (DebugFrameHeader *)(img + 1); 5405 memcpy(dfh, debug_frame, debug_frame_size); 5406 dfh->fde.func_start = buf; 5407 dfh->fde.func_len = buf_size; 5408 5409 #ifdef DEBUG_JIT 5410 /* Enable this block to be able to debug the ELF image file creation. 5411 One can use readelf, objdump, or other inspection utilities. */ 5412 { 5413 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 5414 FILE *f = fopen(jit, "w+b"); 5415 if (f) { 5416 if (fwrite(img, img_size, 1, f) != img_size) { 5417 /* Avoid stupid unused return value warning for fwrite. */ 5418 } 5419 fclose(f); 5420 } 5421 } 5422 #endif 5423 5424 one_entry.symfile_addr = img; 5425 one_entry.symfile_size = img_size; 5426 5427 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 5428 __jit_debug_descriptor.relevant_entry = &one_entry; 5429 __jit_debug_descriptor.first_entry = &one_entry; 5430 __jit_debug_register_code(); 5431 } 5432 #else 5433 /* No support for the feature. Provide the entry point expected by exec.c, 5434 and implement the internal function we declared earlier. */ 5435 5436 static void tcg_register_jit_int(const void *buf, size_t size, 5437 const void *debug_frame, 5438 size_t debug_frame_size) 5439 { 5440 } 5441 5442 void tcg_register_jit(const void *buf, size_t buf_size) 5443 { 5444 } 5445 #endif /* ELF_HOST_MACHINE */ 5446 5447 #if !TCG_TARGET_MAYBE_vec 5448 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 5449 { 5450 g_assert_not_reached(); 5451 } 5452 #endif 5453