/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}
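
/*
 * For example (illustration only): with TCG_TARGET_INSN_UNIT_SIZE == 1,
 * as on x86, tcg_out32(s, insn) below emits four single-byte units via
 * memcpy and advances code_ptr by 4; with TCG_TARGET_INSN_UNIT_SIZE == 4,
 * as on most RISC hosts, the same call stores one 32-bit unit and
 * advances code_ptr by 1.
 */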

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
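
/*
 * For example, a branch to a not-yet-emitted label records a relocation
 * via tcg_out_reloc() above; once tcg_out_label() assigns the label's
 * u.value_ptr, tcg_resolve_relocs() walks every recorded site and
 * applies the backend's patch_reloc() to fix up the encoded offset.
 */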

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
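
/*
 * For example, a line "C_O1_I2(r, r, ri)" in tcg-target-con-set.h
 * expands first (above) to the enumerator c_o1_i2_r_r_ri, then to the
 * constraint_sets[] entry { .args_ct_str = { "r", "r", "ri" } } at the
 * same index, and finally (below) back to the bare enumerator so that
 * tcg_target_op_def() can return it.
 */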

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
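
/*
 * The typemask decoded below packs one 3-bit dh_typecode per value:
 * bits [2:0] describe the return type and each subsequent 3-bit field
 * one argument.  For example, a helper returning i64 and taking
 * (ptr, i32) has typemask
 *     dh_typecode_i64 | (dh_typecode_ptr << 3) | (dh_typecode_i32 << 6).
 */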

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
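
/*
 * For example, on a 32-bit host an i64 argument is laid out with
 * layout_arg_normal_n(&cum, info, 2): both halves share the same
 * arg_idx, carry tmp_subindex 0 and 1, and consume two consecutive
 * arg_slots.  Hosts using TCG_CALL_ARG_EVEN first round arg_slot up
 * to an even boundary with layout_arg_even().
 */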
725 */ 726 for (int i = 1; i < n; ++i) { 727 loc[i] = (TCGCallArgumentLoc){ 728 .kind = TCG_CALL_ARG_BY_REF_N, 729 .arg_idx = cum->arg_idx, 730 .tmp_subindex = i, 731 .ref_slot = cum->ref_slot + i, 732 }; 733 } 734 cum->info_in_idx += n; 735 cum->ref_slot += n; 736 } 737 738 static void init_call_layout(TCGHelperInfo *info) 739 { 740 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 741 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 742 unsigned typemask = info->typemask; 743 unsigned typecode; 744 TCGCumulativeArgs cum = { }; 745 746 /* 747 * Parse and place any function return value. 748 */ 749 typecode = typemask & 7; 750 switch (typecode) { 751 case dh_typecode_void: 752 info->nr_out = 0; 753 break; 754 case dh_typecode_i32: 755 case dh_typecode_s32: 756 case dh_typecode_ptr: 757 info->nr_out = 1; 758 info->out_kind = TCG_CALL_RET_NORMAL; 759 break; 760 case dh_typecode_i64: 761 case dh_typecode_s64: 762 info->nr_out = 64 / TCG_TARGET_REG_BITS; 763 info->out_kind = TCG_CALL_RET_NORMAL; 764 /* Query the last register now to trigger any assert early. */ 765 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 766 break; 767 case dh_typecode_i128: 768 info->nr_out = 128 / TCG_TARGET_REG_BITS; 769 info->out_kind = TCG_TARGET_CALL_RET_I128; 770 switch (TCG_TARGET_CALL_RET_I128) { 771 case TCG_CALL_RET_NORMAL: 772 /* Query the last register now to trigger any assert early. */ 773 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 774 break; 775 case TCG_CALL_RET_BY_VEC: 776 /* Query the single register now to trigger any assert early. */ 777 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 778 break; 779 case TCG_CALL_RET_BY_REF: 780 /* 781 * Allocate the first argument to the output. 782 * We don't need to store this anywhere, just make it 783 * unavailable for use in the input loop below. 784 */ 785 cum.arg_slot = 1; 786 break; 787 default: 788 qemu_build_not_reached(); 789 } 790 break; 791 default: 792 g_assert_not_reached(); 793 } 794 795 /* 796 * Parse and place function arguments. 
797 */ 798 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 799 TCGCallArgumentKind kind; 800 TCGType type; 801 802 typecode = typemask & 7; 803 switch (typecode) { 804 case dh_typecode_i32: 805 case dh_typecode_s32: 806 type = TCG_TYPE_I32; 807 break; 808 case dh_typecode_i64: 809 case dh_typecode_s64: 810 type = TCG_TYPE_I64; 811 break; 812 case dh_typecode_ptr: 813 type = TCG_TYPE_PTR; 814 break; 815 case dh_typecode_i128: 816 type = TCG_TYPE_I128; 817 break; 818 default: 819 g_assert_not_reached(); 820 } 821 822 switch (type) { 823 case TCG_TYPE_I32: 824 switch (TCG_TARGET_CALL_ARG_I32) { 825 case TCG_CALL_ARG_EVEN: 826 layout_arg_even(&cum); 827 /* fall through */ 828 case TCG_CALL_ARG_NORMAL: 829 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 830 break; 831 case TCG_CALL_ARG_EXTEND: 832 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 833 layout_arg_1(&cum, info, kind); 834 break; 835 default: 836 qemu_build_not_reached(); 837 } 838 break; 839 840 case TCG_TYPE_I64: 841 switch (TCG_TARGET_CALL_ARG_I64) { 842 case TCG_CALL_ARG_EVEN: 843 layout_arg_even(&cum); 844 /* fall through */ 845 case TCG_CALL_ARG_NORMAL: 846 if (TCG_TARGET_REG_BITS == 32) { 847 layout_arg_normal_n(&cum, info, 2); 848 } else { 849 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 850 } 851 break; 852 default: 853 qemu_build_not_reached(); 854 } 855 break; 856 857 case TCG_TYPE_I128: 858 switch (TCG_TARGET_CALL_ARG_I128) { 859 case TCG_CALL_ARG_EVEN: 860 layout_arg_even(&cum); 861 /* fall through */ 862 case TCG_CALL_ARG_NORMAL: 863 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 864 break; 865 case TCG_CALL_ARG_BY_REF: 866 layout_arg_by_ref(&cum, info); 867 break; 868 default: 869 qemu_build_not_reached(); 870 } 871 break; 872 873 default: 874 g_assert_not_reached(); 875 } 876 } 877 info->nr_in = cum.info_in_idx; 878 879 /* Validate that we didn't overrun the input array. */ 880 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 881 /* Validate the backend has enough argument space. */ 882 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 883 884 /* 885 * Relocate the "ref_slot" area to the end of the parameters. 886 * Minimizing this stack offset helps code size for x86, 887 * which has a signed 8-bit offset encoding. 
888 */ 889 if (cum.ref_slot != 0) { 890 int ref_base = 0; 891 892 if (cum.arg_slot > max_reg_slots) { 893 int align = __alignof(Int128) / sizeof(tcg_target_long); 894 895 ref_base = cum.arg_slot - max_reg_slots; 896 if (align > 1) { 897 ref_base = ROUND_UP(ref_base, align); 898 } 899 } 900 assert(ref_base + cum.ref_slot <= max_stk_slots); 901 902 if (ref_base != 0) { 903 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 904 TCGCallArgumentLoc *loc = &info->in[i]; 905 switch (loc->kind) { 906 case TCG_CALL_ARG_BY_REF: 907 case TCG_CALL_ARG_BY_REF_N: 908 loc->ref_slot += ref_base; 909 break; 910 default: 911 break; 912 } 913 } 914 } 915 } 916 } 917 918 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 919 static void process_op_defs(TCGContext *s); 920 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 921 TCGReg reg, const char *name); 922 923 static void tcg_context_init(unsigned max_cpus) 924 { 925 TCGContext *s = &tcg_init_ctx; 926 int op, total_args, n, i; 927 TCGOpDef *def; 928 TCGArgConstraint *args_ct; 929 TCGTemp *ts; 930 931 memset(s, 0, sizeof(*s)); 932 s->nb_globals = 0; 933 934 /* Count total number of arguments and allocate the corresponding 935 space */ 936 total_args = 0; 937 for(op = 0; op < NB_OPS; op++) { 938 def = &tcg_op_defs[op]; 939 n = def->nb_iargs + def->nb_oargs; 940 total_args += n; 941 } 942 943 args_ct = g_new0(TCGArgConstraint, total_args); 944 945 for(op = 0; op < NB_OPS; op++) { 946 def = &tcg_op_defs[op]; 947 def->args_ct = args_ct; 948 n = def->nb_iargs + def->nb_oargs; 949 args_ct += n; 950 } 951 952 /* Register helpers. */ 953 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 954 helper_table = g_hash_table_new(NULL, NULL); 955 956 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 957 init_call_layout(&all_helpers[i]); 958 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 959 (gpointer)&all_helpers[i]); 960 } 961 962 #ifdef CONFIG_TCG_INTERPRETER 963 init_ffi_layouts(); 964 #endif 965 966 tcg_target_init(s); 967 process_op_defs(s); 968 969 /* Reverse the order of the saved registers, assuming they're all at 970 the start of tcg_target_reg_alloc_order. */ 971 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 972 int r = tcg_target_reg_alloc_order[n]; 973 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 974 break; 975 } 976 } 977 for (i = 0; i < n; ++i) { 978 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 979 } 980 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 981 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 982 } 983 984 alloc_tcg_plugin_context(s); 985 986 tcg_ctx = s; 987 /* 988 * In user-mode we simply share the init context among threads, since we 989 * use a single region. See the documentation tcg_region_init() for the 990 * reasoning behind this. 991 * In softmmu we will have at most max_cpus TCG threads. 
992 */ 993 #ifdef CONFIG_USER_ONLY 994 tcg_ctxs = &tcg_ctx; 995 tcg_cur_ctxs = 1; 996 tcg_max_ctxs = 1; 997 #else 998 tcg_max_ctxs = max_cpus; 999 tcg_ctxs = g_new0(TCGContext *, max_cpus); 1000 #endif 1001 1002 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1003 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1004 cpu_env = temp_tcgv_ptr(ts); 1005 } 1006 1007 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus) 1008 { 1009 tcg_context_init(max_cpus); 1010 tcg_region_init(tb_size, splitwx, max_cpus); 1011 } 1012 1013 /* 1014 * Allocate TBs right before their corresponding translated code, making 1015 * sure that TBs and code are on different cache lines. 1016 */ 1017 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1018 { 1019 uintptr_t align = qemu_icache_linesize; 1020 TranslationBlock *tb; 1021 void *next; 1022 1023 retry: 1024 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1025 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1026 1027 if (unlikely(next > s->code_gen_highwater)) { 1028 if (tcg_region_alloc(s)) { 1029 return NULL; 1030 } 1031 goto retry; 1032 } 1033 qatomic_set(&s->code_gen_ptr, next); 1034 s->data_gen_ptr = NULL; 1035 return tb; 1036 } 1037 1038 void tcg_prologue_init(TCGContext *s) 1039 { 1040 size_t prologue_size; 1041 1042 s->code_ptr = s->code_gen_ptr; 1043 s->code_buf = s->code_gen_ptr; 1044 s->data_gen_ptr = NULL; 1045 1046 #ifndef CONFIG_TCG_INTERPRETER 1047 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1048 #endif 1049 1050 #ifdef TCG_TARGET_NEED_POOL_LABELS 1051 s->pool_labels = NULL; 1052 #endif 1053 1054 qemu_thread_jit_write(); 1055 /* Generate the prologue. */ 1056 tcg_target_qemu_prologue(s); 1057 1058 #ifdef TCG_TARGET_NEED_POOL_LABELS 1059 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1060 { 1061 int result = tcg_out_pool_finalize(s); 1062 tcg_debug_assert(result == 0); 1063 } 1064 #endif 1065 1066 prologue_size = tcg_current_code_size(s); 1067 perf_report_prologue(s->code_gen_ptr, prologue_size); 1068 1069 #ifndef CONFIG_TCG_INTERPRETER 1070 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1071 (uintptr_t)s->code_buf, prologue_size); 1072 #endif 1073 1074 #ifdef DEBUG_DISAS 1075 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1076 FILE *logfile = qemu_log_trylock(); 1077 if (logfile) { 1078 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1079 if (s->data_gen_ptr) { 1080 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1081 size_t data_size = prologue_size - code_size; 1082 size_t i; 1083 1084 disas(logfile, s->code_gen_ptr, code_size); 1085 1086 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1087 if (sizeof(tcg_target_ulong) == 8) { 1088 fprintf(logfile, 1089 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1090 (uintptr_t)s->data_gen_ptr + i, 1091 *(uint64_t *)(s->data_gen_ptr + i)); 1092 } else { 1093 fprintf(logfile, 1094 "0x%08" PRIxPTR ": .long 0x%08x\n", 1095 (uintptr_t)s->data_gen_ptr + i, 1096 *(uint32_t *)(s->data_gen_ptr + i)); 1097 } 1098 } 1099 } else { 1100 disas(logfile, s->code_gen_ptr, prologue_size); 1101 } 1102 fprintf(logfile, "\n"); 1103 qemu_log_unlock(logfile); 1104 } 1105 } 1106 #endif 1107 1108 #ifndef CONFIG_TCG_INTERPRETER 1109 /* 1110 * Assert that goto_ptr is implemented completely, setting an epilogue. 1111 * For tci, we use NULL as the signal to return from the interpreter, 1112 * so skip this check. 
1113 */ 1114 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1115 #endif 1116 1117 tcg_region_prologue_set(s); 1118 } 1119 1120 void tcg_func_start(TCGContext *s) 1121 { 1122 tcg_pool_reset(s); 1123 s->nb_temps = s->nb_globals; 1124 1125 /* No temps have been previously allocated for size or locality. */ 1126 memset(s->free_temps, 0, sizeof(s->free_temps)); 1127 1128 /* No constant temps have been previously allocated. */ 1129 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1130 if (s->const_table[i]) { 1131 g_hash_table_remove_all(s->const_table[i]); 1132 } 1133 } 1134 1135 s->nb_ops = 0; 1136 s->nb_labels = 0; 1137 s->current_frame_offset = s->frame_start; 1138 1139 #ifdef CONFIG_DEBUG_TCG 1140 s->goto_tb_issue_mask = 0; 1141 #endif 1142 1143 QTAILQ_INIT(&s->ops); 1144 QTAILQ_INIT(&s->free_ops); 1145 QSIMPLEQ_INIT(&s->labels); 1146 } 1147 1148 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1149 { 1150 int n = s->nb_temps++; 1151 1152 if (n >= TCG_MAX_TEMPS) { 1153 tcg_raise_tb_overflow(s); 1154 } 1155 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1156 } 1157 1158 static TCGTemp *tcg_global_alloc(TCGContext *s) 1159 { 1160 TCGTemp *ts; 1161 1162 tcg_debug_assert(s->nb_globals == s->nb_temps); 1163 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1164 s->nb_globals++; 1165 ts = tcg_temp_alloc(s); 1166 ts->kind = TEMP_GLOBAL; 1167 1168 return ts; 1169 } 1170 1171 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1172 TCGReg reg, const char *name) 1173 { 1174 TCGTemp *ts; 1175 1176 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 1177 tcg_abort(); 1178 } 1179 1180 ts = tcg_global_alloc(s); 1181 ts->base_type = type; 1182 ts->type = type; 1183 ts->kind = TEMP_FIXED; 1184 ts->reg = reg; 1185 ts->name = name; 1186 tcg_regset_set_reg(s->reserved_regs, reg); 1187 1188 return ts; 1189 } 1190 1191 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1192 { 1193 s->frame_start = start; 1194 s->frame_end = start + size; 1195 s->frame_temp 1196 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1197 } 1198 1199 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 1200 intptr_t offset, const char *name) 1201 { 1202 TCGContext *s = tcg_ctx; 1203 TCGTemp *base_ts = tcgv_ptr_temp(base); 1204 TCGTemp *ts = tcg_global_alloc(s); 1205 int indirect_reg = 0; 1206 1207 switch (base_ts->kind) { 1208 case TEMP_FIXED: 1209 break; 1210 case TEMP_GLOBAL: 1211 /* We do not support double-indirect registers. */ 1212 tcg_debug_assert(!base_ts->indirect_reg); 1213 base_ts->indirect_base = 1; 1214 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1215 ? 

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            goto done;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }

 done:
#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
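
/*
 * Note that a type wider than the host register (TCG_TYPE_I64 on a
 * 32-bit host, or TCG_TYPE_I128) is backed by n consecutive TCGTemps
 * of TCG_TYPE_REG, distinguished only by temp_subindex.
 */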

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_EBB:
    case TEMP_TB:
        break;
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

#if defined(CONFIG_DEBUG_TCG)
    assert(s->temps_in_use > 0);
    s->temps_in_use--;
#endif

    if (ts->kind == TEMP_EBB) {
        int idx = temp_idx(ts);
        set_bit(idx, s->free_temps[ts->base_type].l);
    }
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}
1493 */ 1494 s->temps_in_use = 0; 1495 return 1; 1496 } 1497 return 0; 1498 } 1499 #endif 1500 1501 /* Return true if OP may appear in the opcode stream. 1502 Test the runtime variable that controls each opcode. */ 1503 bool tcg_op_supported(TCGOpcode op) 1504 { 1505 const bool have_vec 1506 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; 1507 1508 switch (op) { 1509 case INDEX_op_discard: 1510 case INDEX_op_set_label: 1511 case INDEX_op_call: 1512 case INDEX_op_br: 1513 case INDEX_op_mb: 1514 case INDEX_op_insn_start: 1515 case INDEX_op_exit_tb: 1516 case INDEX_op_goto_tb: 1517 case INDEX_op_goto_ptr: 1518 case INDEX_op_qemu_ld_i32: 1519 case INDEX_op_qemu_st_i32: 1520 case INDEX_op_qemu_ld_i64: 1521 case INDEX_op_qemu_st_i64: 1522 return true; 1523 1524 case INDEX_op_qemu_st8_i32: 1525 return TCG_TARGET_HAS_qemu_st8_i32; 1526 1527 case INDEX_op_mov_i32: 1528 case INDEX_op_setcond_i32: 1529 case INDEX_op_brcond_i32: 1530 case INDEX_op_ld8u_i32: 1531 case INDEX_op_ld8s_i32: 1532 case INDEX_op_ld16u_i32: 1533 case INDEX_op_ld16s_i32: 1534 case INDEX_op_ld_i32: 1535 case INDEX_op_st8_i32: 1536 case INDEX_op_st16_i32: 1537 case INDEX_op_st_i32: 1538 case INDEX_op_add_i32: 1539 case INDEX_op_sub_i32: 1540 case INDEX_op_mul_i32: 1541 case INDEX_op_and_i32: 1542 case INDEX_op_or_i32: 1543 case INDEX_op_xor_i32: 1544 case INDEX_op_shl_i32: 1545 case INDEX_op_shr_i32: 1546 case INDEX_op_sar_i32: 1547 return true; 1548 1549 case INDEX_op_movcond_i32: 1550 return TCG_TARGET_HAS_movcond_i32; 1551 case INDEX_op_div_i32: 1552 case INDEX_op_divu_i32: 1553 return TCG_TARGET_HAS_div_i32; 1554 case INDEX_op_rem_i32: 1555 case INDEX_op_remu_i32: 1556 return TCG_TARGET_HAS_rem_i32; 1557 case INDEX_op_div2_i32: 1558 case INDEX_op_divu2_i32: 1559 return TCG_TARGET_HAS_div2_i32; 1560 case INDEX_op_rotl_i32: 1561 case INDEX_op_rotr_i32: 1562 return TCG_TARGET_HAS_rot_i32; 1563 case INDEX_op_deposit_i32: 1564 return TCG_TARGET_HAS_deposit_i32; 1565 case INDEX_op_extract_i32: 1566 return TCG_TARGET_HAS_extract_i32; 1567 case INDEX_op_sextract_i32: 1568 return TCG_TARGET_HAS_sextract_i32; 1569 case INDEX_op_extract2_i32: 1570 return TCG_TARGET_HAS_extract2_i32; 1571 case INDEX_op_add2_i32: 1572 return TCG_TARGET_HAS_add2_i32; 1573 case INDEX_op_sub2_i32: 1574 return TCG_TARGET_HAS_sub2_i32; 1575 case INDEX_op_mulu2_i32: 1576 return TCG_TARGET_HAS_mulu2_i32; 1577 case INDEX_op_muls2_i32: 1578 return TCG_TARGET_HAS_muls2_i32; 1579 case INDEX_op_muluh_i32: 1580 return TCG_TARGET_HAS_muluh_i32; 1581 case INDEX_op_mulsh_i32: 1582 return TCG_TARGET_HAS_mulsh_i32; 1583 case INDEX_op_ext8s_i32: 1584 return TCG_TARGET_HAS_ext8s_i32; 1585 case INDEX_op_ext16s_i32: 1586 return TCG_TARGET_HAS_ext16s_i32; 1587 case INDEX_op_ext8u_i32: 1588 return TCG_TARGET_HAS_ext8u_i32; 1589 case INDEX_op_ext16u_i32: 1590 return TCG_TARGET_HAS_ext16u_i32; 1591 case INDEX_op_bswap16_i32: 1592 return TCG_TARGET_HAS_bswap16_i32; 1593 case INDEX_op_bswap32_i32: 1594 return TCG_TARGET_HAS_bswap32_i32; 1595 case INDEX_op_not_i32: 1596 return TCG_TARGET_HAS_not_i32; 1597 case INDEX_op_neg_i32: 1598 return TCG_TARGET_HAS_neg_i32; 1599 case INDEX_op_andc_i32: 1600 return TCG_TARGET_HAS_andc_i32; 1601 case INDEX_op_orc_i32: 1602 return TCG_TARGET_HAS_orc_i32; 1603 case INDEX_op_eqv_i32: 1604 return TCG_TARGET_HAS_eqv_i32; 1605 case INDEX_op_nand_i32: 1606 return TCG_TARGET_HAS_nand_i32; 1607 case INDEX_op_nor_i32: 1608 return TCG_TARGET_HAS_nor_i32; 1609 case INDEX_op_clz_i32: 1610 return TCG_TARGET_HAS_clz_i32; 1611 case 
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}
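
/*
 * For example, with s->nb_globals == 5, the temp at index 7 prints as
 * "loc2" if TEMP_TB, "tmp2" if TEMP_EBB, and a TCG_TYPE_I32 constant
 * with value 16 prints as "$0x10".
 */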

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
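
/*
 * A dumped op line might look like, e.g.:
 *      add_i32 tmp2,tmp0,tmp1                   dead: 1 2  pref=all
 * with liveness and register-preference annotations appended by the
 * code below when available.
 */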
2079 */ 2080 if (func == info->func) { 2081 col += ne_fprintf(f, "%s", info->name); 2082 } else { 2083 col += ne_fprintf(f, "plugin(%p)", func); 2084 } 2085 2086 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2087 for (i = 0; i < nb_oargs; i++) { 2088 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2089 op->args[i])); 2090 } 2091 for (i = 0; i < nb_iargs; i++) { 2092 TCGArg arg = op->args[nb_oargs + i]; 2093 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2094 col += ne_fprintf(f, ",%s", t); 2095 } 2096 } else { 2097 col += ne_fprintf(f, " %s ", def->name); 2098 2099 nb_oargs = def->nb_oargs; 2100 nb_iargs = def->nb_iargs; 2101 nb_cargs = def->nb_cargs; 2102 2103 if (def->flags & TCG_OPF_VECTOR) { 2104 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2105 8 << TCGOP_VECE(op)); 2106 } 2107 2108 k = 0; 2109 for (i = 0; i < nb_oargs; i++) { 2110 const char *sep = k ? "," : ""; 2111 col += ne_fprintf(f, "%s%s", sep, 2112 tcg_get_arg_str(s, buf, sizeof(buf), 2113 op->args[k++])); 2114 } 2115 for (i = 0; i < nb_iargs; i++) { 2116 const char *sep = k ? "," : ""; 2117 col += ne_fprintf(f, "%s%s", sep, 2118 tcg_get_arg_str(s, buf, sizeof(buf), 2119 op->args[k++])); 2120 } 2121 switch (c) { 2122 case INDEX_op_brcond_i32: 2123 case INDEX_op_setcond_i32: 2124 case INDEX_op_movcond_i32: 2125 case INDEX_op_brcond2_i32: 2126 case INDEX_op_setcond2_i32: 2127 case INDEX_op_brcond_i64: 2128 case INDEX_op_setcond_i64: 2129 case INDEX_op_movcond_i64: 2130 case INDEX_op_cmp_vec: 2131 case INDEX_op_cmpsel_vec: 2132 if (op->args[k] < ARRAY_SIZE(cond_name) 2133 && cond_name[op->args[k]]) { 2134 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2135 } else { 2136 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2137 } 2138 i = 1; 2139 break; 2140 case INDEX_op_qemu_ld_i32: 2141 case INDEX_op_qemu_st_i32: 2142 case INDEX_op_qemu_st8_i32: 2143 case INDEX_op_qemu_ld_i64: 2144 case INDEX_op_qemu_st_i64: 2145 { 2146 MemOpIdx oi = op->args[k++]; 2147 MemOp op = get_memop(oi); 2148 unsigned ix = get_mmuidx(oi); 2149 2150 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2151 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2152 } else { 2153 const char *s_al, *s_op; 2154 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2155 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2156 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 2157 } 2158 i = 1; 2159 } 2160 break; 2161 case INDEX_op_bswap16_i32: 2162 case INDEX_op_bswap16_i64: 2163 case INDEX_op_bswap32_i32: 2164 case INDEX_op_bswap32_i64: 2165 case INDEX_op_bswap64_i64: 2166 { 2167 TCGArg flags = op->args[k]; 2168 const char *name = NULL; 2169 2170 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2171 name = bswap_flag_name[flags]; 2172 } 2173 if (name) { 2174 col += ne_fprintf(f, ",%s", name); 2175 } else { 2176 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2177 } 2178 i = k = 1; 2179 } 2180 break; 2181 default: 2182 i = 0; 2183 break; 2184 } 2185 switch (c) { 2186 case INDEX_op_set_label: 2187 case INDEX_op_br: 2188 case INDEX_op_brcond_i32: 2189 case INDEX_op_brcond_i64: 2190 case INDEX_op_brcond2_i32: 2191 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2192 arg_label(op->args[k])->id); 2193 i++, k++; 2194 break; 2195 default: 2196 break; 2197 } 2198 for (; i < nb_cargs; i++, k++) { 2199 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2200 op->args[k]); 2201 } 2202 } 2203 2204 if (have_prefs || op->life) { 2205 for (; col < 40; ++col) { 2206 putc(' ', f); 2207 } 2208 } 2209 2210 if (op->life) { 2211 unsigned life = op->life; 2212 2213 if (life & (SYNC_ARG * 3)) { 2214 ne_fprintf(f, " sync:"); 2215 for (i = 0; i < 2; ++i) { 2216 if (life & (SYNC_ARG << i)) { 2217 ne_fprintf(f, " %d", i); 2218 } 2219 } 2220 } 2221 life /= DEAD_ARG; 2222 if (life) { 2223 ne_fprintf(f, " dead:"); 2224 for (i = 0; life; ++i, life >>= 1) { 2225 if (life & 1) { 2226 ne_fprintf(f, " %d", i); 2227 } 2228 } 2229 } 2230 } 2231 2232 if (have_prefs) { 2233 for (i = 0; i < nb_oargs; ++i) { 2234 TCGRegSet set = output_pref(op, i); 2235 2236 if (i == 0) { 2237 ne_fprintf(f, " pref="); 2238 } else { 2239 ne_fprintf(f, ","); 2240 } 2241 if (set == 0) { 2242 ne_fprintf(f, "none"); 2243 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2244 ne_fprintf(f, "all"); 2245 #ifdef CONFIG_DEBUG_TCG 2246 } else if (tcg_regset_single(set)) { 2247 TCGReg reg = tcg_regset_first(set); 2248 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2249 #endif 2250 } else if (TCG_TARGET_NB_REGS <= 32) { 2251 ne_fprintf(f, "0x%x", (uint32_t)set); 2252 } else { 2253 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2254 } 2255 } 2256 } 2257 2258 putc('\n', f); 2259 } 2260 } 2261 2262 /* we give more priority to constraints with less registers */ 2263 static int get_constraint_priority(const TCGOpDef *def, int k) 2264 { 2265 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2266 int n = ctpop64(arg_ct->regs); 2267 2268 /* 2269 * Sort constraints of a single register first, which includes output 2270 * aliases (which must exactly match the input already allocated). 2271 */ 2272 if (n == 1 || arg_ct->oalias) { 2273 return INT_MAX; 2274 } 2275 2276 /* 2277 * Sort register pairs next, first then second immediately after. 2278 * Arbitrarily sort multiple pairs by the index of the first reg; 2279 * there shouldn't be many pairs. 2280 */ 2281 switch (arg_ct->pair) { 2282 case 1: 2283 case 3: 2284 return (k + 1) * 2; 2285 case 2: 2286 return (arg_ct->pair_index + 1) * 2 - 1; 2287 } 2288 2289 /* Finally, sort by decreasing register count. */ 2290 assert(n > 1); 2291 return -n; 2292 } 2293 2294 /* sort from highest priority to lowest */ 2295 static void sort_constraints(TCGOpDef *def, int start, int n) 2296 { 2297 int i, j; 2298 TCGArgConstraint *a = def->args_ct; 2299 2300 for (i = 0; i < n; i++) { 2301 a[start + i].sort_index = start + i; 2302 } 2303 if (n <= 1) { 2304 return; 2305 } 2306 for (i = 0; i < n - 1; i++) { 2307 for (j = i + 1; j < n; j++) { 2308 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2309 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2310 if (p1 < p2) { 2311 int tmp = a[start + i].sort_index; 2312 a[start + i].sort_index = a[start + j].sort_index; 2313 a[start + j].sort_index = tmp; 2314 } 2315 } 2316 } 2317 } 2318 2319 static void process_op_defs(TCGContext *s) 2320 { 2321 TCGOpcode op; 2322 2323 for (op = 0; op < NB_OPS; op++) { 2324 TCGOpDef *def = &tcg_op_defs[op]; 2325 const TCGTargetOpDef *tdefs; 2326 bool saw_alias_pair = false; 2327 int i, o, i2, o2, nb_args; 2328 2329 if (def->flags & TCG_OPF_NOT_PRESENT) { 2330 continue; 2331 } 2332 2333 nb_args = def->nb_iargs + def->nb_oargs; 2334 if (nb_args == 0) { 2335 continue; 2336 } 2337 2338 /* 2339 * Macro magic should make it impossible, but double-check that 2340 * the array index is in range. 
Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias. */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
*
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}

static void remove_label_use(TCGOp *op, int idx)
{
    TCGLabel *label = arg_label(op->args[idx]);
    TCGLabelUse *use;

    QSIMPLEQ_FOREACH(use, &label->branches, next) {
        if (use->op == op) {
            QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
            return;
        }
    }
    g_assert_not_reached();
}

void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    switch (op->opc) {
    case INDEX_op_br:
        remove_label_use(op, 0);
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        remove_label_use(op, 3);
        break;
    case INDEX_op_brcond2_i32:
        remove_label_use(op, 5);
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}

void tcg_remove_ops_after(TCGOp *op)
{
    TCGContext *s = tcg_ctx;

    while (true) {
        TCGOp *last = tcg_last_op();
        if (last == op) {
            return;
        }
        tcg_op_remove(s, last);
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
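            /* First-fit: reuse any free op that has enough argument slots. */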
2584 if (nargs <= op->nargs) { 2585 QTAILQ_REMOVE(&s->free_ops, op, link); 2586 nargs = op->nargs; 2587 goto found; 2588 } 2589 } 2590 } 2591 2592 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 2593 nargs = MAX(4, nargs); 2594 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 2595 2596 found: 2597 memset(op, 0, offsetof(TCGOp, link)); 2598 op->opc = opc; 2599 op->nargs = nargs; 2600 2601 /* Check for bitfield overflow. */ 2602 tcg_debug_assert(op->nargs == nargs); 2603 2604 s->nb_ops++; 2605 return op; 2606 } 2607 2608 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 2609 { 2610 TCGOp *op = tcg_op_alloc(opc, nargs); 2611 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2612 return op; 2613 } 2614 2615 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 2616 TCGOpcode opc, unsigned nargs) 2617 { 2618 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2619 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2620 return new_op; 2621 } 2622 2623 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 2624 TCGOpcode opc, unsigned nargs) 2625 { 2626 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2627 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2628 return new_op; 2629 } 2630 2631 static void move_label_uses(TCGLabel *to, TCGLabel *from) 2632 { 2633 TCGLabelUse *u; 2634 2635 QSIMPLEQ_FOREACH(u, &from->branches, next) { 2636 TCGOp *op = u->op; 2637 switch (op->opc) { 2638 case INDEX_op_br: 2639 op->args[0] = label_arg(to); 2640 break; 2641 case INDEX_op_brcond_i32: 2642 case INDEX_op_brcond_i64: 2643 op->args[3] = label_arg(to); 2644 break; 2645 case INDEX_op_brcond2_i32: 2646 op->args[5] = label_arg(to); 2647 break; 2648 default: 2649 g_assert_not_reached(); 2650 } 2651 } 2652 2653 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 2654 } 2655 2656 /* Reachable analysis : remove unreachable code. */ 2657 static void __attribute__((noinline)) 2658 reachable_code_pass(TCGContext *s) 2659 { 2660 TCGOp *op, *op_next, *op_prev; 2661 bool dead = false; 2662 2663 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2664 bool remove = dead; 2665 TCGLabel *label; 2666 2667 switch (op->opc) { 2668 case INDEX_op_set_label: 2669 label = arg_label(op->args[0]); 2670 2671 /* 2672 * Note that the first op in the TB is always a load, 2673 * so there is always something before a label. 2674 */ 2675 op_prev = QTAILQ_PREV(op, link); 2676 2677 /* 2678 * If we find two sequential labels, move all branches to 2679 * reference the second label and remove the first label. 2680 * Do this before branch to next optimization, so that the 2681 * middle label is out of the way. 2682 */ 2683 if (op_prev->opc == INDEX_op_set_label) { 2684 move_label_uses(label, arg_label(op_prev->args[0])); 2685 tcg_op_remove(s, op_prev); 2686 op_prev = QTAILQ_PREV(op, link); 2687 } 2688 2689 /* 2690 * Optimization can fold conditional branches to unconditional. 2691 * If we find a label which is preceded by an unconditional 2692 * branch to next, remove the branch. We couldn't do this when 2693 * processing the branch because any dead code between the branch 2694 * and label had not yet been removed. 2695 */ 2696 if (op_prev->opc == INDEX_op_br && 2697 label == arg_label(op_prev->args[0])) { 2698 tcg_op_remove(s, op_prev); 2699 /* Fall through means insns become live again. */ 2700 dead = false; 2701 } 2702 2703 if (QSIMPLEQ_EMPTY(&label->branches)) { 2704 /* 2705 * While there is an occasional backward branch, virtually 2706 * all branches generated by the translators are forward. 
2707 * Which means that generally we will have already removed 2708 * all references to the label that will be, and there is 2709 * little to be gained by iterating. 2710 */ 2711 remove = true; 2712 } else { 2713 /* Once we see a label, insns become live again. */ 2714 dead = false; 2715 remove = false; 2716 } 2717 break; 2718 2719 case INDEX_op_br: 2720 case INDEX_op_exit_tb: 2721 case INDEX_op_goto_ptr: 2722 /* Unconditional branches; everything following is dead. */ 2723 dead = true; 2724 break; 2725 2726 case INDEX_op_call: 2727 /* Notice noreturn helper calls, raising exceptions. */ 2728 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2729 dead = true; 2730 } 2731 break; 2732 2733 case INDEX_op_insn_start: 2734 /* Never remove -- we need to keep these for unwind. */ 2735 remove = false; 2736 break; 2737 2738 default: 2739 break; 2740 } 2741 2742 if (remove) { 2743 tcg_op_remove(s, op); 2744 } 2745 } 2746 } 2747 2748 #define TS_DEAD 1 2749 #define TS_MEM 2 2750 2751 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2752 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2753 2754 /* For liveness_pass_1, the register preferences for a given temp. */ 2755 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2756 { 2757 return ts->state_ptr; 2758 } 2759 2760 /* For liveness_pass_1, reset the preferences for a given temp to the 2761 * maximal regset for its type. 2762 */ 2763 static inline void la_reset_pref(TCGTemp *ts) 2764 { 2765 *la_temp_pref(ts) 2766 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2767 } 2768 2769 /* liveness analysis: end of function: all temps are dead, and globals 2770 should be in memory. */ 2771 static void la_func_end(TCGContext *s, int ng, int nt) 2772 { 2773 int i; 2774 2775 for (i = 0; i < ng; ++i) { 2776 s->temps[i].state = TS_DEAD | TS_MEM; 2777 la_reset_pref(&s->temps[i]); 2778 } 2779 for (i = ng; i < nt; ++i) { 2780 s->temps[i].state = TS_DEAD; 2781 la_reset_pref(&s->temps[i]); 2782 } 2783 } 2784 2785 /* liveness analysis: end of basic block: all temps are dead, globals 2786 and local temps should be in memory. */ 2787 static void la_bb_end(TCGContext *s, int ng, int nt) 2788 { 2789 int i; 2790 2791 for (i = 0; i < nt; ++i) { 2792 TCGTemp *ts = &s->temps[i]; 2793 int state; 2794 2795 switch (ts->kind) { 2796 case TEMP_FIXED: 2797 case TEMP_GLOBAL: 2798 case TEMP_TB: 2799 state = TS_DEAD | TS_MEM; 2800 break; 2801 case TEMP_EBB: 2802 case TEMP_CONST: 2803 state = TS_DEAD; 2804 break; 2805 default: 2806 g_assert_not_reached(); 2807 } 2808 ts->state = state; 2809 la_reset_pref(ts); 2810 } 2811 } 2812 2813 /* liveness analysis: sync globals back to memory. */ 2814 static void la_global_sync(TCGContext *s, int ng) 2815 { 2816 int i; 2817 2818 for (i = 0; i < ng; ++i) { 2819 int state = s->temps[i].state; 2820 s->temps[i].state = state | TS_MEM; 2821 if (state == TS_DEAD) { 2822 /* If the global was previously dead, reset prefs. */ 2823 la_reset_pref(&s->temps[i]); 2824 } 2825 } 2826 } 2827 2828 /* 2829 * liveness analysis: conditional branch: all temps are dead unless 2830 * explicitly live-across-conditional-branch, globals and local temps 2831 * should be synced. 
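 * (Here TEMP_TB temps are synced but stay valid, while TEMP_EBB and
 * TEMP_CONST temps are left alone: they may remain live in registers
 * across the conditional branch.)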
*/
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_TB:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                continue;
            }
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart. */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed.
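   For example, given "add_i32 t3, t1, t2" where t1 has no later use,
   the DEAD_ARG bit for t1's slot is set, which lets the register
   allocator reuse t1's register for t3.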
*/ 2964 static void __attribute__((noinline)) 2965 liveness_pass_1(TCGContext *s) 2966 { 2967 int nb_globals = s->nb_globals; 2968 int nb_temps = s->nb_temps; 2969 TCGOp *op, *op_prev; 2970 TCGRegSet *prefs; 2971 int i; 2972 2973 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2974 for (i = 0; i < nb_temps; ++i) { 2975 s->temps[i].state_ptr = prefs + i; 2976 } 2977 2978 /* ??? Should be redundant with the exit_tb that ends the TB. */ 2979 la_func_end(s, nb_globals, nb_temps); 2980 2981 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2982 int nb_iargs, nb_oargs; 2983 TCGOpcode opc_new, opc_new2; 2984 bool have_opc_new2; 2985 TCGLifeData arg_life = 0; 2986 TCGTemp *ts; 2987 TCGOpcode opc = op->opc; 2988 const TCGOpDef *def = &tcg_op_defs[opc]; 2989 2990 switch (opc) { 2991 case INDEX_op_call: 2992 { 2993 const TCGHelperInfo *info = tcg_call_info(op); 2994 int call_flags = tcg_call_flags(op); 2995 2996 nb_oargs = TCGOP_CALLO(op); 2997 nb_iargs = TCGOP_CALLI(op); 2998 2999 /* pure functions can be removed if their result is unused */ 3000 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3001 for (i = 0; i < nb_oargs; i++) { 3002 ts = arg_temp(op->args[i]); 3003 if (ts->state != TS_DEAD) { 3004 goto do_not_remove_call; 3005 } 3006 } 3007 goto do_remove; 3008 } 3009 do_not_remove_call: 3010 3011 /* Output args are dead. */ 3012 for (i = 0; i < nb_oargs; i++) { 3013 ts = arg_temp(op->args[i]); 3014 if (ts->state & TS_DEAD) { 3015 arg_life |= DEAD_ARG << i; 3016 } 3017 if (ts->state & TS_MEM) { 3018 arg_life |= SYNC_ARG << i; 3019 } 3020 ts->state = TS_DEAD; 3021 la_reset_pref(ts); 3022 } 3023 3024 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3025 memset(op->output_pref, 0, sizeof(op->output_pref)); 3026 3027 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3028 TCG_CALL_NO_READ_GLOBALS))) { 3029 la_global_kill(s, nb_globals); 3030 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3031 la_global_sync(s, nb_globals); 3032 } 3033 3034 /* Record arguments that die in this helper. */ 3035 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3036 ts = arg_temp(op->args[i]); 3037 if (ts->state & TS_DEAD) { 3038 arg_life |= DEAD_ARG << i; 3039 } 3040 } 3041 3042 /* For all live registers, remove call-clobbered prefs. */ 3043 la_cross_call(s, nb_temps); 3044 3045 /* 3046 * Input arguments are live for preceding opcodes. 3047 * 3048 * For those arguments that die, and will be allocated in 3049 * registers, clear the register set for that arg, to be 3050 * filled in below. For args that will be on the stack, 3051 * reset to any available reg. Process arguments in reverse 3052 * order so that if a temp is used more than once, the stack 3053 * reset to max happens before the register reset to 0. 3054 */ 3055 for (i = nb_iargs - 1; i >= 0; i--) { 3056 const TCGCallArgumentLoc *loc = &info->in[i]; 3057 ts = arg_temp(op->args[nb_oargs + i]); 3058 3059 if (ts->state & TS_DEAD) { 3060 switch (loc->kind) { 3061 case TCG_CALL_ARG_NORMAL: 3062 case TCG_CALL_ARG_EXTEND_U: 3063 case TCG_CALL_ARG_EXTEND_S: 3064 if (REG_P(loc)) { 3065 *la_temp_pref(ts) = 0; 3066 break; 3067 } 3068 /* fall through */ 3069 default: 3070 *la_temp_pref(ts) = 3071 tcg_target_available_regs[ts->type]; 3072 break; 3073 } 3074 ts->state &= ~TS_DEAD; 3075 } 3076 } 3077 3078 /* 3079 * For each input argument, add its input register to prefs. 3080 * If a temp is used once, this produces a single set bit; 3081 * if a temp is used multiple times, this produces a set. 
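             *
             * For illustration (host-dependent): a temp passed in two
             * register argument slots would accumulate one preference
             * bit per argument register here, e.g. {R0, R1} on a host
             * whose first two integer argument registers are R0 and R1.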
3082 */ 3083 for (i = 0; i < nb_iargs; i++) { 3084 const TCGCallArgumentLoc *loc = &info->in[i]; 3085 ts = arg_temp(op->args[nb_oargs + i]); 3086 3087 switch (loc->kind) { 3088 case TCG_CALL_ARG_NORMAL: 3089 case TCG_CALL_ARG_EXTEND_U: 3090 case TCG_CALL_ARG_EXTEND_S: 3091 if (REG_P(loc)) { 3092 tcg_regset_set_reg(*la_temp_pref(ts), 3093 tcg_target_call_iarg_regs[loc->arg_slot]); 3094 } 3095 break; 3096 default: 3097 break; 3098 } 3099 } 3100 } 3101 break; 3102 case INDEX_op_insn_start: 3103 break; 3104 case INDEX_op_discard: 3105 /* mark the temporary as dead */ 3106 ts = arg_temp(op->args[0]); 3107 ts->state = TS_DEAD; 3108 la_reset_pref(ts); 3109 break; 3110 3111 case INDEX_op_add2_i32: 3112 opc_new = INDEX_op_add_i32; 3113 goto do_addsub2; 3114 case INDEX_op_sub2_i32: 3115 opc_new = INDEX_op_sub_i32; 3116 goto do_addsub2; 3117 case INDEX_op_add2_i64: 3118 opc_new = INDEX_op_add_i64; 3119 goto do_addsub2; 3120 case INDEX_op_sub2_i64: 3121 opc_new = INDEX_op_sub_i64; 3122 do_addsub2: 3123 nb_iargs = 4; 3124 nb_oargs = 2; 3125 /* Test if the high part of the operation is dead, but not 3126 the low part. The result can be optimized to a simple 3127 add or sub. This happens often for x86_64 guest when the 3128 cpu mode is set to 32 bit. */ 3129 if (arg_temp(op->args[1])->state == TS_DEAD) { 3130 if (arg_temp(op->args[0])->state == TS_DEAD) { 3131 goto do_remove; 3132 } 3133 /* Replace the opcode and adjust the args in place, 3134 leaving 3 unused args at the end. */ 3135 op->opc = opc = opc_new; 3136 op->args[1] = op->args[2]; 3137 op->args[2] = op->args[4]; 3138 /* Fall through and mark the single-word operation live. */ 3139 nb_iargs = 2; 3140 nb_oargs = 1; 3141 } 3142 goto do_not_remove; 3143 3144 case INDEX_op_mulu2_i32: 3145 opc_new = INDEX_op_mul_i32; 3146 opc_new2 = INDEX_op_muluh_i32; 3147 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3148 goto do_mul2; 3149 case INDEX_op_muls2_i32: 3150 opc_new = INDEX_op_mul_i32; 3151 opc_new2 = INDEX_op_mulsh_i32; 3152 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3153 goto do_mul2; 3154 case INDEX_op_mulu2_i64: 3155 opc_new = INDEX_op_mul_i64; 3156 opc_new2 = INDEX_op_muluh_i64; 3157 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3158 goto do_mul2; 3159 case INDEX_op_muls2_i64: 3160 opc_new = INDEX_op_mul_i64; 3161 opc_new2 = INDEX_op_mulsh_i64; 3162 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3163 goto do_mul2; 3164 do_mul2: 3165 nb_iargs = 2; 3166 nb_oargs = 2; 3167 if (arg_temp(op->args[1])->state == TS_DEAD) { 3168 if (arg_temp(op->args[0])->state == TS_DEAD) { 3169 /* Both parts of the operation are dead. */ 3170 goto do_remove; 3171 } 3172 /* The high part of the operation is dead; generate the low. */ 3173 op->opc = opc = opc_new; 3174 op->args[1] = op->args[2]; 3175 op->args[2] = op->args[3]; 3176 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3177 /* The low part of the operation is dead; generate the high. */ 3178 op->opc = opc = opc_new2; 3179 op->args[0] = op->args[1]; 3180 op->args[1] = op->args[2]; 3181 op->args[2] = op->args[3]; 3182 } else { 3183 goto do_not_remove; 3184 } 3185 /* Mark the single-word operation live. */ 3186 nb_oargs = 1; 3187 goto do_not_remove; 3188 3189 default: 3190 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3191 nb_iargs = def->nb_iargs; 3192 nb_oargs = def->nb_oargs; 3193 3194 /* Test if the operation can be removed because all 3195 its outputs are dead. 
We assume that nb_oargs == 0 3196 implies side effects */ 3197 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3198 for (i = 0; i < nb_oargs; i++) { 3199 if (arg_temp(op->args[i])->state != TS_DEAD) { 3200 goto do_not_remove; 3201 } 3202 } 3203 goto do_remove; 3204 } 3205 goto do_not_remove; 3206 3207 do_remove: 3208 tcg_op_remove(s, op); 3209 break; 3210 3211 do_not_remove: 3212 for (i = 0; i < nb_oargs; i++) { 3213 ts = arg_temp(op->args[i]); 3214 3215 /* Remember the preference of the uses that followed. */ 3216 if (i < ARRAY_SIZE(op->output_pref)) { 3217 op->output_pref[i] = *la_temp_pref(ts); 3218 } 3219 3220 /* Output args are dead. */ 3221 if (ts->state & TS_DEAD) { 3222 arg_life |= DEAD_ARG << i; 3223 } 3224 if (ts->state & TS_MEM) { 3225 arg_life |= SYNC_ARG << i; 3226 } 3227 ts->state = TS_DEAD; 3228 la_reset_pref(ts); 3229 } 3230 3231 /* If end of basic block, update. */ 3232 if (def->flags & TCG_OPF_BB_EXIT) { 3233 la_func_end(s, nb_globals, nb_temps); 3234 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3235 la_bb_sync(s, nb_globals, nb_temps); 3236 } else if (def->flags & TCG_OPF_BB_END) { 3237 la_bb_end(s, nb_globals, nb_temps); 3238 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3239 la_global_sync(s, nb_globals); 3240 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3241 la_cross_call(s, nb_temps); 3242 } 3243 } 3244 3245 /* Record arguments that die in this opcode. */ 3246 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3247 ts = arg_temp(op->args[i]); 3248 if (ts->state & TS_DEAD) { 3249 arg_life |= DEAD_ARG << i; 3250 } 3251 } 3252 3253 /* Input arguments are live for preceding opcodes. */ 3254 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3255 ts = arg_temp(op->args[i]); 3256 if (ts->state & TS_DEAD) { 3257 /* For operands that were dead, initially allow 3258 all regs for the type. */ 3259 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3260 ts->state &= ~TS_DEAD; 3261 } 3262 } 3263 3264 /* Incorporate constraints for this operand. */ 3265 switch (opc) { 3266 case INDEX_op_mov_i32: 3267 case INDEX_op_mov_i64: 3268 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3269 have proper constraints. That said, special case 3270 moves to propagate preferences backward. */ 3271 if (IS_DEAD_ARG(1)) { 3272 *la_temp_pref(arg_temp(op->args[0])) 3273 = *la_temp_pref(arg_temp(op->args[1])); 3274 } 3275 break; 3276 3277 default: 3278 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3279 const TCGArgConstraint *ct = &def->args_ct[i]; 3280 TCGRegSet set, *pset; 3281 3282 ts = arg_temp(op->args[i]); 3283 pset = la_temp_pref(ts); 3284 set = *pset; 3285 3286 set &= ct->regs; 3287 if (ct->ialias) { 3288 set &= output_pref(op, ct->alias_index); 3289 } 3290 /* If the combination is not possible, restart. */ 3291 if (set == 0) { 3292 set = ct->regs; 3293 } 3294 *pset = set; 3295 } 3296 break; 3297 } 3298 break; 3299 } 3300 op->life = arg_life; 3301 } 3302 } 3303 3304 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3305 static bool __attribute__((noinline)) 3306 liveness_pass_2(TCGContext *s) 3307 { 3308 int nb_globals = s->nb_globals; 3309 int nb_temps, i; 3310 bool changes = false; 3311 TCGOp *op, *op_next; 3312 3313 /* Create a temporary for each indirect global. 
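       (An indirect global is one whose canonical storage is reached via
       a base that is not a fixed register and must itself be loaded;
       the TEMP_EBB temp created below carries its value between the
       loads and stores that this pass inserts.)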
*/ 3314 for (i = 0; i < nb_globals; ++i) { 3315 TCGTemp *its = &s->temps[i]; 3316 if (its->indirect_reg) { 3317 TCGTemp *dts = tcg_temp_alloc(s); 3318 dts->type = its->type; 3319 dts->base_type = its->base_type; 3320 dts->temp_subindex = its->temp_subindex; 3321 dts->kind = TEMP_EBB; 3322 its->state_ptr = dts; 3323 } else { 3324 its->state_ptr = NULL; 3325 } 3326 /* All globals begin dead. */ 3327 its->state = TS_DEAD; 3328 } 3329 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3330 TCGTemp *its = &s->temps[i]; 3331 its->state_ptr = NULL; 3332 its->state = TS_DEAD; 3333 } 3334 3335 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3336 TCGOpcode opc = op->opc; 3337 const TCGOpDef *def = &tcg_op_defs[opc]; 3338 TCGLifeData arg_life = op->life; 3339 int nb_iargs, nb_oargs, call_flags; 3340 TCGTemp *arg_ts, *dir_ts; 3341 3342 if (opc == INDEX_op_call) { 3343 nb_oargs = TCGOP_CALLO(op); 3344 nb_iargs = TCGOP_CALLI(op); 3345 call_flags = tcg_call_flags(op); 3346 } else { 3347 nb_iargs = def->nb_iargs; 3348 nb_oargs = def->nb_oargs; 3349 3350 /* Set flags similar to how calls require. */ 3351 if (def->flags & TCG_OPF_COND_BRANCH) { 3352 /* Like reading globals: sync_globals */ 3353 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3354 } else if (def->flags & TCG_OPF_BB_END) { 3355 /* Like writing globals: save_globals */ 3356 call_flags = 0; 3357 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3358 /* Like reading globals: sync_globals */ 3359 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3360 } else { 3361 /* No effect on globals. */ 3362 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3363 TCG_CALL_NO_WRITE_GLOBALS); 3364 } 3365 } 3366 3367 /* Make sure that input arguments are available. */ 3368 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3369 arg_ts = arg_temp(op->args[i]); 3370 dir_ts = arg_ts->state_ptr; 3371 if (dir_ts && arg_ts->state == TS_DEAD) { 3372 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3373 ? INDEX_op_ld_i32 3374 : INDEX_op_ld_i64); 3375 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3376 3377 lop->args[0] = temp_arg(dir_ts); 3378 lop->args[1] = temp_arg(arg_ts->mem_base); 3379 lop->args[2] = arg_ts->mem_offset; 3380 3381 /* Loaded, but synced with memory. */ 3382 arg_ts->state = TS_MEM; 3383 } 3384 } 3385 3386 /* Perform input replacement, and mark inputs that became dead. 3387 No action is required except keeping temp_state up to date 3388 so that we reload when needed. */ 3389 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3390 arg_ts = arg_temp(op->args[i]); 3391 dir_ts = arg_ts->state_ptr; 3392 if (dir_ts) { 3393 op->args[i] = temp_arg(dir_ts); 3394 changes = true; 3395 if (IS_DEAD_ARG(i)) { 3396 arg_ts->state = TS_DEAD; 3397 } 3398 } 3399 } 3400 3401 /* Liveness analysis should ensure that the following are 3402 all correct, for call sites and basic block end points. */ 3403 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3404 /* Nothing to do */ 3405 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3406 for (i = 0; i < nb_globals; ++i) { 3407 /* Liveness should see that globals are synced back, 3408 that is, either TS_DEAD or TS_MEM. */ 3409 arg_ts = &s->temps[i]; 3410 tcg_debug_assert(arg_ts->state_ptr == 0 3411 || arg_ts->state != 0); 3412 } 3413 } else { 3414 for (i = 0; i < nb_globals; ++i) { 3415 /* Liveness should see that globals are saved back, 3416 that is, TS_DEAD, waiting to be reloaded. */ 3417 arg_ts = &s->temps[i]; 3418 tcg_debug_assert(arg_ts->state_ptr == 0 3419 || arg_ts->state == TS_DEAD); 3420 } 3421 } 3422 3423 /* Outputs become available. 
*/ 3424 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3425 arg_ts = arg_temp(op->args[0]); 3426 dir_ts = arg_ts->state_ptr; 3427 if (dir_ts) { 3428 op->args[0] = temp_arg(dir_ts); 3429 changes = true; 3430 3431 /* The output is now live and modified. */ 3432 arg_ts->state = 0; 3433 3434 if (NEED_SYNC_ARG(0)) { 3435 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3436 ? INDEX_op_st_i32 3437 : INDEX_op_st_i64); 3438 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3439 TCGTemp *out_ts = dir_ts; 3440 3441 if (IS_DEAD_ARG(0)) { 3442 out_ts = arg_temp(op->args[1]); 3443 arg_ts->state = TS_DEAD; 3444 tcg_op_remove(s, op); 3445 } else { 3446 arg_ts->state = TS_MEM; 3447 } 3448 3449 sop->args[0] = temp_arg(out_ts); 3450 sop->args[1] = temp_arg(arg_ts->mem_base); 3451 sop->args[2] = arg_ts->mem_offset; 3452 } else { 3453 tcg_debug_assert(!IS_DEAD_ARG(0)); 3454 } 3455 } 3456 } else { 3457 for (i = 0; i < nb_oargs; i++) { 3458 arg_ts = arg_temp(op->args[i]); 3459 dir_ts = arg_ts->state_ptr; 3460 if (!dir_ts) { 3461 continue; 3462 } 3463 op->args[i] = temp_arg(dir_ts); 3464 changes = true; 3465 3466 /* The output is now live and modified. */ 3467 arg_ts->state = 0; 3468 3469 /* Sync outputs upon their last write. */ 3470 if (NEED_SYNC_ARG(i)) { 3471 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3472 ? INDEX_op_st_i32 3473 : INDEX_op_st_i64); 3474 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3475 3476 sop->args[0] = temp_arg(dir_ts); 3477 sop->args[1] = temp_arg(arg_ts->mem_base); 3478 sop->args[2] = arg_ts->mem_offset; 3479 3480 arg_ts->state = TS_MEM; 3481 } 3482 /* Drop outputs that are dead. */ 3483 if (IS_DEAD_ARG(i)) { 3484 arg_ts->state = TS_DEAD; 3485 } 3486 } 3487 } 3488 } 3489 3490 return changes; 3491 } 3492 3493 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3494 { 3495 intptr_t off; 3496 int size, align; 3497 3498 /* When allocating an object, look at the full type. */ 3499 size = tcg_type_size(ts->base_type); 3500 switch (ts->base_type) { 3501 case TCG_TYPE_I32: 3502 align = 4; 3503 break; 3504 case TCG_TYPE_I64: 3505 case TCG_TYPE_V64: 3506 align = 8; 3507 break; 3508 case TCG_TYPE_I128: 3509 case TCG_TYPE_V128: 3510 case TCG_TYPE_V256: 3511 /* 3512 * Note that we do not require aligned storage for V256, 3513 * and that we provide alignment for I128 to match V128, 3514 * even if that's above what the host ABI requires. 3515 */ 3516 align = 16; 3517 break; 3518 default: 3519 g_assert_not_reached(); 3520 } 3521 3522 /* 3523 * Assume the stack is sufficiently aligned. 3524 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3525 * and do not require 16 byte vector alignment. This seems slightly 3526 * easier than fully parameterizing the above switch statement. 3527 */ 3528 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3529 off = ROUND_UP(s->current_frame_offset, align); 3530 3531 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3532 if (off + size > s->frame_end) { 3533 tcg_raise_tb_overflow(s); 3534 } 3535 s->current_frame_offset = off + size; 3536 #if defined(__sparc__) 3537 off += TCG_TARGET_STACK_BIAS; 3538 #endif 3539 3540 /* If the object was subdivided, assign memory to all the parts. */ 3541 if (ts->base_type != ts->type) { 3542 int part_size = tcg_type_size(ts->type); 3543 int part_count = size / part_size; 3544 3545 /* 3546 * Each part is allocated sequentially in tcg_temp_new_internal. 3547 * Jump back to the first part by subtracting the current index. 
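         *
         * For example (assuming a 64-bit host for illustration): an I128
         * temp is stored as two I64 parts; for the part with
         * temp_subindex == 1, this steps back one slot so that both
         * parts receive consecutive mem_offsets below.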
*/
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}

/* Assign @reg to @ts, and update reg_to_temp[]. */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_TB:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   If 'free_or_dead' is non-zero, subsequently release the temporary;
   if it is positive, the temp is dead; if it is negative, the temp
   is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.
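               Not every host can store an arbitrary constant directly:
               tcg_out_sti may decline, in which case we fall through,
               load the constant into a register, and store that.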
*/ 3640 if (free_or_dead 3641 && tcg_out_sti(s, ts->type, ts->val, 3642 ts->mem_base->reg, ts->mem_offset)) { 3643 break; 3644 } 3645 temp_load(s, ts, tcg_target_available_regs[ts->type], 3646 allocated_regs, preferred_regs); 3647 /* fallthrough */ 3648 3649 case TEMP_VAL_REG: 3650 tcg_out_st(s, ts->type, ts->reg, 3651 ts->mem_base->reg, ts->mem_offset); 3652 break; 3653 3654 case TEMP_VAL_MEM: 3655 break; 3656 3657 case TEMP_VAL_DEAD: 3658 default: 3659 tcg_abort(); 3660 } 3661 ts->mem_coherent = 1; 3662 } 3663 if (free_or_dead) { 3664 temp_free_or_dead(s, ts, free_or_dead); 3665 } 3666 } 3667 3668 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3669 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3670 { 3671 TCGTemp *ts = s->reg_to_temp[reg]; 3672 if (ts != NULL) { 3673 temp_sync(s, ts, allocated_regs, 0, -1); 3674 } 3675 } 3676 3677 /** 3678 * tcg_reg_alloc: 3679 * @required_regs: Set of registers in which we must allocate. 3680 * @allocated_regs: Set of registers which must be avoided. 3681 * @preferred_regs: Set of registers we should prefer. 3682 * @rev: True if we search the registers in "indirect" order. 3683 * 3684 * The allocated register must be in @required_regs & ~@allocated_regs, 3685 * but if we can put it in @preferred_regs we may save a move later. 3686 */ 3687 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3688 TCGRegSet allocated_regs, 3689 TCGRegSet preferred_regs, bool rev) 3690 { 3691 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3692 TCGRegSet reg_ct[2]; 3693 const int *order; 3694 3695 reg_ct[1] = required_regs & ~allocated_regs; 3696 tcg_debug_assert(reg_ct[1] != 0); 3697 reg_ct[0] = reg_ct[1] & preferred_regs; 3698 3699 /* Skip the preferred_regs option if it cannot be satisfied, 3700 or if the preference made no difference. */ 3701 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3702 3703 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3704 3705 /* Try free registers, preferences first. */ 3706 for (j = f; j < 2; j++) { 3707 TCGRegSet set = reg_ct[j]; 3708 3709 if (tcg_regset_single(set)) { 3710 /* One register in the set. */ 3711 TCGReg reg = tcg_regset_first(set); 3712 if (s->reg_to_temp[reg] == NULL) { 3713 return reg; 3714 } 3715 } else { 3716 for (i = 0; i < n; i++) { 3717 TCGReg reg = order[i]; 3718 if (s->reg_to_temp[reg] == NULL && 3719 tcg_regset_test_reg(set, reg)) { 3720 return reg; 3721 } 3722 } 3723 } 3724 } 3725 3726 /* We must spill something. */ 3727 for (j = f; j < 2; j++) { 3728 TCGRegSet set = reg_ct[j]; 3729 3730 if (tcg_regset_single(set)) { 3731 /* One register in the set. */ 3732 TCGReg reg = tcg_regset_first(set); 3733 tcg_reg_free(s, reg, allocated_regs); 3734 return reg; 3735 } else { 3736 for (i = 0; i < n; i++) { 3737 TCGReg reg = order[i]; 3738 if (tcg_regset_test_reg(set, reg)) { 3739 tcg_reg_free(s, reg, allocated_regs); 3740 return reg; 3741 } 3742 } 3743 } 3744 } 3745 3746 tcg_abort(); 3747 } 3748 3749 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 3750 TCGRegSet allocated_regs, 3751 TCGRegSet preferred_regs, bool rev) 3752 { 3753 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3754 TCGRegSet reg_ct[2]; 3755 const int *order; 3756 3757 /* Ensure that if I is not in allocated_regs, I+1 is not either. 
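       For example, if only R2 is in allocated_regs, then ORing in
       allocated_regs >> 1 also excludes R1, since a pair based at R1
       would overlap R2.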
    */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    tcg_abort();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * so do it generically here.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code.
'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        switch (ts->kind) {
        case TEMP_TB:
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            break;
        default:
            g_assert_not_reached();
        }
    }
}

/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here. */
    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/*
 * Specialized code generation for INDEX_op_mov_*.
 */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;
    TCGReg oreg, ireg;

    allocated_regs = s->reserved_regs;
    preferred_regs = output_pref(op, 0);
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Note that otype != itype for no-op truncation.
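       e.g. a 64-bit source moved into a 32-bit temp on a 64-bit host,
       where the register contents are simply reinterpreted.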
    */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, so that we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype],
                  allocated_regs, preferred_regs);
    }
    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    ireg = ts->reg;

    if (IS_DEAD_ARG(0)) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
        return;
    }

    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
        /*
         * The mov can be suppressed.  Kill input first, so that it
         * is unlinked from reg_to_temp, then set the output to the
         * reg that we saved from the input.
         */
        temp_dead(s, ts);
        oreg = ireg;
    } else {
        if (ots->val_type == TEMP_VAL_REG) {
            oreg = ots->reg;
        } else {
            /* Make sure to not spill the input register during allocation. */
            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                 allocated_regs | ((TCGRegSet)1 << ireg),
                                 preferred_regs, ots->indirect_base);
        }
        if (!tcg_out_mov(s, otype, oreg, ireg)) {
            /*
             * Cross register class move not supported.
             * Store the source register into the destination slot
             * and leave the destination temp as TEMP_VAL_MEM.
             */
            assert(!temp_readonly(ots));
            if (!ots->mem_allocated) {
                temp_allocate_frame(s, ots);
            }
            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
            set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
            ots->mem_coherent = 1;
            return;
        }
    }
    set_temp_val_reg(s, ots, oreg);
    ots->mem_coherent = 0;

    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, allocated_regs, 0, 0);
    }
}

/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.
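           No code is emitted here: the constant is recorded in ots->val
           and the dupi is produced later, if and when the temp must be
           materialized in a register (see temp_load).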
 */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed.
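       The fallback chain above is: dup directly from the input register
       when the constraint allows, else dup from the memory slot via
       tcg_out_dupm_vec, else load the input into the output register and
       dup it onto itself here.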
*/ 4154 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4155 tcg_debug_assert(ok); 4156 4157 done: 4158 ots->mem_coherent = 0; 4159 if (IS_DEAD_ARG(1)) { 4160 temp_dead(s, its); 4161 } 4162 if (NEED_SYNC_ARG(0)) { 4163 temp_sync(s, ots, s->reserved_regs, 0, 0); 4164 } 4165 if (IS_DEAD_ARG(0)) { 4166 temp_dead(s, ots); 4167 } 4168 } 4169 4170 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4171 { 4172 const TCGLifeData arg_life = op->life; 4173 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4174 TCGRegSet i_allocated_regs; 4175 TCGRegSet o_allocated_regs; 4176 int i, k, nb_iargs, nb_oargs; 4177 TCGReg reg; 4178 TCGArg arg; 4179 const TCGArgConstraint *arg_ct; 4180 TCGTemp *ts; 4181 TCGArg new_args[TCG_MAX_OP_ARGS]; 4182 int const_args[TCG_MAX_OP_ARGS]; 4183 4184 nb_oargs = def->nb_oargs; 4185 nb_iargs = def->nb_iargs; 4186 4187 /* copy constants */ 4188 memcpy(new_args + nb_oargs + nb_iargs, 4189 op->args + nb_oargs + nb_iargs, 4190 sizeof(TCGArg) * def->nb_cargs); 4191 4192 i_allocated_regs = s->reserved_regs; 4193 o_allocated_regs = s->reserved_regs; 4194 4195 /* satisfy input constraints */ 4196 for (k = 0; k < nb_iargs; k++) { 4197 TCGRegSet i_preferred_regs, i_required_regs; 4198 bool allocate_new_reg, copyto_new_reg; 4199 TCGTemp *ts2; 4200 int i1, i2; 4201 4202 i = def->args_ct[nb_oargs + k].sort_index; 4203 arg = op->args[i]; 4204 arg_ct = &def->args_ct[i]; 4205 ts = arg_temp(arg); 4206 4207 if (ts->val_type == TEMP_VAL_CONST 4208 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4209 /* constant is OK for instruction */ 4210 const_args[i] = 1; 4211 new_args[i] = ts->val; 4212 continue; 4213 } 4214 4215 reg = ts->reg; 4216 i_preferred_regs = 0; 4217 i_required_regs = arg_ct->regs; 4218 allocate_new_reg = false; 4219 copyto_new_reg = false; 4220 4221 switch (arg_ct->pair) { 4222 case 0: /* not paired */ 4223 if (arg_ct->ialias) { 4224 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4225 4226 /* 4227 * If the input is readonly, then it cannot also be an 4228 * output and aliased to itself. If the input is not 4229 * dead after the instruction, we must allocate a new 4230 * register and move it. 4231 */ 4232 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4233 allocate_new_reg = true; 4234 } else if (ts->val_type == TEMP_VAL_REG) { 4235 /* 4236 * Check if the current register has already been 4237 * allocated for another input. 4238 */ 4239 allocate_new_reg = 4240 tcg_regset_test_reg(i_allocated_regs, reg); 4241 } 4242 } 4243 if (!allocate_new_reg) { 4244 temp_load(s, ts, i_required_regs, i_allocated_regs, 4245 i_preferred_regs); 4246 reg = ts->reg; 4247 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4248 } 4249 if (allocate_new_reg) { 4250 /* 4251 * Allocate a new register matching the constraint 4252 * and move the temporary register into it. 4253 */ 4254 temp_load(s, ts, tcg_target_available_regs[ts->type], 4255 i_allocated_regs, 0); 4256 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4257 i_preferred_regs, ts->indirect_base); 4258 copyto_new_reg = true; 4259 } 4260 break; 4261 4262 case 1: 4263 /* First of an input pair; if i1 == i2, the second is an output. */ 4264 i1 = i; 4265 i2 = arg_ct->pair_index; 4266 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4267 4268 /* 4269 * It is easier to default to allocating a new pair 4270 * and to identify a few cases where it's not required. 
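             * (For example, a 64-bit value on a 32-bit host occupies two
             * host registers, and pair constraints require them to be
             * adjacent, reg and reg + 1.  Enumerating every way an existing
             * allocation might already satisfy that is fiddly, so by
             * default we allocate a fresh pair and keep the current
             * registers only in the cheap cases checked below.)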
4271 */ 4272 if (arg_ct->ialias) { 4273 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4274 if (IS_DEAD_ARG(i1) && 4275 IS_DEAD_ARG(i2) && 4276 !temp_readonly(ts) && 4277 ts->val_type == TEMP_VAL_REG && 4278 ts->reg < TCG_TARGET_NB_REGS - 1 && 4279 tcg_regset_test_reg(i_required_regs, reg) && 4280 !tcg_regset_test_reg(i_allocated_regs, reg) && 4281 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4282 (ts2 4283 ? ts2->val_type == TEMP_VAL_REG && 4284 ts2->reg == reg + 1 && 4285 !temp_readonly(ts2) 4286 : s->reg_to_temp[reg + 1] == NULL)) { 4287 break; 4288 } 4289 } else { 4290 /* Without aliasing, the pair must also be an input. */ 4291 tcg_debug_assert(ts2); 4292 if (ts->val_type == TEMP_VAL_REG && 4293 ts2->val_type == TEMP_VAL_REG && 4294 ts2->reg == reg + 1 && 4295 tcg_regset_test_reg(i_required_regs, reg)) { 4296 break; 4297 } 4298 } 4299 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4300 0, ts->indirect_base); 4301 goto do_pair; 4302 4303 case 2: /* pair second */ 4304 reg = new_args[arg_ct->pair_index] + 1; 4305 goto do_pair; 4306 4307 case 3: /* ialias with second output, no first input */ 4308 tcg_debug_assert(arg_ct->ialias); 4309 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4310 4311 if (IS_DEAD_ARG(i) && 4312 !temp_readonly(ts) && 4313 ts->val_type == TEMP_VAL_REG && 4314 reg > 0 && 4315 s->reg_to_temp[reg - 1] == NULL && 4316 tcg_regset_test_reg(i_required_regs, reg) && 4317 !tcg_regset_test_reg(i_allocated_regs, reg) && 4318 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4319 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4320 break; 4321 } 4322 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4323 i_allocated_regs, 0, 4324 ts->indirect_base); 4325 tcg_regset_set_reg(i_allocated_regs, reg); 4326 reg += 1; 4327 goto do_pair; 4328 4329 do_pair: 4330 /* 4331 * If an aliased input is not dead after the instruction, 4332 * we must allocate a new register and move it. 4333 */ 4334 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4335 TCGRegSet t_allocated_regs = i_allocated_regs; 4336 4337 /* 4338 * Because of the alias, and the continued life, make sure 4339 * that the temp is somewhere *other* than the reg pair, 4340 * and we get a copy in reg. 4341 */ 4342 tcg_regset_set_reg(t_allocated_regs, reg); 4343 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4344 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4345 /* If ts was already in reg, copy it somewhere else. */ 4346 TCGReg nr; 4347 bool ok; 4348 4349 tcg_debug_assert(ts->kind != TEMP_FIXED); 4350 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4351 t_allocated_regs, 0, ts->indirect_base); 4352 ok = tcg_out_mov(s, ts->type, nr, reg); 4353 tcg_debug_assert(ok); 4354 4355 set_temp_val_reg(s, ts, nr); 4356 } else { 4357 temp_load(s, ts, tcg_target_available_regs[ts->type], 4358 t_allocated_regs, 0); 4359 copyto_new_reg = true; 4360 } 4361 } else { 4362 /* Preferably allocate to reg, otherwise copy. */ 4363 i_required_regs = (TCGRegSet)1 << reg; 4364 temp_load(s, ts, i_required_regs, i_allocated_regs, 4365 i_preferred_regs); 4366 copyto_new_reg = ts->reg != reg; 4367 } 4368 break; 4369 4370 default: 4371 g_assert_not_reached(); 4372 } 4373 4374 if (copyto_new_reg) { 4375 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4376 /* 4377 * Cross register class move not supported. Sync the 4378 * temp back to its slot and load from there. 
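                 * (For example, a host without direct moves between
                 * general-purpose and vector registers cannot satisfy
                 * tcg_out_mov across those classes; spilling to the stack
                 * slot and reloading into the constrained register is the
                 * generic escape hatch.)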
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.
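           cpu_env is a TEMP_FIXED readonly global; an opcode that named it
           as an output would be a front-end bug, which this assertion
           catches.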
*/ 4487 tcg_debug_assert(!temp_readonly(ts)); 4488 4489 if (NEED_SYNC_ARG(i)) { 4490 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4491 } else if (IS_DEAD_ARG(i)) { 4492 temp_dead(s, ts); 4493 } 4494 } 4495 } 4496 4497 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4498 { 4499 const TCGLifeData arg_life = op->life; 4500 TCGTemp *ots, *itsl, *itsh; 4501 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4502 4503 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4504 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4505 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4506 4507 ots = arg_temp(op->args[0]); 4508 itsl = arg_temp(op->args[1]); 4509 itsh = arg_temp(op->args[2]); 4510 4511 /* ENV should not be modified. */ 4512 tcg_debug_assert(!temp_readonly(ots)); 4513 4514 /* Allocate the output register now. */ 4515 if (ots->val_type != TEMP_VAL_REG) { 4516 TCGRegSet allocated_regs = s->reserved_regs; 4517 TCGRegSet dup_out_regs = 4518 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4519 TCGReg oreg; 4520 4521 /* Make sure to not spill the input registers. */ 4522 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 4523 tcg_regset_set_reg(allocated_regs, itsl->reg); 4524 } 4525 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 4526 tcg_regset_set_reg(allocated_regs, itsh->reg); 4527 } 4528 4529 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4530 output_pref(op, 0), ots->indirect_base); 4531 set_temp_val_reg(s, ots, oreg); 4532 } 4533 4534 /* Promote dup2 of immediates to dupi_vec. */ 4535 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 4536 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 4537 MemOp vece = MO_64; 4538 4539 if (val == dup_const(MO_8, val)) { 4540 vece = MO_8; 4541 } else if (val == dup_const(MO_16, val)) { 4542 vece = MO_16; 4543 } else if (val == dup_const(MO_32, val)) { 4544 vece = MO_32; 4545 } 4546 4547 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 4548 goto done; 4549 } 4550 4551 /* If the two inputs form one 64-bit value, try dupm_vec. */ 4552 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 4553 itsh->temp_subindex == !HOST_BIG_ENDIAN && 4554 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 4555 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 4556 4557 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 4558 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 4559 4560 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 4561 its->mem_base->reg, its->mem_offset)) { 4562 goto done; 4563 } 4564 } 4565 4566 /* Fall back to generic expansion. */ 4567 return false; 4568 4569 done: 4570 ots->mem_coherent = 0; 4571 if (IS_DEAD_ARG(1)) { 4572 temp_dead(s, itsl); 4573 } 4574 if (IS_DEAD_ARG(2)) { 4575 temp_dead(s, itsh); 4576 } 4577 if (NEED_SYNC_ARG(0)) { 4578 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 4579 } else if (IS_DEAD_ARG(0)) { 4580 temp_dead(s, ots); 4581 } 4582 return true; 4583 } 4584 4585 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 4586 TCGRegSet allocated_regs) 4587 { 4588 if (ts->val_type == TEMP_VAL_REG) { 4589 if (ts->reg != reg) { 4590 tcg_reg_free(s, reg, allocated_regs); 4591 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4592 /* 4593 * Cross register class move not supported. Sync the 4594 * temp back to its slot and load from there. 
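                 * (This arises e.g. when marshalling an argument into a
                 * fixed ABI register: the temp may currently live in a
                 * register class with no direct move to @reg, and the
                 * memory round trip still produces the required value.)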
4595 */ 4596 temp_sync(s, ts, allocated_regs, 0, 0); 4597 tcg_out_ld(s, ts->type, reg, 4598 ts->mem_base->reg, ts->mem_offset); 4599 } 4600 } 4601 } else { 4602 TCGRegSet arg_set = 0; 4603 4604 tcg_reg_free(s, reg, allocated_regs); 4605 tcg_regset_set_reg(arg_set, reg); 4606 temp_load(s, ts, arg_set, allocated_regs, 0); 4607 } 4608 } 4609 4610 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts, 4611 TCGRegSet allocated_regs) 4612 { 4613 /* 4614 * When the destination is on the stack, load up the temp and store. 4615 * If there are many call-saved registers, the temp might live to 4616 * see another use; otherwise it'll be discarded. 4617 */ 4618 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 4619 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 4620 TCG_TARGET_CALL_STACK_OFFSET + 4621 stk_slot * sizeof(tcg_target_long)); 4622 } 4623 4624 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 4625 TCGTemp *ts, TCGRegSet *allocated_regs) 4626 { 4627 if (REG_P(l)) { 4628 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 4629 load_arg_reg(s, reg, ts, *allocated_regs); 4630 tcg_regset_set_reg(*allocated_regs, reg); 4631 } else { 4632 load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs), 4633 ts, *allocated_regs); 4634 } 4635 } 4636 4637 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base, 4638 intptr_t ref_off, TCGRegSet *allocated_regs) 4639 { 4640 TCGReg reg; 4641 int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 4642 4643 if (stk_slot < 0) { 4644 reg = tcg_target_call_iarg_regs[arg_slot]; 4645 tcg_reg_free(s, reg, *allocated_regs); 4646 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4647 tcg_regset_set_reg(*allocated_regs, reg); 4648 } else { 4649 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 4650 *allocated_regs, 0, false); 4651 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4652 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 4653 TCG_TARGET_CALL_STACK_OFFSET 4654 + stk_slot * sizeof(tcg_target_long)); 4655 } 4656 } 4657 4658 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 4659 { 4660 const int nb_oargs = TCGOP_CALLO(op); 4661 const int nb_iargs = TCGOP_CALLI(op); 4662 const TCGLifeData arg_life = op->life; 4663 const TCGHelperInfo *info = tcg_call_info(op); 4664 TCGRegSet allocated_regs = s->reserved_regs; 4665 int i; 4666 4667 /* 4668 * Move inputs into place in reverse order, 4669 * so that we place stacked arguments first. 4670 */ 4671 for (i = nb_iargs - 1; i >= 0; --i) { 4672 const TCGCallArgumentLoc *loc = &info->in[i]; 4673 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 4674 4675 switch (loc->kind) { 4676 case TCG_CALL_ARG_NORMAL: 4677 case TCG_CALL_ARG_EXTEND_U: 4678 case TCG_CALL_ARG_EXTEND_S: 4679 load_arg_normal(s, loc, ts, &allocated_regs); 4680 break; 4681 case TCG_CALL_ARG_BY_REF: 4682 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4683 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 4684 TCG_TARGET_CALL_STACK_OFFSET 4685 + loc->ref_slot * sizeof(tcg_target_long), 4686 &allocated_regs); 4687 break; 4688 case TCG_CALL_ARG_BY_REF_N: 4689 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4690 break; 4691 default: 4692 g_assert_not_reached(); 4693 } 4694 } 4695 4696 /* Mark dead temporaries and free the associated registers. */ 4697 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4698 if (IS_DEAD_ARG(i)) { 4699 temp_dead(s, arg_temp(op->args[i])); 4700 } 4701 } 4702 4703 /* Clobber call registers. 
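       Every call-clobbered (caller-saved) register is freed here: any live
       temp occupying one is spilled or discarded by tcg_reg_free, since
       the helper invoked below may overwrite all of
       tcg_target_call_clobber_regs.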
*/ 4704 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4705 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4706 tcg_reg_free(s, i, allocated_regs); 4707 } 4708 } 4709 4710 /* 4711 * Save globals if they might be written by the helper, 4712 * sync them if they might be read. 4713 */ 4714 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 4715 /* Nothing to do */ 4716 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 4717 sync_globals(s, allocated_regs); 4718 } else { 4719 save_globals(s, allocated_regs); 4720 } 4721 4722 /* 4723 * If the ABI passes a pointer to the returned struct as the first 4724 * argument, load that now. Pass a pointer to the output home slot. 4725 */ 4726 if (info->out_kind == TCG_CALL_RET_BY_REF) { 4727 TCGTemp *ts = arg_temp(op->args[0]); 4728 4729 if (!ts->mem_allocated) { 4730 temp_allocate_frame(s, ts); 4731 } 4732 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 4733 } 4734 4735 tcg_out_call(s, tcg_call_func(op), info); 4736 4737 /* Assign output registers and emit moves if needed. */ 4738 switch (info->out_kind) { 4739 case TCG_CALL_RET_NORMAL: 4740 for (i = 0; i < nb_oargs; i++) { 4741 TCGTemp *ts = arg_temp(op->args[i]); 4742 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 4743 4744 /* ENV should not be modified. */ 4745 tcg_debug_assert(!temp_readonly(ts)); 4746 4747 set_temp_val_reg(s, ts, reg); 4748 ts->mem_coherent = 0; 4749 } 4750 break; 4751 4752 case TCG_CALL_RET_BY_VEC: 4753 { 4754 TCGTemp *ts = arg_temp(op->args[0]); 4755 4756 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 4757 tcg_debug_assert(ts->temp_subindex == 0); 4758 if (!ts->mem_allocated) { 4759 temp_allocate_frame(s, ts); 4760 } 4761 tcg_out_st(s, TCG_TYPE_V128, 4762 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 4763 ts->mem_base->reg, ts->mem_offset); 4764 } 4765 /* fall through to mark all parts in memory */ 4766 4767 case TCG_CALL_RET_BY_REF: 4768 /* The callee has performed a write through the reference. */ 4769 for (i = 0; i < nb_oargs; i++) { 4770 TCGTemp *ts = arg_temp(op->args[i]); 4771 ts->val_type = TEMP_VAL_MEM; 4772 } 4773 break; 4774 4775 default: 4776 g_assert_not_reached(); 4777 } 4778 4779 /* Flush or discard output registers as needed. 
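       NEED_SYNC_ARG means the value must also be stored back to its
       canonical memory slot; IS_DEAD_ARG means this was the last use, so
       the register can simply be released.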
*/ 4780 for (i = 0; i < nb_oargs; i++) { 4781 TCGTemp *ts = arg_temp(op->args[i]); 4782 if (NEED_SYNC_ARG(i)) { 4783 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 4784 } else if (IS_DEAD_ARG(i)) { 4785 temp_dead(s, ts); 4786 } 4787 } 4788 } 4789 4790 #ifdef CONFIG_PROFILER 4791 4792 /* avoid copy/paste errors */ 4793 #define PROF_ADD(to, from, field) \ 4794 do { \ 4795 (to)->field += qatomic_read(&((from)->field)); \ 4796 } while (0) 4797 4798 #define PROF_MAX(to, from, field) \ 4799 do { \ 4800 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 4801 if (val__ > (to)->field) { \ 4802 (to)->field = val__; \ 4803 } \ 4804 } while (0) 4805 4806 /* Pass in a zero'ed @prof */ 4807 static inline 4808 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 4809 { 4810 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4811 unsigned int i; 4812 4813 for (i = 0; i < n_ctxs; i++) { 4814 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4815 const TCGProfile *orig = &s->prof; 4816 4817 if (counters) { 4818 PROF_ADD(prof, orig, cpu_exec_time); 4819 PROF_ADD(prof, orig, tb_count1); 4820 PROF_ADD(prof, orig, tb_count); 4821 PROF_ADD(prof, orig, op_count); 4822 PROF_MAX(prof, orig, op_count_max); 4823 PROF_ADD(prof, orig, temp_count); 4824 PROF_MAX(prof, orig, temp_count_max); 4825 PROF_ADD(prof, orig, del_op_count); 4826 PROF_ADD(prof, orig, code_in_len); 4827 PROF_ADD(prof, orig, code_out_len); 4828 PROF_ADD(prof, orig, search_out_len); 4829 PROF_ADD(prof, orig, interm_time); 4830 PROF_ADD(prof, orig, code_time); 4831 PROF_ADD(prof, orig, la_time); 4832 PROF_ADD(prof, orig, opt_time); 4833 PROF_ADD(prof, orig, restore_count); 4834 PROF_ADD(prof, orig, restore_time); 4835 } 4836 if (table) { 4837 int i; 4838 4839 for (i = 0; i < NB_OPS; i++) { 4840 PROF_ADD(prof, orig, table_op_count[i]); 4841 } 4842 } 4843 } 4844 } 4845 4846 #undef PROF_ADD 4847 #undef PROF_MAX 4848 4849 static void tcg_profile_snapshot_counters(TCGProfile *prof) 4850 { 4851 tcg_profile_snapshot(prof, true, false); 4852 } 4853 4854 static void tcg_profile_snapshot_table(TCGProfile *prof) 4855 { 4856 tcg_profile_snapshot(prof, false, true); 4857 } 4858 4859 void tcg_dump_op_count(GString *buf) 4860 { 4861 TCGProfile prof = {}; 4862 int i; 4863 4864 tcg_profile_snapshot_table(&prof); 4865 for (i = 0; i < NB_OPS; i++) { 4866 g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, 4867 prof.table_op_count[i]); 4868 } 4869 } 4870 4871 int64_t tcg_cpu_exec_time(void) 4872 { 4873 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4874 unsigned int i; 4875 int64_t ret = 0; 4876 4877 for (i = 0; i < n_ctxs; i++) { 4878 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4879 const TCGProfile *prof = &s->prof; 4880 4881 ret += qatomic_read(&prof->cpu_exec_time); 4882 } 4883 return ret; 4884 } 4885 #else 4886 void tcg_dump_op_count(GString *buf) 4887 { 4888 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 4889 } 4890 4891 int64_t tcg_cpu_exec_time(void) 4892 { 4893 error_report("%s: TCG profiler not compiled", __func__); 4894 exit(EXIT_FAILURE); 4895 } 4896 #endif 4897 4898 4899 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) 4900 { 4901 #ifdef CONFIG_PROFILER 4902 TCGProfile *prof = &s->prof; 4903 #endif 4904 int i, num_insns; 4905 TCGOp *op; 4906 4907 #ifdef CONFIG_PROFILER 4908 { 4909 int n = 0; 4910 4911 QTAILQ_FOREACH(op, &s->ops, link) { 4912 n++; 4913 } 4914 qatomic_set(&prof->op_count, prof->op_count + n); 4915 if (n > prof->op_count_max) { 4916 
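            /* This context is the only writer of its prof fields; the
               plain read above paired with qatomic_set is sufficient for
               the concurrent qatomic_read in tcg_profile_snapshot(). */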
qatomic_set(&prof->op_count_max, n); 4917 } 4918 4919 n = s->nb_temps; 4920 qatomic_set(&prof->temp_count, prof->temp_count + n); 4921 if (n > prof->temp_count_max) { 4922 qatomic_set(&prof->temp_count_max, n); 4923 } 4924 } 4925 #endif 4926 4927 #ifdef DEBUG_DISAS 4928 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4929 && qemu_log_in_addr_range(pc_start))) { 4930 FILE *logfile = qemu_log_trylock(); 4931 if (logfile) { 4932 fprintf(logfile, "OP:\n"); 4933 tcg_dump_ops(s, logfile, false); 4934 fprintf(logfile, "\n"); 4935 qemu_log_unlock(logfile); 4936 } 4937 } 4938 #endif 4939 4940 #ifdef CONFIG_DEBUG_TCG 4941 /* Ensure all labels referenced have been emitted. */ 4942 { 4943 TCGLabel *l; 4944 bool error = false; 4945 4946 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4947 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 4948 qemu_log_mask(CPU_LOG_TB_OP, 4949 "$L%d referenced but not present.\n", l->id); 4950 error = true; 4951 } 4952 } 4953 assert(!error); 4954 } 4955 #endif 4956 4957 #ifdef CONFIG_PROFILER 4958 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4959 #endif 4960 4961 #ifdef USE_TCG_OPTIMIZATIONS 4962 tcg_optimize(s); 4963 #endif 4964 4965 #ifdef CONFIG_PROFILER 4966 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4967 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4968 #endif 4969 4970 reachable_code_pass(s); 4971 liveness_pass_0(s); 4972 liveness_pass_1(s); 4973 4974 if (s->nb_indirects > 0) { 4975 #ifdef DEBUG_DISAS 4976 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4977 && qemu_log_in_addr_range(pc_start))) { 4978 FILE *logfile = qemu_log_trylock(); 4979 if (logfile) { 4980 fprintf(logfile, "OP before indirect lowering:\n"); 4981 tcg_dump_ops(s, logfile, false); 4982 fprintf(logfile, "\n"); 4983 qemu_log_unlock(logfile); 4984 } 4985 } 4986 #endif 4987 /* Replace indirect temps with direct temps. */ 4988 if (liveness_pass_2(s)) { 4989 /* If changes were made, re-run liveness. */ 4990 liveness_pass_1(s); 4991 } 4992 } 4993 4994 #ifdef CONFIG_PROFILER 4995 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 4996 #endif 4997 4998 #ifdef DEBUG_DISAS 4999 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 5000 && qemu_log_in_addr_range(pc_start))) { 5001 FILE *logfile = qemu_log_trylock(); 5002 if (logfile) { 5003 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 5004 tcg_dump_ops(s, logfile, true); 5005 fprintf(logfile, "\n"); 5006 qemu_log_unlock(logfile); 5007 } 5008 } 5009 #endif 5010 5011 /* Initialize goto_tb jump offsets. */ 5012 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 5013 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 5014 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 5015 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 5016 5017 tcg_reg_alloc_start(s); 5018 5019 /* 5020 * Reset the buffer pointers when restarting after overflow. 5021 * TODO: Move this into translate-all.c with the rest of the 5022 * buffer management. Having only this done here is confusing. 
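     * (When we return -1 or -2 below, the caller is expected to retry the
     * translation, so code_buf and code_ptr must be re-derived from
     * tb->tc.ptr on every entry rather than carried over.)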
5023 */ 5024 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 5025 s->code_ptr = s->code_buf; 5026 5027 #ifdef TCG_TARGET_NEED_LDST_LABELS 5028 QSIMPLEQ_INIT(&s->ldst_labels); 5029 #endif 5030 #ifdef TCG_TARGET_NEED_POOL_LABELS 5031 s->pool_labels = NULL; 5032 #endif 5033 5034 num_insns = -1; 5035 QTAILQ_FOREACH(op, &s->ops, link) { 5036 TCGOpcode opc = op->opc; 5037 5038 #ifdef CONFIG_PROFILER 5039 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 5040 #endif 5041 5042 switch (opc) { 5043 case INDEX_op_mov_i32: 5044 case INDEX_op_mov_i64: 5045 case INDEX_op_mov_vec: 5046 tcg_reg_alloc_mov(s, op); 5047 break; 5048 case INDEX_op_dup_vec: 5049 tcg_reg_alloc_dup(s, op); 5050 break; 5051 case INDEX_op_insn_start: 5052 if (num_insns >= 0) { 5053 size_t off = tcg_current_code_size(s); 5054 s->gen_insn_end_off[num_insns] = off; 5055 /* Assert that we do not overflow our stored offset. */ 5056 assert(s->gen_insn_end_off[num_insns] == off); 5057 } 5058 num_insns++; 5059 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 5060 target_ulong a; 5061 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 5062 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 5063 #else 5064 a = op->args[i]; 5065 #endif 5066 s->gen_insn_data[num_insns][i] = a; 5067 } 5068 break; 5069 case INDEX_op_discard: 5070 temp_dead(s, arg_temp(op->args[0])); 5071 break; 5072 case INDEX_op_set_label: 5073 tcg_reg_alloc_bb_end(s, s->reserved_regs); 5074 tcg_out_label(s, arg_label(op->args[0])); 5075 break; 5076 case INDEX_op_call: 5077 tcg_reg_alloc_call(s, op); 5078 break; 5079 case INDEX_op_exit_tb: 5080 tcg_out_exit_tb(s, op->args[0]); 5081 break; 5082 case INDEX_op_goto_tb: 5083 tcg_out_goto_tb(s, op->args[0]); 5084 break; 5085 case INDEX_op_dup2_vec: 5086 if (tcg_reg_alloc_dup2(s, op)) { 5087 break; 5088 } 5089 /* fall through */ 5090 default: 5091 /* Sanity check that we've not introduced any unhandled opcodes. */ 5092 tcg_debug_assert(tcg_op_supported(opc)); 5093 /* Note: in order to speed up the code, it would be much 5094 faster to have specialized register allocator functions for 5095 some common argument patterns */ 5096 tcg_reg_alloc_op(s, op); 5097 break; 5098 } 5099 /* Test for (pending) buffer overflow. The assumption is that any 5100 one operation beginning below the high water mark cannot overrun 5101 the buffer completely. Thus we can test for overflow after 5102 generating code without having to check during generation. */ 5103 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 5104 return -1; 5105 } 5106 /* Test for TB overflow, as seen by gen_insn_end_off. 
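           gen_insn_end_off[] stores 16-bit offsets, so the host code for
           one TB must fit in 16 bits; returning -2 lets the caller restart
           with a smaller block.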
*/ 5107 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 5108 return -2; 5109 } 5110 } 5111 tcg_debug_assert(num_insns >= 0); 5112 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 5113 5114 /* Generate TB finalization at the end of block */ 5115 #ifdef TCG_TARGET_NEED_LDST_LABELS 5116 i = tcg_out_ldst_finalize(s); 5117 if (i < 0) { 5118 return i; 5119 } 5120 #endif 5121 #ifdef TCG_TARGET_NEED_POOL_LABELS 5122 i = tcg_out_pool_finalize(s); 5123 if (i < 0) { 5124 return i; 5125 } 5126 #endif 5127 if (!tcg_resolve_relocs(s)) { 5128 return -2; 5129 } 5130 5131 #ifndef CONFIG_TCG_INTERPRETER 5132 /* flush instruction cache */ 5133 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 5134 (uintptr_t)s->code_buf, 5135 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 5136 #endif 5137 5138 return tcg_current_code_size(s); 5139 } 5140 5141 #ifdef CONFIG_PROFILER 5142 void tcg_dump_info(GString *buf) 5143 { 5144 TCGProfile prof = {}; 5145 const TCGProfile *s; 5146 int64_t tb_count; 5147 int64_t tb_div_count; 5148 int64_t tot; 5149 5150 tcg_profile_snapshot_counters(&prof); 5151 s = &prof; 5152 tb_count = s->tb_count; 5153 tb_div_count = tb_count ? tb_count : 1; 5154 tot = s->interm_time + s->code_time; 5155 5156 g_string_append_printf(buf, "JIT cycles %" PRId64 5157 " (%0.3f s at 2.4 GHz)\n", 5158 tot, tot / 2.4e9); 5159 g_string_append_printf(buf, "translated TBs %" PRId64 5160 " (aborted=%" PRId64 " %0.1f%%)\n", 5161 tb_count, s->tb_count1 - tb_count, 5162 (double)(s->tb_count1 - s->tb_count) 5163 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 5164 g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", 5165 (double)s->op_count / tb_div_count, s->op_count_max); 5166 g_string_append_printf(buf, "deleted ops/TB %0.2f\n", 5167 (double)s->del_op_count / tb_div_count); 5168 g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", 5169 (double)s->temp_count / tb_div_count, 5170 s->temp_count_max); 5171 g_string_append_printf(buf, "avg host code/TB %0.1f\n", 5172 (double)s->code_out_len / tb_div_count); 5173 g_string_append_printf(buf, "avg search data/TB %0.1f\n", 5174 (double)s->search_out_len / tb_div_count); 5175 5176 g_string_append_printf(buf, "cycles/op %0.1f\n", 5177 s->op_count ? (double)tot / s->op_count : 0); 5178 g_string_append_printf(buf, "cycles/in byte %0.1f\n", 5179 s->code_in_len ? (double)tot / s->code_in_len : 0); 5180 g_string_append_printf(buf, "cycles/out byte %0.1f\n", 5181 s->code_out_len ? (double)tot / s->code_out_len : 0); 5182 g_string_append_printf(buf, "cycles/search byte %0.1f\n", 5183 s->search_out_len ? 5184 (double)tot / s->search_out_len : 0); 5185 if (tot == 0) { 5186 tot = 1; 5187 } 5188 g_string_append_printf(buf, " gen_interm time %0.1f%%\n", 5189 (double)s->interm_time / tot * 100.0); 5190 g_string_append_printf(buf, " gen_code time %0.1f%%\n", 5191 (double)s->code_time / tot * 100.0); 5192 g_string_append_printf(buf, "optim./code time %0.1f%%\n", 5193 (double)s->opt_time / (s->code_time ? 5194 s->code_time : 1) 5195 * 100.0); 5196 g_string_append_printf(buf, "liveness/code time %0.1f%%\n", 5197 (double)s->la_time / (s->code_time ? 5198 s->code_time : 1) * 100.0); 5199 g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", 5200 s->restore_count); 5201 g_string_append_printf(buf, " avg cycles %0.1f\n", 5202 s->restore_count ? 
5203 (double)s->restore_time / s->restore_count : 0); 5204 } 5205 #else 5206 void tcg_dump_info(GString *buf) 5207 { 5208 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 5209 } 5210 #endif 5211 5212 #ifdef ELF_HOST_MACHINE 5213 /* In order to use this feature, the backend needs to do three things: 5214 5215 (1) Define ELF_HOST_MACHINE to indicate both what value to 5216 put into the ELF image and to indicate support for the feature. 5217 5218 (2) Define tcg_register_jit. This should create a buffer containing 5219 the contents of a .debug_frame section that describes the post- 5220 prologue unwind info for the tcg machine. 5221 5222 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 5223 */ 5224 5225 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 5226 typedef enum { 5227 JIT_NOACTION = 0, 5228 JIT_REGISTER_FN, 5229 JIT_UNREGISTER_FN 5230 } jit_actions_t; 5231 5232 struct jit_code_entry { 5233 struct jit_code_entry *next_entry; 5234 struct jit_code_entry *prev_entry; 5235 const void *symfile_addr; 5236 uint64_t symfile_size; 5237 }; 5238 5239 struct jit_descriptor { 5240 uint32_t version; 5241 uint32_t action_flag; 5242 struct jit_code_entry *relevant_entry; 5243 struct jit_code_entry *first_entry; 5244 }; 5245 5246 void __jit_debug_register_code(void) __attribute__((noinline)); 5247 void __jit_debug_register_code(void) 5248 { 5249 asm(""); 5250 } 5251 5252 /* Must statically initialize the version, because GDB may check 5253 the version before we can set it. */ 5254 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 5255 5256 /* End GDB interface. */ 5257 5258 static int find_string(const char *strtab, const char *str) 5259 { 5260 const char *p = strtab + 1; 5261 5262 while (1) { 5263 if (strcmp(p, str) == 0) { 5264 return p - strtab; 5265 } 5266 p += strlen(p) + 1; 5267 } 5268 } 5269 5270 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 5271 const void *debug_frame, 5272 size_t debug_frame_size) 5273 { 5274 struct __attribute__((packed)) DebugInfo { 5275 uint32_t len; 5276 uint16_t version; 5277 uint32_t abbrev; 5278 uint8_t ptr_size; 5279 uint8_t cu_die; 5280 uint16_t cu_lang; 5281 uintptr_t cu_low_pc; 5282 uintptr_t cu_high_pc; 5283 uint8_t fn_die; 5284 char fn_name[16]; 5285 uintptr_t fn_low_pc; 5286 uintptr_t fn_high_pc; 5287 uint8_t cu_eoc; 5288 }; 5289 5290 struct ElfImage { 5291 ElfW(Ehdr) ehdr; 5292 ElfW(Phdr) phdr; 5293 ElfW(Shdr) shdr[7]; 5294 ElfW(Sym) sym[2]; 5295 struct DebugInfo di; 5296 uint8_t da[24]; 5297 char str[80]; 5298 }; 5299 5300 struct ElfImage *img; 5301 5302 static const struct ElfImage img_template = { 5303 .ehdr = { 5304 .e_ident[EI_MAG0] = ELFMAG0, 5305 .e_ident[EI_MAG1] = ELFMAG1, 5306 .e_ident[EI_MAG2] = ELFMAG2, 5307 .e_ident[EI_MAG3] = ELFMAG3, 5308 .e_ident[EI_CLASS] = ELF_CLASS, 5309 .e_ident[EI_DATA] = ELF_DATA, 5310 .e_ident[EI_VERSION] = EV_CURRENT, 5311 .e_type = ET_EXEC, 5312 .e_machine = ELF_HOST_MACHINE, 5313 .e_version = EV_CURRENT, 5314 .e_phoff = offsetof(struct ElfImage, phdr), 5315 .e_shoff = offsetof(struct ElfImage, shdr), 5316 .e_ehsize = sizeof(ElfW(Shdr)), 5317 .e_phentsize = sizeof(ElfW(Phdr)), 5318 .e_phnum = 1, 5319 .e_shentsize = sizeof(ElfW(Shdr)), 5320 .e_shnum = ARRAY_SIZE(img->shdr), 5321 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 5322 #ifdef ELF_HOST_FLAGS 5323 .e_flags = ELF_HOST_FLAGS, 5324 #endif 5325 #ifdef ELF_OSABI 5326 .e_ident[EI_OSABI] = ELF_OSABI, 5327 #endif 5328 }, 5329 .phdr = { 5330 .p_type = PT_LOAD, 5331 .p_flags = PF_X, 5332 }, 5333 
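        /* A single executable PT_LOAD segment covers the JIT buffer; its
           addresses and size are filled in at runtime below. */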
.shdr = { 5334 [0] = { .sh_type = SHT_NULL }, 5335 /* Trick: The contents of code_gen_buffer are not present in 5336 this fake ELF file; that got allocated elsewhere. Therefore 5337 we mark .text as SHT_NOBITS (similar to .bss) so that readers 5338 will not look for contents. We can record any address. */ 5339 [1] = { /* .text */ 5340 .sh_type = SHT_NOBITS, 5341 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 5342 }, 5343 [2] = { /* .debug_info */ 5344 .sh_type = SHT_PROGBITS, 5345 .sh_offset = offsetof(struct ElfImage, di), 5346 .sh_size = sizeof(struct DebugInfo), 5347 }, 5348 [3] = { /* .debug_abbrev */ 5349 .sh_type = SHT_PROGBITS, 5350 .sh_offset = offsetof(struct ElfImage, da), 5351 .sh_size = sizeof(img->da), 5352 }, 5353 [4] = { /* .debug_frame */ 5354 .sh_type = SHT_PROGBITS, 5355 .sh_offset = sizeof(struct ElfImage), 5356 }, 5357 [5] = { /* .symtab */ 5358 .sh_type = SHT_SYMTAB, 5359 .sh_offset = offsetof(struct ElfImage, sym), 5360 .sh_size = sizeof(img->sym), 5361 .sh_info = 1, 5362 .sh_link = ARRAY_SIZE(img->shdr) - 1, 5363 .sh_entsize = sizeof(ElfW(Sym)), 5364 }, 5365 [6] = { /* .strtab */ 5366 .sh_type = SHT_STRTAB, 5367 .sh_offset = offsetof(struct ElfImage, str), 5368 .sh_size = sizeof(img->str), 5369 } 5370 }, 5371 .sym = { 5372 [1] = { /* code_gen_buffer */ 5373 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5374 .st_shndx = 1, 5375 } 5376 }, 5377 .di = { 5378 .len = sizeof(struct DebugInfo) - 4, 5379 .version = 2, 5380 .ptr_size = sizeof(void *), 5381 .cu_die = 1, 5382 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5383 .fn_die = 2, 5384 .fn_name = "code_gen_buffer" 5385 }, 5386 .da = { 5387 1, /* abbrev number (the cu) */ 5388 0x11, 1, /* DW_TAG_compile_unit, has children */ 5389 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5390 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5391 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5392 0, 0, /* end of abbrev */ 5393 2, /* abbrev number (the fn) */ 5394 0x2e, 0, /* DW_TAG_subprogram, no children */ 5395 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5396 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5397 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5398 0, 0, /* end of abbrev */ 5399 0 /* no more abbrev */ 5400 }, 5401 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5402 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5403 }; 5404 5405 /* We only need a single jit entry; statically allocate it. 
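       GDB's JIT interface walks a linked list of jit_code_entry structs;
       since we register exactly one ELF image for the whole buffer and
       never unregister it, a single static entry suffices.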
*/ 5406 static struct jit_code_entry one_entry; 5407 5408 uintptr_t buf = (uintptr_t)buf_ptr; 5409 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 5410 DebugFrameHeader *dfh; 5411 5412 img = g_malloc(img_size); 5413 *img = img_template; 5414 5415 img->phdr.p_vaddr = buf; 5416 img->phdr.p_paddr = buf; 5417 img->phdr.p_memsz = buf_size; 5418 5419 img->shdr[1].sh_name = find_string(img->str, ".text"); 5420 img->shdr[1].sh_addr = buf; 5421 img->shdr[1].sh_size = buf_size; 5422 5423 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 5424 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 5425 5426 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 5427 img->shdr[4].sh_size = debug_frame_size; 5428 5429 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 5430 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 5431 5432 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 5433 img->sym[1].st_value = buf; 5434 img->sym[1].st_size = buf_size; 5435 5436 img->di.cu_low_pc = buf; 5437 img->di.cu_high_pc = buf + buf_size; 5438 img->di.fn_low_pc = buf; 5439 img->di.fn_high_pc = buf + buf_size; 5440 5441 dfh = (DebugFrameHeader *)(img + 1); 5442 memcpy(dfh, debug_frame, debug_frame_size); 5443 dfh->fde.func_start = buf; 5444 dfh->fde.func_len = buf_size; 5445 5446 #ifdef DEBUG_JIT 5447 /* Enable this block to be able to debug the ELF image file creation. 5448 One can use readelf, objdump, or other inspection utilities. */ 5449 { 5450 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 5451 FILE *f = fopen(jit, "w+b"); 5452 if (f) { 5453 if (fwrite(img, img_size, 1, f) != img_size) { 5454 /* Avoid stupid unused return value warning for fwrite. */ 5455 } 5456 fclose(f); 5457 } 5458 } 5459 #endif 5460 5461 one_entry.symfile_addr = img; 5462 one_entry.symfile_size = img_size; 5463 5464 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 5465 __jit_debug_descriptor.relevant_entry = &one_entry; 5466 __jit_debug_descriptor.first_entry = &one_entry; 5467 __jit_debug_register_code(); 5468 } 5469 #else 5470 /* No support for the feature. Provide the entry point expected by exec.c, 5471 and implement the internal function we declared earlier. */ 5472 5473 static void tcg_register_jit_int(const void *buf, size_t size, 5474 const void *debug_frame, 5475 size_t debug_frame_size) 5476 { 5477 } 5478 5479 void tcg_register_jit(const void *buf, size_t buf_size) 5480 { 5481 } 5482 #endif /* ELF_HOST_MACHINE */ 5483 5484 #if !TCG_TARGET_MAYBE_vec 5485 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 5486 { 5487 g_assert_not_reached(); 5488 } 5489 #endif 5490