/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
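
/*
 * Editor's note (illustrative, not from the original source): the
 * tcg_outN/tcg_patchN family above adapts fixed-width emission to the
 * backend's tcg_insn_unit.  On a byte-stream backend
 * (TCG_TARGET_INSN_UNIT_SIZE == 1), tcg_out32() copies the value with
 * memcpy and advances s->code_ptr by four units, while on a backend
 * with 4-byte units it stores a single unit directly; either way,
 * tcg_out32(s, insn) is the usual idiom for emitting one 32-bit
 * instruction word.
 */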

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
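
/*
 * Editor's sketch of the label lifecycle (hypothetical backend code,
 * kept out of compilation; R_EXAMPLE_PCREL and the zero opcode are
 * made up for illustration):
 */
#if 0
static void example_forward_branch(TCGContext *s, TCGLabel *l)
{
    /* Record a relocation against the not-yet-bound label... */
    tcg_out_reloc(s, s->code_ptr, R_EXAMPLE_PCREL, l, 0);
    /* ...emit the branch with its offset field left blank... */
    tcg_out32(s, 0);
    /* ...straight-line code follows; once the label is bound with
       tcg_out_label(s, l), tcg_resolve_relocs() patches the offset
       through patch_reloc(). */
}
#endif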

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
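
/*
 * Editor's note (illustrative): each C_O*_I* line in
 * tcg-target-con-set.h is therefore expanded three times.  For a
 * hypothetical entry C_O1_I2(r, r, ri):
 *   1. the enum pass above defines the enumerator c_o1_i2_r_r_ri;
 *   2. the array pass emits { .args_ct_str = { "r", "r", "ri" } }
 *      at the matching index of constraint_sets[];
 *   3. the pass just defined lets tcg_target_op_def() return
 *      c_o1_i2_r_r_ri for any opcode using that combination.
 */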

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
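
/*
 * Editor's usage note (illustrative): pool allocations hold transient
 * per-translation data.  Callers use tcg_malloc() and never free
 * individually; everything is released in bulk when the translator
 * calls tcg_pool_reset() at the start of the next TB, e.g.
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *
 * as in tcg_out_reloc() above.
 */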

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t. */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */
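
/*
 * Editor's worked example (illustrative): typemask packs one 3-bit
 * typecode per slot, the return type in bits [2:0] and argument j in
 * bits [(j+1)*3 .. (j+1)*3+2].  So a helper returning i64 with one i32
 * argument has typemask == (dh_typecode_i32 << 3) | dh_typecode_i64,
 * and the computation above recovers one argument:
 *
 *     nargs = DIV_ROUND_UP(32 - clz32(typemask >> 3), 3);
 */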

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
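
/*
 * Editor's note (illustrative): layout_arg_even() rounds arg_slot up
 * to an even index, for ABIs that pass 64-bit values in even/odd
 * register pairs (e.g. 32-bit Arm).  Starting from arg_slot == 1, an
 * i64 argument is then placed in slots 2 and 3 rather than 1 and 2.
 */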

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
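
/*
 * Editor's worked example (illustrative, for a hypothetical 32-bit
 * host with TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN): a helper
 * taking (i32, i64) is laid out as
 *   arg 0 (i32): one slot, arg_slot 0;
 *   arg 1 (i64): even-aligned, so slots 2 and 3, with one
 *     TCGCallArgumentLoc per 32-bit half (tmp_subindex 0 and 1).
 * Slots below max_reg_slots map to registers, the rest to the stack.
 */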

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1111 */ 1112 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1113 #endif 1114 1115 tcg_region_prologue_set(s); 1116 } 1117 1118 void tcg_func_start(TCGContext *s) 1119 { 1120 tcg_pool_reset(s); 1121 s->nb_temps = s->nb_globals; 1122 1123 /* No temps have been previously allocated for size or locality. */ 1124 memset(s->free_temps, 0, sizeof(s->free_temps)); 1125 1126 /* No constant temps have been previously allocated. */ 1127 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1128 if (s->const_table[i]) { 1129 g_hash_table_remove_all(s->const_table[i]); 1130 } 1131 } 1132 1133 s->nb_ops = 0; 1134 s->nb_labels = 0; 1135 s->current_frame_offset = s->frame_start; 1136 1137 #ifdef CONFIG_DEBUG_TCG 1138 s->goto_tb_issue_mask = 0; 1139 #endif 1140 1141 QTAILQ_INIT(&s->ops); 1142 QTAILQ_INIT(&s->free_ops); 1143 QSIMPLEQ_INIT(&s->labels); 1144 } 1145 1146 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1147 { 1148 int n = s->nb_temps++; 1149 1150 if (n >= TCG_MAX_TEMPS) { 1151 tcg_raise_tb_overflow(s); 1152 } 1153 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1154 } 1155 1156 static TCGTemp *tcg_global_alloc(TCGContext *s) 1157 { 1158 TCGTemp *ts; 1159 1160 tcg_debug_assert(s->nb_globals == s->nb_temps); 1161 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1162 s->nb_globals++; 1163 ts = tcg_temp_alloc(s); 1164 ts->kind = TEMP_GLOBAL; 1165 1166 return ts; 1167 } 1168 1169 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1170 TCGReg reg, const char *name) 1171 { 1172 TCGTemp *ts; 1173 1174 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 1175 tcg_abort(); 1176 } 1177 1178 ts = tcg_global_alloc(s); 1179 ts->base_type = type; 1180 ts->type = type; 1181 ts->kind = TEMP_FIXED; 1182 ts->reg = reg; 1183 ts->name = name; 1184 tcg_regset_set_reg(s->reserved_regs, reg); 1185 1186 return ts; 1187 } 1188 1189 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1190 { 1191 s->frame_start = start; 1192 s->frame_end = start + size; 1193 s->frame_temp 1194 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1195 } 1196 1197 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 1198 intptr_t offset, const char *name) 1199 { 1200 TCGContext *s = tcg_ctx; 1201 TCGTemp *base_ts = tcgv_ptr_temp(base); 1202 TCGTemp *ts = tcg_global_alloc(s); 1203 int indirect_reg = 0; 1204 1205 switch (base_ts->kind) { 1206 case TEMP_FIXED: 1207 break; 1208 case TEMP_GLOBAL: 1209 /* We do not support double-indirect registers. */ 1210 tcg_debug_assert(!base_ts->indirect_reg); 1211 base_ts->indirect_base = 1; 1212 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1213 ? 

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type. */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        int i, n;

        switch (type) {
        case TCG_TYPE_I32:
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            n = 1;
            break;
        case TCG_TYPE_I64:
            n = 64 / TCG_TARGET_REG_BITS;
            break;
        case TCG_TYPE_I128:
            n = 128 / TCG_TARGET_REG_BITS;
            break;
        default:
            g_assert_not_reached();
        }

        ts = tcg_temp_alloc(s);
        ts->base_type = type;
        ts->temp_allocated = 1;
        ts->kind = kind;

        if (n == 1) {
            ts->type = type;
        } else {
            ts->type = TCG_TYPE_REG;

            for (i = 1; i < n; ++i) {
                TCGTemp *ts2 = tcg_temp_alloc(s);

                tcg_debug_assert(ts2 == ts + i);
                ts2->base_type = type;
                ts2->type = TCG_TYPE_REG;
                ts2->temp_allocated = 1;
                ts2->temp_subindex = i;
                ts2->kind = kind;
            }
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
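
/*
 * Editor's note (illustrative): freed temps are recycled through the
 * free_temps bitmaps, indexed by type plus TCG_TYPE_COUNT for "local"
 * temps, so a freed I32 temp can only be handed back out as an I32
 * temp of the same kind; see the matching set_bit() in
 * tcg_temp_free_internal() below.
 */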

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_NORMAL:
    case TEMP_LOCAL:
        break;
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

#if defined(CONFIG_DEBUG_TCG)
    assert(s->temps_in_use > 0);
    s->temps_in_use--;
#endif

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
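
/*
 * Editor's note (illustrative): constants are interned per type, so
 * two lookups of the same value return the same TCGTemp:
 *
 *     TCGTemp *a = tcg_constant_internal(TCG_TYPE_I32, 42);
 *     TCGTemp *b = tcg_constant_internal(TCG_TYPE_I32, 42);
 *     // a == b; TEMP_CONST temps are never freed individually
 */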

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
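
/*
 * Editor's note (illustrative): tcg_const_i32() allocates a fresh
 * mutable temp and emits a movi into it, whereas tcg_constant_*
 * returns a shared, read-only TEMP_CONST with no code emitted;
 * prefer the latter when the value will never be overwritten.
 */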

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
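
/*
 * Editor's note (illustrative): expanders consult this predicate
 * before emitting optional opcodes, falling back to an equivalent
 * sequence otherwise, e.g. (hypothetical caller):
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit ctpop directly
 *     } else {
 *         // expand via a generic bit-counting fallback
 *     }
 */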

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                TCGv_i64 temp = tcg_temp_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB] = "ub",
[MO_SB] = "sb", 1998 [MO_LEUW] = "leuw", 1999 [MO_LESW] = "lesw", 2000 [MO_LEUL] = "leul", 2001 [MO_LESL] = "lesl", 2002 [MO_LEUQ] = "leq", 2003 [MO_BEUW] = "beuw", 2004 [MO_BESW] = "besw", 2005 [MO_BEUL] = "beul", 2006 [MO_BESL] = "besl", 2007 [MO_BEUQ] = "beq", 2008 }; 2009 2010 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2011 #ifdef TARGET_ALIGNED_ONLY 2012 [MO_UNALN >> MO_ASHIFT] = "un+", 2013 [MO_ALIGN >> MO_ASHIFT] = "", 2014 #else 2015 [MO_UNALN >> MO_ASHIFT] = "", 2016 [MO_ALIGN >> MO_ASHIFT] = "al+", 2017 #endif 2018 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2019 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2020 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2021 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2022 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2023 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2024 }; 2025 2026 static const char bswap_flag_name[][6] = { 2027 [TCG_BSWAP_IZ] = "iz", 2028 [TCG_BSWAP_OZ] = "oz", 2029 [TCG_BSWAP_OS] = "os", 2030 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 2031 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 2032 }; 2033 2034 static inline bool tcg_regset_single(TCGRegSet d) 2035 { 2036 return (d & (d - 1)) == 0; 2037 } 2038 2039 static inline TCGReg tcg_regset_first(TCGRegSet d) 2040 { 2041 if (TCG_TARGET_NB_REGS <= 32) { 2042 return ctz32(d); 2043 } else { 2044 return ctz64(d); 2045 } 2046 } 2047 2048 /* Return only the number of characters output -- no error return. */ 2049 #define ne_fprintf(...) \ 2050 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; }) 2051 2052 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 2053 { 2054 char buf[128]; 2055 TCGOp *op; 2056 2057 QTAILQ_FOREACH(op, &s->ops, link) { 2058 int i, k, nb_oargs, nb_iargs, nb_cargs; 2059 const TCGOpDef *def; 2060 TCGOpcode c; 2061 int col = 0; 2062 2063 c = op->opc; 2064 def = &tcg_op_defs[c]; 2065 2066 if (c == INDEX_op_insn_start) { 2067 nb_oargs = 0; 2068 col += ne_fprintf(f, "\n ----"); 2069 2070 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2071 target_ulong a; 2072 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2073 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 2074 #else 2075 a = op->args[i]; 2076 #endif 2077 col += ne_fprintf(f, " " TARGET_FMT_lx, a); 2078 } 2079 } else if (c == INDEX_op_call) { 2080 const TCGHelperInfo *info = tcg_call_info(op); 2081 void *func = tcg_call_func(op); 2082 2083 /* variable number of arguments */ 2084 nb_oargs = TCGOP_CALLO(op); 2085 nb_iargs = TCGOP_CALLI(op); 2086 nb_cargs = def->nb_cargs; 2087 2088 col += ne_fprintf(f, " %s ", def->name); 2089 2090 /* 2091 * Print the function name from TCGHelperInfo, if available. 2092 * Note that plugins have a template function for the info, 2093 * but the actual function pointer comes from the plugin. 
2094 */ 2095 if (func == info->func) { 2096 col += ne_fprintf(f, "%s", info->name); 2097 } else { 2098 col += ne_fprintf(f, "plugin(%p)", func); 2099 } 2100 2101 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2102 for (i = 0; i < nb_oargs; i++) { 2103 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2104 op->args[i])); 2105 } 2106 for (i = 0; i < nb_iargs; i++) { 2107 TCGArg arg = op->args[nb_oargs + i]; 2108 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2109 col += ne_fprintf(f, ",%s", t); 2110 } 2111 } else { 2112 col += ne_fprintf(f, " %s ", def->name); 2113 2114 nb_oargs = def->nb_oargs; 2115 nb_iargs = def->nb_iargs; 2116 nb_cargs = def->nb_cargs; 2117 2118 if (def->flags & TCG_OPF_VECTOR) { 2119 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2120 8 << TCGOP_VECE(op)); 2121 } 2122 2123 k = 0; 2124 for (i = 0; i < nb_oargs; i++) { 2125 const char *sep = k ? "," : ""; 2126 col += ne_fprintf(f, "%s%s", sep, 2127 tcg_get_arg_str(s, buf, sizeof(buf), 2128 op->args[k++])); 2129 } 2130 for (i = 0; i < nb_iargs; i++) { 2131 const char *sep = k ? "," : ""; 2132 col += ne_fprintf(f, "%s%s", sep, 2133 tcg_get_arg_str(s, buf, sizeof(buf), 2134 op->args[k++])); 2135 } 2136 switch (c) { 2137 case INDEX_op_brcond_i32: 2138 case INDEX_op_setcond_i32: 2139 case INDEX_op_movcond_i32: 2140 case INDEX_op_brcond2_i32: 2141 case INDEX_op_setcond2_i32: 2142 case INDEX_op_brcond_i64: 2143 case INDEX_op_setcond_i64: 2144 case INDEX_op_movcond_i64: 2145 case INDEX_op_cmp_vec: 2146 case INDEX_op_cmpsel_vec: 2147 if (op->args[k] < ARRAY_SIZE(cond_name) 2148 && cond_name[op->args[k]]) { 2149 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2150 } else { 2151 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2152 } 2153 i = 1; 2154 break; 2155 case INDEX_op_qemu_ld_i32: 2156 case INDEX_op_qemu_st_i32: 2157 case INDEX_op_qemu_st8_i32: 2158 case INDEX_op_qemu_ld_i64: 2159 case INDEX_op_qemu_st_i64: 2160 { 2161 MemOpIdx oi = op->args[k++]; 2162 MemOp op = get_memop(oi); 2163 unsigned ix = get_mmuidx(oi); 2164 2165 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2166 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2167 } else { 2168 const char *s_al, *s_op; 2169 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2170 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2171 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 2172 } 2173 i = 1; 2174 } 2175 break; 2176 case INDEX_op_bswap16_i32: 2177 case INDEX_op_bswap16_i64: 2178 case INDEX_op_bswap32_i32: 2179 case INDEX_op_bswap32_i64: 2180 case INDEX_op_bswap64_i64: 2181 { 2182 TCGArg flags = op->args[k]; 2183 const char *name = NULL; 2184 2185 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2186 name = bswap_flag_name[flags]; 2187 } 2188 if (name) { 2189 col += ne_fprintf(f, ",%s", name); 2190 } else { 2191 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2192 } 2193 i = k = 1; 2194 } 2195 break; 2196 default: 2197 i = 0; 2198 break; 2199 } 2200 switch (c) { 2201 case INDEX_op_set_label: 2202 case INDEX_op_br: 2203 case INDEX_op_brcond_i32: 2204 case INDEX_op_brcond_i64: 2205 case INDEX_op_brcond2_i32: 2206 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2207 arg_label(op->args[k])->id); 2208 i++, k++; 2209 break; 2210 default: 2211 break; 2212 } 2213 for (; i < nb_cargs; i++, k++) { 2214 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2215 op->args[k]); 2216 } 2217 } 2218 2219 if (have_prefs || op->life) { 2220 for (; col < 40; ++col) { 2221 putc(' ', f); 2222 } 2223 } 2224 2225 if (op->life) { 2226 unsigned life = op->life; 2227 2228 if (life & (SYNC_ARG * 3)) { 2229 ne_fprintf(f, " sync:"); 2230 for (i = 0; i < 2; ++i) { 2231 if (life & (SYNC_ARG << i)) { 2232 ne_fprintf(f, " %d", i); 2233 } 2234 } 2235 } 2236 life /= DEAD_ARG; 2237 if (life) { 2238 ne_fprintf(f, " dead:"); 2239 for (i = 0; life; ++i, life >>= 1) { 2240 if (life & 1) { 2241 ne_fprintf(f, " %d", i); 2242 } 2243 } 2244 } 2245 } 2246 2247 if (have_prefs) { 2248 for (i = 0; i < nb_oargs; ++i) { 2249 TCGRegSet set = output_pref(op, i); 2250 2251 if (i == 0) { 2252 ne_fprintf(f, " pref="); 2253 } else { 2254 ne_fprintf(f, ","); 2255 } 2256 if (set == 0) { 2257 ne_fprintf(f, "none"); 2258 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2259 ne_fprintf(f, "all"); 2260 #ifdef CONFIG_DEBUG_TCG 2261 } else if (tcg_regset_single(set)) { 2262 TCGReg reg = tcg_regset_first(set); 2263 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2264 #endif 2265 } else if (TCG_TARGET_NB_REGS <= 32) { 2266 ne_fprintf(f, "0x%x", (uint32_t)set); 2267 } else { 2268 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2269 } 2270 } 2271 } 2272 2273 putc('\n', f); 2274 } 2275 } 2276 2277 /* we give more priority to constraints with less registers */ 2278 static int get_constraint_priority(const TCGOpDef *def, int k) 2279 { 2280 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2281 int n = ctpop64(arg_ct->regs); 2282 2283 /* 2284 * Sort constraints of a single register first, which includes output 2285 * aliases (which must exactly match the input already allocated). 2286 */ 2287 if (n == 1 || arg_ct->oalias) { 2288 return INT_MAX; 2289 } 2290 2291 /* 2292 * Sort register pairs next, first then second immediately after. 2293 * Arbitrarily sort multiple pairs by the index of the first reg; 2294 * there shouldn't be many pairs. 2295 */ 2296 switch (arg_ct->pair) { 2297 case 1: 2298 case 3: 2299 return (k + 1) * 2; 2300 case 2: 2301 return (arg_ct->pair_index + 1) * 2 - 1; 2302 } 2303 2304 /* Finally, sort by decreasing register count. */ 2305 assert(n > 1); 2306 return -n; 2307 } 2308 2309 /* sort from highest priority to lowest */ 2310 static void sort_constraints(TCGOpDef *def, int start, int n) 2311 { 2312 int i, j; 2313 TCGArgConstraint *a = def->args_ct; 2314 2315 for (i = 0; i < n; i++) { 2316 a[start + i].sort_index = start + i; 2317 } 2318 if (n <= 1) { 2319 return; 2320 } 2321 for (i = 0; i < n - 1; i++) { 2322 for (j = i + 1; j < n; j++) { 2323 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2324 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2325 if (p1 < p2) { 2326 int tmp = a[start + i].sort_index; 2327 a[start + i].sort_index = a[start + j].sort_index; 2328 a[start + j].sort_index = tmp; 2329 } 2330 } 2331 } 2332 } 2333 2334 static void process_op_defs(TCGContext *s) 2335 { 2336 TCGOpcode op; 2337 2338 for (op = 0; op < NB_OPS; op++) { 2339 TCGOpDef *def = &tcg_op_defs[op]; 2340 const TCGTargetOpDef *tdefs; 2341 bool saw_alias_pair = false; 2342 int i, o, i2, o2, nb_args; 2343 2344 if (def->flags & TCG_OPF_NOT_PRESENT) { 2345 continue; 2346 } 2347 2348 nb_args = def->nb_iargs + def->nb_oargs; 2349 if (nb_args == 0) { 2350 continue; 2351 } 2352 2353 /* 2354 * Macro magic should make it impossible, but double-check that 2355 * the array index is in range. 
         * tcg_target_op_def() returns a value of a generated enum that
         * indexes constraint_sets[], itself built from the target's
         * tcg-target-con-set.h.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias. */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
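         * (Schematic example, not taken from any particular target: with
         * outputs {0,1} constrained "r","p" and inputs {2,3} constrained
         * "0","1", copying the alias gives arg 2 pair=1 and arg 3 pair=2;
         * the fixup below then points their pair_index'es at each other,
         * 2 <-> 3.)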
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}

void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    TCGLabel *label;

    switch (op->opc) {
    case INDEX_op_br:
        label = arg_label(op->args[0]);
        label->refs--;
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        label = arg_label(op->args[3]);
        label->refs--;
        break;
    case INDEX_op_brcond2_i32:
        label = arg_label(op->args[5]);
        label->refs--;
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}

void tcg_remove_ops_after(TCGOp *op)
{
    TCGContext *s = tcg_ctx;

    while (true) {
        TCGOp *last = tcg_last_op();
        if (last == op) {
            return;
        }
        tcg_op_remove(s, last);
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
            if (nargs <= op->nargs) {
                QTAILQ_REMOVE(&s->free_ops, op, link);
                nargs = op->nargs;
                goto found;
            }
        }
    }

    /* Most opcodes have 3 or 4 operands: reduce fragmentation.
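       Rounding small allocations up to four arguments also lets a freed
       op be reused to satisfy most subsequent requests.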
     */
    nargs = MAX(4, nargs);
    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);

 found:
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    op->nargs = nargs;

    /* Check for bitfield overflow. */
    tcg_debug_assert(op->nargs == nargs);

    s->nb_ops++;
    return op;
}

TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
{
    TCGOp *op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    return op;
}

TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
                            TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}

TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
                           TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}

/* Reachable analysis: remove unreachable code. */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that, by the time we reach a label, we will
                 * generally have already removed all references to it,
                 * and there is little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again. */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead. */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions. */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind. */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

#define TS_DEAD  1
#define TS_MEM   2

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp. */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ?
           0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_FIXED:
        case TEMP_GLOBAL:
        case TEMP_LOCAL:
            state = TS_DEAD | TS_MEM;
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
        case TEMP_CONST:
            state = TS_DEAD;
            break;
        default:
            g_assert_not_reached();
        }
        ts->state = state;
        la_reset_pref(ts);
    }
}

/* liveness analysis: sync globals back to memory. */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs. */
            la_reset_pref(&s->temps[i]);
        }
    }
}

/*
 * liveness analysis: conditional branch: all temps are dead unless
 * explicitly live-across-conditional-branch, globals and local temps
 * should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_LOCAL:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                continue;
            }
            break;
        case TEMP_NORMAL:
            s->temps[i].state = TS_DEAD;
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart. */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed.
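   The pass walks the op list backward, so ts->state describes liveness
   after the op being visited; per-argument DEAD_ARG and SYNC_ARG bits
   are accumulated into op->life for the register allocator.  (A life of
   (DEAD_ARG << 2) | SYNC_ARG, say, marks argument 2 as dead here and
   output 0 as needing a sync back to memory.)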
*/ 2861 static void liveness_pass_1(TCGContext *s) 2862 { 2863 int nb_globals = s->nb_globals; 2864 int nb_temps = s->nb_temps; 2865 TCGOp *op, *op_prev; 2866 TCGRegSet *prefs; 2867 int i; 2868 2869 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2870 for (i = 0; i < nb_temps; ++i) { 2871 s->temps[i].state_ptr = prefs + i; 2872 } 2873 2874 /* ??? Should be redundant with the exit_tb that ends the TB. */ 2875 la_func_end(s, nb_globals, nb_temps); 2876 2877 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2878 int nb_iargs, nb_oargs; 2879 TCGOpcode opc_new, opc_new2; 2880 bool have_opc_new2; 2881 TCGLifeData arg_life = 0; 2882 TCGTemp *ts; 2883 TCGOpcode opc = op->opc; 2884 const TCGOpDef *def = &tcg_op_defs[opc]; 2885 2886 switch (opc) { 2887 case INDEX_op_call: 2888 { 2889 const TCGHelperInfo *info = tcg_call_info(op); 2890 int call_flags = tcg_call_flags(op); 2891 2892 nb_oargs = TCGOP_CALLO(op); 2893 nb_iargs = TCGOP_CALLI(op); 2894 2895 /* pure functions can be removed if their result is unused */ 2896 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2897 for (i = 0; i < nb_oargs; i++) { 2898 ts = arg_temp(op->args[i]); 2899 if (ts->state != TS_DEAD) { 2900 goto do_not_remove_call; 2901 } 2902 } 2903 goto do_remove; 2904 } 2905 do_not_remove_call: 2906 2907 /* Output args are dead. */ 2908 for (i = 0; i < nb_oargs; i++) { 2909 ts = arg_temp(op->args[i]); 2910 if (ts->state & TS_DEAD) { 2911 arg_life |= DEAD_ARG << i; 2912 } 2913 if (ts->state & TS_MEM) { 2914 arg_life |= SYNC_ARG << i; 2915 } 2916 ts->state = TS_DEAD; 2917 la_reset_pref(ts); 2918 } 2919 2920 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 2921 memset(op->output_pref, 0, sizeof(op->output_pref)); 2922 2923 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2924 TCG_CALL_NO_READ_GLOBALS))) { 2925 la_global_kill(s, nb_globals); 2926 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2927 la_global_sync(s, nb_globals); 2928 } 2929 2930 /* Record arguments that die in this helper. */ 2931 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2932 ts = arg_temp(op->args[i]); 2933 if (ts->state & TS_DEAD) { 2934 arg_life |= DEAD_ARG << i; 2935 } 2936 } 2937 2938 /* For all live registers, remove call-clobbered prefs. */ 2939 la_cross_call(s, nb_temps); 2940 2941 /* 2942 * Input arguments are live for preceding opcodes. 2943 * 2944 * For those arguments that die, and will be allocated in 2945 * registers, clear the register set for that arg, to be 2946 * filled in below. For args that will be on the stack, 2947 * reset to any available reg. Process arguments in reverse 2948 * order so that if a temp is used more than once, the stack 2949 * reset to max happens before the register reset to 0. 2950 */ 2951 for (i = nb_iargs - 1; i >= 0; i--) { 2952 const TCGCallArgumentLoc *loc = &info->in[i]; 2953 ts = arg_temp(op->args[nb_oargs + i]); 2954 2955 if (ts->state & TS_DEAD) { 2956 switch (loc->kind) { 2957 case TCG_CALL_ARG_NORMAL: 2958 case TCG_CALL_ARG_EXTEND_U: 2959 case TCG_CALL_ARG_EXTEND_S: 2960 if (REG_P(loc)) { 2961 *la_temp_pref(ts) = 0; 2962 break; 2963 } 2964 /* fall through */ 2965 default: 2966 *la_temp_pref(ts) = 2967 tcg_target_available_regs[ts->type]; 2968 break; 2969 } 2970 ts->state &= ~TS_DEAD; 2971 } 2972 } 2973 2974 /* 2975 * For each input argument, add its input register to prefs. 2976 * If a temp is used once, this produces a single set bit; 2977 * if a temp is used multiple times, this produces a set. 
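                 * (A temp passed in two register slots, for instance,
                 * ends up with both argument registers in its set.)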
2978 */ 2979 for (i = 0; i < nb_iargs; i++) { 2980 const TCGCallArgumentLoc *loc = &info->in[i]; 2981 ts = arg_temp(op->args[nb_oargs + i]); 2982 2983 switch (loc->kind) { 2984 case TCG_CALL_ARG_NORMAL: 2985 case TCG_CALL_ARG_EXTEND_U: 2986 case TCG_CALL_ARG_EXTEND_S: 2987 if (REG_P(loc)) { 2988 tcg_regset_set_reg(*la_temp_pref(ts), 2989 tcg_target_call_iarg_regs[loc->arg_slot]); 2990 } 2991 break; 2992 default: 2993 break; 2994 } 2995 } 2996 } 2997 break; 2998 case INDEX_op_insn_start: 2999 break; 3000 case INDEX_op_discard: 3001 /* mark the temporary as dead */ 3002 ts = arg_temp(op->args[0]); 3003 ts->state = TS_DEAD; 3004 la_reset_pref(ts); 3005 break; 3006 3007 case INDEX_op_add2_i32: 3008 opc_new = INDEX_op_add_i32; 3009 goto do_addsub2; 3010 case INDEX_op_sub2_i32: 3011 opc_new = INDEX_op_sub_i32; 3012 goto do_addsub2; 3013 case INDEX_op_add2_i64: 3014 opc_new = INDEX_op_add_i64; 3015 goto do_addsub2; 3016 case INDEX_op_sub2_i64: 3017 opc_new = INDEX_op_sub_i64; 3018 do_addsub2: 3019 nb_iargs = 4; 3020 nb_oargs = 2; 3021 /* Test if the high part of the operation is dead, but not 3022 the low part. The result can be optimized to a simple 3023 add or sub. This happens often for x86_64 guest when the 3024 cpu mode is set to 32 bit. */ 3025 if (arg_temp(op->args[1])->state == TS_DEAD) { 3026 if (arg_temp(op->args[0])->state == TS_DEAD) { 3027 goto do_remove; 3028 } 3029 /* Replace the opcode and adjust the args in place, 3030 leaving 3 unused args at the end. */ 3031 op->opc = opc = opc_new; 3032 op->args[1] = op->args[2]; 3033 op->args[2] = op->args[4]; 3034 /* Fall through and mark the single-word operation live. */ 3035 nb_iargs = 2; 3036 nb_oargs = 1; 3037 } 3038 goto do_not_remove; 3039 3040 case INDEX_op_mulu2_i32: 3041 opc_new = INDEX_op_mul_i32; 3042 opc_new2 = INDEX_op_muluh_i32; 3043 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3044 goto do_mul2; 3045 case INDEX_op_muls2_i32: 3046 opc_new = INDEX_op_mul_i32; 3047 opc_new2 = INDEX_op_mulsh_i32; 3048 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3049 goto do_mul2; 3050 case INDEX_op_mulu2_i64: 3051 opc_new = INDEX_op_mul_i64; 3052 opc_new2 = INDEX_op_muluh_i64; 3053 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3054 goto do_mul2; 3055 case INDEX_op_muls2_i64: 3056 opc_new = INDEX_op_mul_i64; 3057 opc_new2 = INDEX_op_mulsh_i64; 3058 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3059 goto do_mul2; 3060 do_mul2: 3061 nb_iargs = 2; 3062 nb_oargs = 2; 3063 if (arg_temp(op->args[1])->state == TS_DEAD) { 3064 if (arg_temp(op->args[0])->state == TS_DEAD) { 3065 /* Both parts of the operation are dead. */ 3066 goto do_remove; 3067 } 3068 /* The high part of the operation is dead; generate the low. */ 3069 op->opc = opc = opc_new; 3070 op->args[1] = op->args[2]; 3071 op->args[2] = op->args[3]; 3072 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3073 /* The low part of the operation is dead; generate the high. */ 3074 op->opc = opc = opc_new2; 3075 op->args[0] = op->args[1]; 3076 op->args[1] = op->args[2]; 3077 op->args[2] = op->args[3]; 3078 } else { 3079 goto do_not_remove; 3080 } 3081 /* Mark the single-word operation live. */ 3082 nb_oargs = 1; 3083 goto do_not_remove; 3084 3085 default: 3086 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3087 nb_iargs = def->nb_iargs; 3088 nb_oargs = def->nb_oargs; 3089 3090 /* Test if the operation can be removed because all 3091 its outputs are dead. 
We assume that nb_oargs == 0 3092 implies side effects */ 3093 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3094 for (i = 0; i < nb_oargs; i++) { 3095 if (arg_temp(op->args[i])->state != TS_DEAD) { 3096 goto do_not_remove; 3097 } 3098 } 3099 goto do_remove; 3100 } 3101 goto do_not_remove; 3102 3103 do_remove: 3104 tcg_op_remove(s, op); 3105 break; 3106 3107 do_not_remove: 3108 for (i = 0; i < nb_oargs; i++) { 3109 ts = arg_temp(op->args[i]); 3110 3111 /* Remember the preference of the uses that followed. */ 3112 if (i < ARRAY_SIZE(op->output_pref)) { 3113 op->output_pref[i] = *la_temp_pref(ts); 3114 } 3115 3116 /* Output args are dead. */ 3117 if (ts->state & TS_DEAD) { 3118 arg_life |= DEAD_ARG << i; 3119 } 3120 if (ts->state & TS_MEM) { 3121 arg_life |= SYNC_ARG << i; 3122 } 3123 ts->state = TS_DEAD; 3124 la_reset_pref(ts); 3125 } 3126 3127 /* If end of basic block, update. */ 3128 if (def->flags & TCG_OPF_BB_EXIT) { 3129 la_func_end(s, nb_globals, nb_temps); 3130 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3131 la_bb_sync(s, nb_globals, nb_temps); 3132 } else if (def->flags & TCG_OPF_BB_END) { 3133 la_bb_end(s, nb_globals, nb_temps); 3134 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3135 la_global_sync(s, nb_globals); 3136 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3137 la_cross_call(s, nb_temps); 3138 } 3139 } 3140 3141 /* Record arguments that die in this opcode. */ 3142 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3143 ts = arg_temp(op->args[i]); 3144 if (ts->state & TS_DEAD) { 3145 arg_life |= DEAD_ARG << i; 3146 } 3147 } 3148 3149 /* Input arguments are live for preceding opcodes. */ 3150 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3151 ts = arg_temp(op->args[i]); 3152 if (ts->state & TS_DEAD) { 3153 /* For operands that were dead, initially allow 3154 all regs for the type. */ 3155 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3156 ts->state &= ~TS_DEAD; 3157 } 3158 } 3159 3160 /* Incorporate constraints for this operand. */ 3161 switch (opc) { 3162 case INDEX_op_mov_i32: 3163 case INDEX_op_mov_i64: 3164 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3165 have proper constraints. That said, special case 3166 moves to propagate preferences backward. */ 3167 if (IS_DEAD_ARG(1)) { 3168 *la_temp_pref(arg_temp(op->args[0])) 3169 = *la_temp_pref(arg_temp(op->args[1])); 3170 } 3171 break; 3172 3173 default: 3174 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3175 const TCGArgConstraint *ct = &def->args_ct[i]; 3176 TCGRegSet set, *pset; 3177 3178 ts = arg_temp(op->args[i]); 3179 pset = la_temp_pref(ts); 3180 set = *pset; 3181 3182 set &= ct->regs; 3183 if (ct->ialias) { 3184 set &= output_pref(op, ct->alias_index); 3185 } 3186 /* If the combination is not possible, restart. */ 3187 if (set == 0) { 3188 set = ct->regs; 3189 } 3190 *pset = set; 3191 } 3192 break; 3193 } 3194 break; 3195 } 3196 op->life = arg_life; 3197 } 3198 } 3199 3200 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3201 static bool liveness_pass_2(TCGContext *s) 3202 { 3203 int nb_globals = s->nb_globals; 3204 int nb_temps, i; 3205 bool changes = false; 3206 TCGOp *op, *op_next; 3207 3208 /* Create a temporary for each indirect global. 
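       An indirect global is one whose memory slot is reached via a base
       temp rather than a fixed register; it is shadowed here by a
       TEMP_EBB temp, and the loop below rewrites uses to that shadow,
       making the loads and stores explicit.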
*/ 3209 for (i = 0; i < nb_globals; ++i) { 3210 TCGTemp *its = &s->temps[i]; 3211 if (its->indirect_reg) { 3212 TCGTemp *dts = tcg_temp_alloc(s); 3213 dts->type = its->type; 3214 dts->base_type = its->base_type; 3215 dts->temp_subindex = its->temp_subindex; 3216 dts->kind = TEMP_EBB; 3217 its->state_ptr = dts; 3218 } else { 3219 its->state_ptr = NULL; 3220 } 3221 /* All globals begin dead. */ 3222 its->state = TS_DEAD; 3223 } 3224 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3225 TCGTemp *its = &s->temps[i]; 3226 its->state_ptr = NULL; 3227 its->state = TS_DEAD; 3228 } 3229 3230 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3231 TCGOpcode opc = op->opc; 3232 const TCGOpDef *def = &tcg_op_defs[opc]; 3233 TCGLifeData arg_life = op->life; 3234 int nb_iargs, nb_oargs, call_flags; 3235 TCGTemp *arg_ts, *dir_ts; 3236 3237 if (opc == INDEX_op_call) { 3238 nb_oargs = TCGOP_CALLO(op); 3239 nb_iargs = TCGOP_CALLI(op); 3240 call_flags = tcg_call_flags(op); 3241 } else { 3242 nb_iargs = def->nb_iargs; 3243 nb_oargs = def->nb_oargs; 3244 3245 /* Set flags similar to how calls require. */ 3246 if (def->flags & TCG_OPF_COND_BRANCH) { 3247 /* Like reading globals: sync_globals */ 3248 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3249 } else if (def->flags & TCG_OPF_BB_END) { 3250 /* Like writing globals: save_globals */ 3251 call_flags = 0; 3252 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3253 /* Like reading globals: sync_globals */ 3254 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3255 } else { 3256 /* No effect on globals. */ 3257 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3258 TCG_CALL_NO_WRITE_GLOBALS); 3259 } 3260 } 3261 3262 /* Make sure that input arguments are available. */ 3263 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3264 arg_ts = arg_temp(op->args[i]); 3265 dir_ts = arg_ts->state_ptr; 3266 if (dir_ts && arg_ts->state == TS_DEAD) { 3267 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3268 ? INDEX_op_ld_i32 3269 : INDEX_op_ld_i64); 3270 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3271 3272 lop->args[0] = temp_arg(dir_ts); 3273 lop->args[1] = temp_arg(arg_ts->mem_base); 3274 lop->args[2] = arg_ts->mem_offset; 3275 3276 /* Loaded, but synced with memory. */ 3277 arg_ts->state = TS_MEM; 3278 } 3279 } 3280 3281 /* Perform input replacement, and mark inputs that became dead. 3282 No action is required except keeping temp_state up to date 3283 so that we reload when needed. */ 3284 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3285 arg_ts = arg_temp(op->args[i]); 3286 dir_ts = arg_ts->state_ptr; 3287 if (dir_ts) { 3288 op->args[i] = temp_arg(dir_ts); 3289 changes = true; 3290 if (IS_DEAD_ARG(i)) { 3291 arg_ts->state = TS_DEAD; 3292 } 3293 } 3294 } 3295 3296 /* Liveness analysis should ensure that the following are 3297 all correct, for call sites and basic block end points. */ 3298 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3299 /* Nothing to do */ 3300 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3301 for (i = 0; i < nb_globals; ++i) { 3302 /* Liveness should see that globals are synced back, 3303 that is, either TS_DEAD or TS_MEM. */ 3304 arg_ts = &s->temps[i]; 3305 tcg_debug_assert(arg_ts->state_ptr == 0 3306 || arg_ts->state != 0); 3307 } 3308 } else { 3309 for (i = 0; i < nb_globals; ++i) { 3310 /* Liveness should see that globals are saved back, 3311 that is, TS_DEAD, waiting to be reloaded. */ 3312 arg_ts = &s->temps[i]; 3313 tcg_debug_assert(arg_ts->state_ptr == 0 3314 || arg_ts->state == TS_DEAD); 3315 } 3316 } 3317 3318 /* Outputs become available. 
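           For a mov whose output must be synced, the store below is
           taken from the mov's input when the output is then dead, and
           the mov itself is removed.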
*/ 3319 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3320 arg_ts = arg_temp(op->args[0]); 3321 dir_ts = arg_ts->state_ptr; 3322 if (dir_ts) { 3323 op->args[0] = temp_arg(dir_ts); 3324 changes = true; 3325 3326 /* The output is now live and modified. */ 3327 arg_ts->state = 0; 3328 3329 if (NEED_SYNC_ARG(0)) { 3330 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3331 ? INDEX_op_st_i32 3332 : INDEX_op_st_i64); 3333 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3334 TCGTemp *out_ts = dir_ts; 3335 3336 if (IS_DEAD_ARG(0)) { 3337 out_ts = arg_temp(op->args[1]); 3338 arg_ts->state = TS_DEAD; 3339 tcg_op_remove(s, op); 3340 } else { 3341 arg_ts->state = TS_MEM; 3342 } 3343 3344 sop->args[0] = temp_arg(out_ts); 3345 sop->args[1] = temp_arg(arg_ts->mem_base); 3346 sop->args[2] = arg_ts->mem_offset; 3347 } else { 3348 tcg_debug_assert(!IS_DEAD_ARG(0)); 3349 } 3350 } 3351 } else { 3352 for (i = 0; i < nb_oargs; i++) { 3353 arg_ts = arg_temp(op->args[i]); 3354 dir_ts = arg_ts->state_ptr; 3355 if (!dir_ts) { 3356 continue; 3357 } 3358 op->args[i] = temp_arg(dir_ts); 3359 changes = true; 3360 3361 /* The output is now live and modified. */ 3362 arg_ts->state = 0; 3363 3364 /* Sync outputs upon their last write. */ 3365 if (NEED_SYNC_ARG(i)) { 3366 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3367 ? INDEX_op_st_i32 3368 : INDEX_op_st_i64); 3369 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3370 3371 sop->args[0] = temp_arg(dir_ts); 3372 sop->args[1] = temp_arg(arg_ts->mem_base); 3373 sop->args[2] = arg_ts->mem_offset; 3374 3375 arg_ts->state = TS_MEM; 3376 } 3377 /* Drop outputs that are dead. */ 3378 if (IS_DEAD_ARG(i)) { 3379 arg_ts->state = TS_DEAD; 3380 } 3381 } 3382 } 3383 } 3384 3385 return changes; 3386 } 3387 3388 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3389 { 3390 intptr_t off; 3391 int size, align; 3392 3393 /* When allocating an object, look at the full type. */ 3394 size = tcg_type_size(ts->base_type); 3395 switch (ts->base_type) { 3396 case TCG_TYPE_I32: 3397 align = 4; 3398 break; 3399 case TCG_TYPE_I64: 3400 case TCG_TYPE_V64: 3401 align = 8; 3402 break; 3403 case TCG_TYPE_I128: 3404 case TCG_TYPE_V128: 3405 case TCG_TYPE_V256: 3406 /* 3407 * Note that we do not require aligned storage for V256, 3408 * and that we provide alignment for I128 to match V128, 3409 * even if that's above what the host ABI requires. 3410 */ 3411 align = 16; 3412 break; 3413 default: 3414 g_assert_not_reached(); 3415 } 3416 3417 /* 3418 * Assume the stack is sufficiently aligned. 3419 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3420 * and do not require 16 byte vector alignment. This seems slightly 3421 * easier than fully parameterizing the above switch statement. 3422 */ 3423 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3424 off = ROUND_UP(s->current_frame_offset, align); 3425 3426 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3427 if (off + size > s->frame_end) { 3428 tcg_raise_tb_overflow(s); 3429 } 3430 s->current_frame_offset = off + size; 3431 #if defined(__sparc__) 3432 off += TCG_TARGET_STACK_BIAS; 3433 #endif 3434 3435 /* If the object was subdivided, assign memory to all the parts. */ 3436 if (ts->base_type != ts->type) { 3437 int part_size = tcg_type_size(ts->type); 3438 int part_count = size / part_size; 3439 3440 /* 3441 * Each part is allocated sequentially in tcg_temp_new_internal. 3442 * Jump back to the first part by subtracting the current index. 
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}

/* Assign @reg to @ts, and update reg_to_temp[]. */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_LOCAL:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_NORMAL:
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory.  'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.
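               tcg_out_sti() may decline constants the backend cannot
               store directly, in which case we fall through to loading
               the constant into a register and storing from there.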
*/ 3536 if (free_or_dead 3537 && tcg_out_sti(s, ts->type, ts->val, 3538 ts->mem_base->reg, ts->mem_offset)) { 3539 break; 3540 } 3541 temp_load(s, ts, tcg_target_available_regs[ts->type], 3542 allocated_regs, preferred_regs); 3543 /* fallthrough */ 3544 3545 case TEMP_VAL_REG: 3546 tcg_out_st(s, ts->type, ts->reg, 3547 ts->mem_base->reg, ts->mem_offset); 3548 break; 3549 3550 case TEMP_VAL_MEM: 3551 break; 3552 3553 case TEMP_VAL_DEAD: 3554 default: 3555 tcg_abort(); 3556 } 3557 ts->mem_coherent = 1; 3558 } 3559 if (free_or_dead) { 3560 temp_free_or_dead(s, ts, free_or_dead); 3561 } 3562 } 3563 3564 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3565 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3566 { 3567 TCGTemp *ts = s->reg_to_temp[reg]; 3568 if (ts != NULL) { 3569 temp_sync(s, ts, allocated_regs, 0, -1); 3570 } 3571 } 3572 3573 /** 3574 * tcg_reg_alloc: 3575 * @required_regs: Set of registers in which we must allocate. 3576 * @allocated_regs: Set of registers which must be avoided. 3577 * @preferred_regs: Set of registers we should prefer. 3578 * @rev: True if we search the registers in "indirect" order. 3579 * 3580 * The allocated register must be in @required_regs & ~@allocated_regs, 3581 * but if we can put it in @preferred_regs we may save a move later. 3582 */ 3583 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3584 TCGRegSet allocated_regs, 3585 TCGRegSet preferred_regs, bool rev) 3586 { 3587 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3588 TCGRegSet reg_ct[2]; 3589 const int *order; 3590 3591 reg_ct[1] = required_regs & ~allocated_regs; 3592 tcg_debug_assert(reg_ct[1] != 0); 3593 reg_ct[0] = reg_ct[1] & preferred_regs; 3594 3595 /* Skip the preferred_regs option if it cannot be satisfied, 3596 or if the preference made no difference. */ 3597 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3598 3599 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3600 3601 /* Try free registers, preferences first. */ 3602 for (j = f; j < 2; j++) { 3603 TCGRegSet set = reg_ct[j]; 3604 3605 if (tcg_regset_single(set)) { 3606 /* One register in the set. */ 3607 TCGReg reg = tcg_regset_first(set); 3608 if (s->reg_to_temp[reg] == NULL) { 3609 return reg; 3610 } 3611 } else { 3612 for (i = 0; i < n; i++) { 3613 TCGReg reg = order[i]; 3614 if (s->reg_to_temp[reg] == NULL && 3615 tcg_regset_test_reg(set, reg)) { 3616 return reg; 3617 } 3618 } 3619 } 3620 } 3621 3622 /* We must spill something. */ 3623 for (j = f; j < 2; j++) { 3624 TCGRegSet set = reg_ct[j]; 3625 3626 if (tcg_regset_single(set)) { 3627 /* One register in the set. */ 3628 TCGReg reg = tcg_regset_first(set); 3629 tcg_reg_free(s, reg, allocated_regs); 3630 return reg; 3631 } else { 3632 for (i = 0; i < n; i++) { 3633 TCGReg reg = order[i]; 3634 if (tcg_regset_test_reg(set, reg)) { 3635 tcg_reg_free(s, reg, allocated_regs); 3636 return reg; 3637 } 3638 } 3639 } 3640 } 3641 3642 tcg_abort(); 3643 } 3644 3645 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 3646 TCGRegSet allocated_regs, 3647 TCGRegSet preferred_regs, bool rev) 3648 { 3649 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3650 TCGRegSet reg_ct[2]; 3651 const int *order; 3652 3653 /* Ensure that if I is not in allocated_regs, I+1 is not either. 
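       Shifting the allocated mask right by one also removes from the
       candidate set any base register whose upper half is already taken.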
     */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    tcg_abort();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway;
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code.
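   Unlike save_globals(), a register copy may stay live; we only insist
   that any such copy be mem_coherent.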
   'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_LOCAL:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        switch (ts->kind) {
        case TEMP_LOCAL:
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_NORMAL:
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            break;
        default:
            g_assert_not_reached();
        }
    }
}

/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here. */
    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/*
 * Specialized code generation for INDEX_op_mov_*.
 */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;
    TCGReg oreg, ireg;

    allocated_regs = s->reserved_regs;
    preferred_regs = output_pref(op, 0);
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Note that otype != itype for no-op truncation.
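       (e.g. a 64-to-32-bit truncation that is implemented as a plain
       mov on a 64-bit host).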
     */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype],
                  allocated_regs, preferred_regs);
    }
    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    ireg = ts->reg;

    if (IS_DEAD_ARG(0)) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
        return;
    }

    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
        /*
         * The mov can be suppressed.  Kill input first, so that it
         * is unlinked from reg_to_temp, then set the output to the
         * reg that we saved from the input.
         */
        temp_dead(s, ts);
        oreg = ireg;
    } else {
        if (ots->val_type == TEMP_VAL_REG) {
            oreg = ots->reg;
        } else {
            /* Make sure to not spill the input register during allocation. */
            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                 allocated_regs | ((TCGRegSet)1 << ireg),
                                 preferred_regs, ots->indirect_base);
        }
        if (!tcg_out_mov(s, otype, oreg, ireg)) {
            /*
             * Cross register class move not supported.
             * Store the source register into the destination slot
             * and leave the destination temp as TEMP_VAL_MEM.
             */
            assert(!temp_readonly(ots));
            if (!ots->mem_allocated) {
                temp_allocate_frame(s, ots);
            }
            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
            set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
            ots->mem_coherent = 1;
            return;
        }
    }
    set_temp_val_reg(s, ots, oreg);
    ots->mem_coherent = 0;

    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, allocated_regs, 0, 0);
    }
}

/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.
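           The constant is only recorded here; when the value is finally
           needed in a register, temp_load() picks the smallest element
           size that replicates to it and emits the dupi.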
*/
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed.
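   (Added note: the value was just loaded into ots->reg, which was
   allocated from dup_out_regs, so this is a dup within the vector
   register class; every backend is expected to support that case,
   e.g. an AVX2 host can use "vpbroadcastd %xmm0, %ymm0", hence the
   assert below rather than another fallback.)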
*/ 4054 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4055 tcg_debug_assert(ok); 4056 4057 done: 4058 ots->mem_coherent = 0; 4059 if (IS_DEAD_ARG(1)) { 4060 temp_dead(s, its); 4061 } 4062 if (NEED_SYNC_ARG(0)) { 4063 temp_sync(s, ots, s->reserved_regs, 0, 0); 4064 } 4065 if (IS_DEAD_ARG(0)) { 4066 temp_dead(s, ots); 4067 } 4068 } 4069 4070 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4071 { 4072 const TCGLifeData arg_life = op->life; 4073 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4074 TCGRegSet i_allocated_regs; 4075 TCGRegSet o_allocated_regs; 4076 int i, k, nb_iargs, nb_oargs; 4077 TCGReg reg; 4078 TCGArg arg; 4079 const TCGArgConstraint *arg_ct; 4080 TCGTemp *ts; 4081 TCGArg new_args[TCG_MAX_OP_ARGS]; 4082 int const_args[TCG_MAX_OP_ARGS]; 4083 4084 nb_oargs = def->nb_oargs; 4085 nb_iargs = def->nb_iargs; 4086 4087 /* copy constants */ 4088 memcpy(new_args + nb_oargs + nb_iargs, 4089 op->args + nb_oargs + nb_iargs, 4090 sizeof(TCGArg) * def->nb_cargs); 4091 4092 i_allocated_regs = s->reserved_regs; 4093 o_allocated_regs = s->reserved_regs; 4094 4095 /* satisfy input constraints */ 4096 for (k = 0; k < nb_iargs; k++) { 4097 TCGRegSet i_preferred_regs, i_required_regs; 4098 bool allocate_new_reg, copyto_new_reg; 4099 TCGTemp *ts2; 4100 int i1, i2; 4101 4102 i = def->args_ct[nb_oargs + k].sort_index; 4103 arg = op->args[i]; 4104 arg_ct = &def->args_ct[i]; 4105 ts = arg_temp(arg); 4106 4107 if (ts->val_type == TEMP_VAL_CONST 4108 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4109 /* constant is OK for instruction */ 4110 const_args[i] = 1; 4111 new_args[i] = ts->val; 4112 continue; 4113 } 4114 4115 reg = ts->reg; 4116 i_preferred_regs = 0; 4117 i_required_regs = arg_ct->regs; 4118 allocate_new_reg = false; 4119 copyto_new_reg = false; 4120 4121 switch (arg_ct->pair) { 4122 case 0: /* not paired */ 4123 if (arg_ct->ialias) { 4124 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4125 4126 /* 4127 * If the input is readonly, then it cannot also be an 4128 * output and aliased to itself. If the input is not 4129 * dead after the instruction, we must allocate a new 4130 * register and move it. 4131 */ 4132 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4133 allocate_new_reg = true; 4134 } else if (ts->val_type == TEMP_VAL_REG) { 4135 /* 4136 * Check if the current register has already been 4137 * allocated for another input. 4138 */ 4139 allocate_new_reg = 4140 tcg_regset_test_reg(i_allocated_regs, reg); 4141 } 4142 } 4143 if (!allocate_new_reg) { 4144 temp_load(s, ts, i_required_regs, i_allocated_regs, 4145 i_preferred_regs); 4146 reg = ts->reg; 4147 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4148 } 4149 if (allocate_new_reg) { 4150 /* 4151 * Allocate a new register matching the constraint 4152 * and move the temporary register into it. 4153 */ 4154 temp_load(s, ts, tcg_target_available_regs[ts->type], 4155 i_allocated_regs, 0); 4156 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4157 i_preferred_regs, ts->indirect_base); 4158 copyto_new_reg = true; 4159 } 4160 break; 4161 4162 case 1: 4163 /* First of an input pair; if i1 == i2, the second is an output. */ 4164 i1 = i; 4165 i2 = arg_ct->pair_index; 4166 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4167 4168 /* 4169 * It is easier to default to allocating a new pair 4170 * and to identify a few cases where it's not required. 
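 *
 * Hypothetical illustration (added, not original text): for a 32-bit
 * host op such as
 *     add2_i32 rl, rh, al, ah, bl, bh
 * where the input pair (al,ah) aliases the output pair, dies here,
 * already sits in consecutive registers, and that pair is otherwise
 * unclaimed, the early breaks below reuse those registers instead of
 * allocating and filling a fresh pair.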
4171 */ 4172 if (arg_ct->ialias) { 4173 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4174 if (IS_DEAD_ARG(i1) && 4175 IS_DEAD_ARG(i2) && 4176 !temp_readonly(ts) && 4177 ts->val_type == TEMP_VAL_REG && 4178 ts->reg < TCG_TARGET_NB_REGS - 1 && 4179 tcg_regset_test_reg(i_required_regs, reg) && 4180 !tcg_regset_test_reg(i_allocated_regs, reg) && 4181 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4182 (ts2 4183 ? ts2->val_type == TEMP_VAL_REG && 4184 ts2->reg == reg + 1 && 4185 !temp_readonly(ts2) 4186 : s->reg_to_temp[reg + 1] == NULL)) { 4187 break; 4188 } 4189 } else { 4190 /* Without aliasing, the pair must also be an input. */ 4191 tcg_debug_assert(ts2); 4192 if (ts->val_type == TEMP_VAL_REG && 4193 ts2->val_type == TEMP_VAL_REG && 4194 ts2->reg == reg + 1 && 4195 tcg_regset_test_reg(i_required_regs, reg)) { 4196 break; 4197 } 4198 } 4199 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4200 0, ts->indirect_base); 4201 goto do_pair; 4202 4203 case 2: /* pair second */ 4204 reg = new_args[arg_ct->pair_index] + 1; 4205 goto do_pair; 4206 4207 case 3: /* ialias with second output, no first input */ 4208 tcg_debug_assert(arg_ct->ialias); 4209 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4210 4211 if (IS_DEAD_ARG(i) && 4212 !temp_readonly(ts) && 4213 ts->val_type == TEMP_VAL_REG && 4214 reg > 0 && 4215 s->reg_to_temp[reg - 1] == NULL && 4216 tcg_regset_test_reg(i_required_regs, reg) && 4217 !tcg_regset_test_reg(i_allocated_regs, reg) && 4218 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4219 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4220 break; 4221 } 4222 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4223 i_allocated_regs, 0, 4224 ts->indirect_base); 4225 tcg_regset_set_reg(i_allocated_regs, reg); 4226 reg += 1; 4227 goto do_pair; 4228 4229 do_pair: 4230 /* 4231 * If an aliased input is not dead after the instruction, 4232 * we must allocate a new register and move it. 4233 */ 4234 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4235 TCGRegSet t_allocated_regs = i_allocated_regs; 4236 4237 /* 4238 * Because of the alias, and the continued life, make sure 4239 * that the temp is somewhere *other* than the reg pair, 4240 * and we get a copy in reg. 4241 */ 4242 tcg_regset_set_reg(t_allocated_regs, reg); 4243 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4244 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4245 /* If ts was already in reg, copy it somewhere else. */ 4246 TCGReg nr; 4247 bool ok; 4248 4249 tcg_debug_assert(ts->kind != TEMP_FIXED); 4250 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4251 t_allocated_regs, 0, ts->indirect_base); 4252 ok = tcg_out_mov(s, ts->type, nr, reg); 4253 tcg_debug_assert(ok); 4254 4255 set_temp_val_reg(s, ts, nr); 4256 } else { 4257 temp_load(s, ts, tcg_target_available_regs[ts->type], 4258 t_allocated_regs, 0); 4259 copyto_new_reg = true; 4260 } 4261 } else { 4262 /* Preferably allocate to reg, otherwise copy. */ 4263 i_required_regs = (TCGRegSet)1 << reg; 4264 temp_load(s, ts, i_required_regs, i_allocated_regs, 4265 i_preferred_regs); 4266 copyto_new_reg = ts->reg != reg; 4267 } 4268 break; 4269 4270 default: 4271 g_assert_not_reached(); 4272 } 4273 4274 if (copyto_new_reg) { 4275 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4276 /* 4277 * Cross register class move not supported. Sync the 4278 * temp back to its slot and load from there. 
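 *
 * Assumed example (added commentary): if ts lives in a vector
 * register but the constraint demands an integer register on a host
 * with no direct vector-to-integer move, this expands to a store
 * plus a load through the temp's memory slot:
 *     st   ts->reg  -> [mem_base + mem_offset]    (temp_sync)
 *     ld   [mem_base + mem_offset] -> reg         (tcg_out_ld)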
4279 */ 4280 temp_sync(s, ts, i_allocated_regs, 0, 0); 4281 tcg_out_ld(s, ts->type, reg, 4282 ts->mem_base->reg, ts->mem_offset); 4283 } 4284 } 4285 new_args[i] = reg; 4286 const_args[i] = 0; 4287 tcg_regset_set_reg(i_allocated_regs, reg); 4288 } 4289 4290 /* mark dead temporaries and free the associated registers */ 4291 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4292 if (IS_DEAD_ARG(i)) { 4293 temp_dead(s, arg_temp(op->args[i])); 4294 } 4295 } 4296 4297 if (def->flags & TCG_OPF_COND_BRANCH) { 4298 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4299 } else if (def->flags & TCG_OPF_BB_END) { 4300 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4301 } else { 4302 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4303 /* XXX: permit generic clobber register list ? */ 4304 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4305 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4306 tcg_reg_free(s, i, i_allocated_regs); 4307 } 4308 } 4309 } 4310 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4311 /* sync globals if the op has side effects and might trigger 4312 an exception. */ 4313 sync_globals(s, i_allocated_regs); 4314 } 4315 4316 /* satisfy the output constraints */ 4317 for(k = 0; k < nb_oargs; k++) { 4318 i = def->args_ct[k].sort_index; 4319 arg = op->args[i]; 4320 arg_ct = &def->args_ct[i]; 4321 ts = arg_temp(arg); 4322 4323 /* ENV should not be modified. */ 4324 tcg_debug_assert(!temp_readonly(ts)); 4325 4326 switch (arg_ct->pair) { 4327 case 0: /* not paired */ 4328 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4329 reg = new_args[arg_ct->alias_index]; 4330 } else if (arg_ct->newreg) { 4331 reg = tcg_reg_alloc(s, arg_ct->regs, 4332 i_allocated_regs | o_allocated_regs, 4333 output_pref(op, k), ts->indirect_base); 4334 } else { 4335 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4336 output_pref(op, k), ts->indirect_base); 4337 } 4338 break; 4339 4340 case 1: /* first of pair */ 4341 tcg_debug_assert(!arg_ct->newreg); 4342 if (arg_ct->oalias) { 4343 reg = new_args[arg_ct->alias_index]; 4344 break; 4345 } 4346 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 4347 output_pref(op, k), ts->indirect_base); 4348 break; 4349 4350 case 2: /* second of pair */ 4351 tcg_debug_assert(!arg_ct->newreg); 4352 if (arg_ct->oalias) { 4353 reg = new_args[arg_ct->alias_index]; 4354 } else { 4355 reg = new_args[arg_ct->pair_index] + 1; 4356 } 4357 break; 4358 4359 case 3: /* first of pair, aliasing with a second input */ 4360 tcg_debug_assert(!arg_ct->newreg); 4361 reg = new_args[arg_ct->pair_index] - 1; 4362 break; 4363 4364 default: 4365 g_assert_not_reached(); 4366 } 4367 tcg_regset_set_reg(o_allocated_regs, reg); 4368 set_temp_val_reg(s, ts, reg); 4369 ts->mem_coherent = 0; 4370 new_args[i] = reg; 4371 } 4372 } 4373 4374 /* emit instruction */ 4375 if (def->flags & TCG_OPF_VECTOR) { 4376 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4377 new_args, const_args); 4378 } else { 4379 tcg_out_op(s, op->opc, new_args, const_args); 4380 } 4381 4382 /* move the outputs in the correct register if needed */ 4383 for(i = 0; i < nb_oargs; i++) { 4384 ts = arg_temp(op->args[i]); 4385 4386 /* ENV should not be modified. 
*/ 4387 tcg_debug_assert(!temp_readonly(ts)); 4388 4389 if (NEED_SYNC_ARG(i)) { 4390 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4391 } else if (IS_DEAD_ARG(i)) { 4392 temp_dead(s, ts); 4393 } 4394 } 4395 } 4396 4397 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4398 { 4399 const TCGLifeData arg_life = op->life; 4400 TCGTemp *ots, *itsl, *itsh; 4401 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4402 4403 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4404 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4405 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4406 4407 ots = arg_temp(op->args[0]); 4408 itsl = arg_temp(op->args[1]); 4409 itsh = arg_temp(op->args[2]); 4410 4411 /* ENV should not be modified. */ 4412 tcg_debug_assert(!temp_readonly(ots)); 4413 4414 /* Allocate the output register now. */ 4415 if (ots->val_type != TEMP_VAL_REG) { 4416 TCGRegSet allocated_regs = s->reserved_regs; 4417 TCGRegSet dup_out_regs = 4418 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4419 TCGReg oreg; 4420 4421 /* Make sure to not spill the input registers. */ 4422 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 4423 tcg_regset_set_reg(allocated_regs, itsl->reg); 4424 } 4425 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 4426 tcg_regset_set_reg(allocated_regs, itsh->reg); 4427 } 4428 4429 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4430 output_pref(op, 0), ots->indirect_base); 4431 set_temp_val_reg(s, ots, oreg); 4432 } 4433 4434 /* Promote dup2 of immediates to dupi_vec. */ 4435 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 4436 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 4437 MemOp vece = MO_64; 4438 4439 if (val == dup_const(MO_8, val)) { 4440 vece = MO_8; 4441 } else if (val == dup_const(MO_16, val)) { 4442 vece = MO_16; 4443 } else if (val == dup_const(MO_32, val)) { 4444 vece = MO_32; 4445 } 4446 4447 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 4448 goto done; 4449 } 4450 4451 /* If the two inputs form one 64-bit value, try dupm_vec. */ 4452 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 4453 itsh->temp_subindex == !HOST_BIG_ENDIAN && 4454 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 4455 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 4456 4457 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 4458 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 4459 4460 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 4461 its->mem_base->reg, its->mem_offset)) { 4462 goto done; 4463 } 4464 } 4465 4466 /* Fall back to generic expansion. */ 4467 return false; 4468 4469 done: 4470 ots->mem_coherent = 0; 4471 if (IS_DEAD_ARG(1)) { 4472 temp_dead(s, itsl); 4473 } 4474 if (IS_DEAD_ARG(2)) { 4475 temp_dead(s, itsh); 4476 } 4477 if (NEED_SYNC_ARG(0)) { 4478 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 4479 } else if (IS_DEAD_ARG(0)) { 4480 temp_dead(s, ots); 4481 } 4482 return true; 4483 } 4484 4485 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 4486 TCGRegSet allocated_regs) 4487 { 4488 if (ts->val_type == TEMP_VAL_REG) { 4489 if (ts->reg != reg) { 4490 tcg_reg_free(s, reg, allocated_regs); 4491 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4492 /* 4493 * Cross register class move not supported. Sync the 4494 * temp back to its slot and load from there. 
4495 */ 4496 temp_sync(s, ts, allocated_regs, 0, 0); 4497 tcg_out_ld(s, ts->type, reg, 4498 ts->mem_base->reg, ts->mem_offset); 4499 } 4500 } 4501 } else { 4502 TCGRegSet arg_set = 0; 4503 4504 tcg_reg_free(s, reg, allocated_regs); 4505 tcg_regset_set_reg(arg_set, reg); 4506 temp_load(s, ts, arg_set, allocated_regs, 0); 4507 } 4508 } 4509 4510 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts, 4511 TCGRegSet allocated_regs) 4512 { 4513 /* 4514 * When the destination is on the stack, load up the temp and store. 4515 * If there are many call-saved registers, the temp might live to 4516 * see another use; otherwise it'll be discarded. 4517 */ 4518 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 4519 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 4520 TCG_TARGET_CALL_STACK_OFFSET + 4521 stk_slot * sizeof(tcg_target_long)); 4522 } 4523 4524 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 4525 TCGTemp *ts, TCGRegSet *allocated_regs) 4526 { 4527 if (REG_P(l)) { 4528 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 4529 load_arg_reg(s, reg, ts, *allocated_regs); 4530 tcg_regset_set_reg(*allocated_regs, reg); 4531 } else { 4532 load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs), 4533 ts, *allocated_regs); 4534 } 4535 } 4536 4537 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base, 4538 intptr_t ref_off, TCGRegSet *allocated_regs) 4539 { 4540 TCGReg reg; 4541 int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 4542 4543 if (stk_slot < 0) { 4544 reg = tcg_target_call_iarg_regs[arg_slot]; 4545 tcg_reg_free(s, reg, *allocated_regs); 4546 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4547 tcg_regset_set_reg(*allocated_regs, reg); 4548 } else { 4549 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 4550 *allocated_regs, 0, false); 4551 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4552 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 4553 TCG_TARGET_CALL_STACK_OFFSET 4554 + stk_slot * sizeof(tcg_target_long)); 4555 } 4556 } 4557 4558 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 4559 { 4560 const int nb_oargs = TCGOP_CALLO(op); 4561 const int nb_iargs = TCGOP_CALLI(op); 4562 const TCGLifeData arg_life = op->life; 4563 const TCGHelperInfo *info = tcg_call_info(op); 4564 TCGRegSet allocated_regs = s->reserved_regs; 4565 int i; 4566 4567 /* 4568 * Move inputs into place in reverse order, 4569 * so that we place stacked arguments first. 4570 */ 4571 for (i = nb_iargs - 1; i >= 0; --i) { 4572 const TCGCallArgumentLoc *loc = &info->in[i]; 4573 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 4574 4575 switch (loc->kind) { 4576 case TCG_CALL_ARG_NORMAL: 4577 case TCG_CALL_ARG_EXTEND_U: 4578 case TCG_CALL_ARG_EXTEND_S: 4579 load_arg_normal(s, loc, ts, &allocated_regs); 4580 break; 4581 case TCG_CALL_ARG_BY_REF: 4582 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4583 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 4584 TCG_TARGET_CALL_STACK_OFFSET 4585 + loc->ref_slot * sizeof(tcg_target_long), 4586 &allocated_regs); 4587 break; 4588 case TCG_CALL_ARG_BY_REF_N: 4589 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4590 break; 4591 default: 4592 g_assert_not_reached(); 4593 } 4594 } 4595 4596 /* Mark dead temporaries and free the associated registers. */ 4597 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4598 if (IS_DEAD_ARG(i)) { 4599 temp_dead(s, arg_temp(op->args[i])); 4600 } 4601 } 4602 4603 /* Clobber call registers. 
*/ 4604 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4605 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4606 tcg_reg_free(s, i, allocated_regs); 4607 } 4608 } 4609 4610 /* 4611 * Save globals if they might be written by the helper, 4612 * sync them if they might be read. 4613 */ 4614 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 4615 /* Nothing to do */ 4616 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 4617 sync_globals(s, allocated_regs); 4618 } else { 4619 save_globals(s, allocated_regs); 4620 } 4621 4622 /* 4623 * If the ABI passes a pointer to the returned struct as the first 4624 * argument, load that now. Pass a pointer to the output home slot. 4625 */ 4626 if (info->out_kind == TCG_CALL_RET_BY_REF) { 4627 TCGTemp *ts = arg_temp(op->args[0]); 4628 4629 if (!ts->mem_allocated) { 4630 temp_allocate_frame(s, ts); 4631 } 4632 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 4633 } 4634 4635 tcg_out_call(s, tcg_call_func(op), info); 4636 4637 /* Assign output registers and emit moves if needed. */ 4638 switch (info->out_kind) { 4639 case TCG_CALL_RET_NORMAL: 4640 for (i = 0; i < nb_oargs; i++) { 4641 TCGTemp *ts = arg_temp(op->args[i]); 4642 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 4643 4644 /* ENV should not be modified. */ 4645 tcg_debug_assert(!temp_readonly(ts)); 4646 4647 set_temp_val_reg(s, ts, reg); 4648 ts->mem_coherent = 0; 4649 } 4650 break; 4651 4652 case TCG_CALL_RET_BY_VEC: 4653 { 4654 TCGTemp *ts = arg_temp(op->args[0]); 4655 4656 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 4657 tcg_debug_assert(ts->temp_subindex == 0); 4658 if (!ts->mem_allocated) { 4659 temp_allocate_frame(s, ts); 4660 } 4661 tcg_out_st(s, TCG_TYPE_V128, 4662 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 4663 ts->mem_base->reg, ts->mem_offset); 4664 } 4665 /* fall through to mark all parts in memory */ 4666 4667 case TCG_CALL_RET_BY_REF: 4668 /* The callee has performed a write through the reference. */ 4669 for (i = 0; i < nb_oargs; i++) { 4670 TCGTemp *ts = arg_temp(op->args[i]); 4671 ts->val_type = TEMP_VAL_MEM; 4672 } 4673 break; 4674 4675 default: 4676 g_assert_not_reached(); 4677 } 4678 4679 /* Flush or discard output registers as needed. 
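   E.g. (added, illustrative): a helper returning i64 on a 32-bit
   host produces two output parts; a part required later is written
   back to its slot by temp_sync(), while a part that liveness marked
   dead is dropped outright by temp_dead().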
*/ 4680 for (i = 0; i < nb_oargs; i++) { 4681 TCGTemp *ts = arg_temp(op->args[i]); 4682 if (NEED_SYNC_ARG(i)) { 4683 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 4684 } else if (IS_DEAD_ARG(i)) { 4685 temp_dead(s, ts); 4686 } 4687 } 4688 } 4689 4690 #ifdef CONFIG_PROFILER 4691 4692 /* avoid copy/paste errors */ 4693 #define PROF_ADD(to, from, field) \ 4694 do { \ 4695 (to)->field += qatomic_read(&((from)->field)); \ 4696 } while (0) 4697 4698 #define PROF_MAX(to, from, field) \ 4699 do { \ 4700 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 4701 if (val__ > (to)->field) { \ 4702 (to)->field = val__; \ 4703 } \ 4704 } while (0) 4705 4706 /* Pass in a zero'ed @prof */ 4707 static inline 4708 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 4709 { 4710 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4711 unsigned int i; 4712 4713 for (i = 0; i < n_ctxs; i++) { 4714 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4715 const TCGProfile *orig = &s->prof; 4716 4717 if (counters) { 4718 PROF_ADD(prof, orig, cpu_exec_time); 4719 PROF_ADD(prof, orig, tb_count1); 4720 PROF_ADD(prof, orig, tb_count); 4721 PROF_ADD(prof, orig, op_count); 4722 PROF_MAX(prof, orig, op_count_max); 4723 PROF_ADD(prof, orig, temp_count); 4724 PROF_MAX(prof, orig, temp_count_max); 4725 PROF_ADD(prof, orig, del_op_count); 4726 PROF_ADD(prof, orig, code_in_len); 4727 PROF_ADD(prof, orig, code_out_len); 4728 PROF_ADD(prof, orig, search_out_len); 4729 PROF_ADD(prof, orig, interm_time); 4730 PROF_ADD(prof, orig, code_time); 4731 PROF_ADD(prof, orig, la_time); 4732 PROF_ADD(prof, orig, opt_time); 4733 PROF_ADD(prof, orig, restore_count); 4734 PROF_ADD(prof, orig, restore_time); 4735 } 4736 if (table) { 4737 int i; 4738 4739 for (i = 0; i < NB_OPS; i++) { 4740 PROF_ADD(prof, orig, table_op_count[i]); 4741 } 4742 } 4743 } 4744 } 4745 4746 #undef PROF_ADD 4747 #undef PROF_MAX 4748 4749 static void tcg_profile_snapshot_counters(TCGProfile *prof) 4750 { 4751 tcg_profile_snapshot(prof, true, false); 4752 } 4753 4754 static void tcg_profile_snapshot_table(TCGProfile *prof) 4755 { 4756 tcg_profile_snapshot(prof, false, true); 4757 } 4758 4759 void tcg_dump_op_count(GString *buf) 4760 { 4761 TCGProfile prof = {}; 4762 int i; 4763 4764 tcg_profile_snapshot_table(&prof); 4765 for (i = 0; i < NB_OPS; i++) { 4766 g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, 4767 prof.table_op_count[i]); 4768 } 4769 } 4770 4771 int64_t tcg_cpu_exec_time(void) 4772 { 4773 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4774 unsigned int i; 4775 int64_t ret = 0; 4776 4777 for (i = 0; i < n_ctxs; i++) { 4778 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4779 const TCGProfile *prof = &s->prof; 4780 4781 ret += qatomic_read(&prof->cpu_exec_time); 4782 } 4783 return ret; 4784 } 4785 #else 4786 void tcg_dump_op_count(GString *buf) 4787 { 4788 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 4789 } 4790 4791 int64_t tcg_cpu_exec_time(void) 4792 { 4793 error_report("%s: TCG profiler not compiled", __func__); 4794 exit(EXIT_FAILURE); 4795 } 4796 #endif 4797 4798 4799 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) 4800 { 4801 #ifdef CONFIG_PROFILER 4802 TCGProfile *prof = &s->prof; 4803 #endif 4804 int i, num_insns; 4805 TCGOp *op; 4806 4807 #ifdef CONFIG_PROFILER 4808 { 4809 int n = 0; 4810 4811 QTAILQ_FOREACH(op, &s->ops, link) { 4812 n++; 4813 } 4814 qatomic_set(&prof->op_count, prof->op_count + n); 4815 if (n > prof->op_count_max) { 4816 
qatomic_set(&prof->op_count_max, n); 4817 } 4818 4819 n = s->nb_temps; 4820 qatomic_set(&prof->temp_count, prof->temp_count + n); 4821 if (n > prof->temp_count_max) { 4822 qatomic_set(&prof->temp_count_max, n); 4823 } 4824 } 4825 #endif 4826 4827 #ifdef DEBUG_DISAS 4828 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4829 && qemu_log_in_addr_range(pc_start))) { 4830 FILE *logfile = qemu_log_trylock(); 4831 if (logfile) { 4832 fprintf(logfile, "OP:\n"); 4833 tcg_dump_ops(s, logfile, false); 4834 fprintf(logfile, "\n"); 4835 qemu_log_unlock(logfile); 4836 } 4837 } 4838 #endif 4839 4840 #ifdef CONFIG_DEBUG_TCG 4841 /* Ensure all labels referenced have been emitted. */ 4842 { 4843 TCGLabel *l; 4844 bool error = false; 4845 4846 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4847 if (unlikely(!l->present) && l->refs) { 4848 qemu_log_mask(CPU_LOG_TB_OP, 4849 "$L%d referenced but not present.\n", l->id); 4850 error = true; 4851 } 4852 } 4853 assert(!error); 4854 } 4855 #endif 4856 4857 #ifdef CONFIG_PROFILER 4858 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4859 #endif 4860 4861 #ifdef USE_TCG_OPTIMIZATIONS 4862 tcg_optimize(s); 4863 #endif 4864 4865 #ifdef CONFIG_PROFILER 4866 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4867 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4868 #endif 4869 4870 reachable_code_pass(s); 4871 liveness_pass_1(s); 4872 4873 if (s->nb_indirects > 0) { 4874 #ifdef DEBUG_DISAS 4875 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4876 && qemu_log_in_addr_range(pc_start))) { 4877 FILE *logfile = qemu_log_trylock(); 4878 if (logfile) { 4879 fprintf(logfile, "OP before indirect lowering:\n"); 4880 tcg_dump_ops(s, logfile, false); 4881 fprintf(logfile, "\n"); 4882 qemu_log_unlock(logfile); 4883 } 4884 } 4885 #endif 4886 /* Replace indirect temps with direct temps. */ 4887 if (liveness_pass_2(s)) { 4888 /* If changes were made, re-run liveness. */ 4889 liveness_pass_1(s); 4890 } 4891 } 4892 4893 #ifdef CONFIG_PROFILER 4894 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 4895 #endif 4896 4897 #ifdef DEBUG_DISAS 4898 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 4899 && qemu_log_in_addr_range(pc_start))) { 4900 FILE *logfile = qemu_log_trylock(); 4901 if (logfile) { 4902 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 4903 tcg_dump_ops(s, logfile, true); 4904 fprintf(logfile, "\n"); 4905 qemu_log_unlock(logfile); 4906 } 4907 } 4908 #endif 4909 4910 /* Initialize goto_tb jump offsets. */ 4911 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 4912 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 4913 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 4914 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 4915 4916 tcg_reg_alloc_start(s); 4917 4918 /* 4919 * Reset the buffer pointers when restarting after overflow. 4920 * TODO: Move this into translate-all.c with the rest of the 4921 * buffer management. Having only this done here is confusing. 
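 *
 * Restart protocol, summarized for illustration (inferred from the
 * return values below, not original text): a caller such as
 * tb_gen_code() sees -1 when code_gen_buffer overflows and retries
 * with a fresh buffer, or -2 when the TB itself grows too large and
 * retries with fewer guest instructions; either way the retry starts
 * writing at the new tb->tc.ptr set up here.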
4922 */ 4923 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 4924 s->code_ptr = s->code_buf; 4925 4926 #ifdef TCG_TARGET_NEED_LDST_LABELS 4927 QSIMPLEQ_INIT(&s->ldst_labels); 4928 #endif 4929 #ifdef TCG_TARGET_NEED_POOL_LABELS 4930 s->pool_labels = NULL; 4931 #endif 4932 4933 num_insns = -1; 4934 QTAILQ_FOREACH(op, &s->ops, link) { 4935 TCGOpcode opc = op->opc; 4936 4937 #ifdef CONFIG_PROFILER 4938 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 4939 #endif 4940 4941 switch (opc) { 4942 case INDEX_op_mov_i32: 4943 case INDEX_op_mov_i64: 4944 case INDEX_op_mov_vec: 4945 tcg_reg_alloc_mov(s, op); 4946 break; 4947 case INDEX_op_dup_vec: 4948 tcg_reg_alloc_dup(s, op); 4949 break; 4950 case INDEX_op_insn_start: 4951 if (num_insns >= 0) { 4952 size_t off = tcg_current_code_size(s); 4953 s->gen_insn_end_off[num_insns] = off; 4954 /* Assert that we do not overflow our stored offset. */ 4955 assert(s->gen_insn_end_off[num_insns] == off); 4956 } 4957 num_insns++; 4958 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 4959 target_ulong a; 4960 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 4961 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 4962 #else 4963 a = op->args[i]; 4964 #endif 4965 s->gen_insn_data[num_insns][i] = a; 4966 } 4967 break; 4968 case INDEX_op_discard: 4969 temp_dead(s, arg_temp(op->args[0])); 4970 break; 4971 case INDEX_op_set_label: 4972 tcg_reg_alloc_bb_end(s, s->reserved_regs); 4973 tcg_out_label(s, arg_label(op->args[0])); 4974 break; 4975 case INDEX_op_call: 4976 tcg_reg_alloc_call(s, op); 4977 break; 4978 case INDEX_op_exit_tb: 4979 tcg_out_exit_tb(s, op->args[0]); 4980 break; 4981 case INDEX_op_goto_tb: 4982 tcg_out_goto_tb(s, op->args[0]); 4983 break; 4984 case INDEX_op_dup2_vec: 4985 if (tcg_reg_alloc_dup2(s, op)) { 4986 break; 4987 } 4988 /* fall through */ 4989 default: 4990 /* Sanity check that we've not introduced any unhandled opcodes. */ 4991 tcg_debug_assert(tcg_op_supported(opc)); 4992 /* Note: in order to speed up the code, it would be much 4993 faster to have specialized register allocator functions for 4994 some common argument patterns */ 4995 tcg_reg_alloc_op(s, op); 4996 break; 4997 } 4998 /* Test for (pending) buffer overflow. The assumption is that any 4999 one operation beginning below the high water mark cannot overrun 5000 the buffer completely. Thus we can test for overflow after 5001 generating code without having to check during generation. */ 5002 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 5003 return -1; 5004 } 5005 /* Test for TB overflow, as seen by gen_insn_end_off. 
*/ 5006 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 5007 return -2; 5008 } 5009 } 5010 tcg_debug_assert(num_insns >= 0); 5011 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 5012 5013 /* Generate TB finalization at the end of block */ 5014 #ifdef TCG_TARGET_NEED_LDST_LABELS 5015 i = tcg_out_ldst_finalize(s); 5016 if (i < 0) { 5017 return i; 5018 } 5019 #endif 5020 #ifdef TCG_TARGET_NEED_POOL_LABELS 5021 i = tcg_out_pool_finalize(s); 5022 if (i < 0) { 5023 return i; 5024 } 5025 #endif 5026 if (!tcg_resolve_relocs(s)) { 5027 return -2; 5028 } 5029 5030 #ifndef CONFIG_TCG_INTERPRETER 5031 /* flush instruction cache */ 5032 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 5033 (uintptr_t)s->code_buf, 5034 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 5035 #endif 5036 5037 return tcg_current_code_size(s); 5038 } 5039 5040 #ifdef CONFIG_PROFILER 5041 void tcg_dump_info(GString *buf) 5042 { 5043 TCGProfile prof = {}; 5044 const TCGProfile *s; 5045 int64_t tb_count; 5046 int64_t tb_div_count; 5047 int64_t tot; 5048 5049 tcg_profile_snapshot_counters(&prof); 5050 s = &prof; 5051 tb_count = s->tb_count; 5052 tb_div_count = tb_count ? tb_count : 1; 5053 tot = s->interm_time + s->code_time; 5054 5055 g_string_append_printf(buf, "JIT cycles %" PRId64 5056 " (%0.3f s at 2.4 GHz)\n", 5057 tot, tot / 2.4e9); 5058 g_string_append_printf(buf, "translated TBs %" PRId64 5059 " (aborted=%" PRId64 " %0.1f%%)\n", 5060 tb_count, s->tb_count1 - tb_count, 5061 (double)(s->tb_count1 - s->tb_count) 5062 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 5063 g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", 5064 (double)s->op_count / tb_div_count, s->op_count_max); 5065 g_string_append_printf(buf, "deleted ops/TB %0.2f\n", 5066 (double)s->del_op_count / tb_div_count); 5067 g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", 5068 (double)s->temp_count / tb_div_count, 5069 s->temp_count_max); 5070 g_string_append_printf(buf, "avg host code/TB %0.1f\n", 5071 (double)s->code_out_len / tb_div_count); 5072 g_string_append_printf(buf, "avg search data/TB %0.1f\n", 5073 (double)s->search_out_len / tb_div_count); 5074 5075 g_string_append_printf(buf, "cycles/op %0.1f\n", 5076 s->op_count ? (double)tot / s->op_count : 0); 5077 g_string_append_printf(buf, "cycles/in byte %0.1f\n", 5078 s->code_in_len ? (double)tot / s->code_in_len : 0); 5079 g_string_append_printf(buf, "cycles/out byte %0.1f\n", 5080 s->code_out_len ? (double)tot / s->code_out_len : 0); 5081 g_string_append_printf(buf, "cycles/search byte %0.1f\n", 5082 s->search_out_len ? 5083 (double)tot / s->search_out_len : 0); 5084 if (tot == 0) { 5085 tot = 1; 5086 } 5087 g_string_append_printf(buf, " gen_interm time %0.1f%%\n", 5088 (double)s->interm_time / tot * 100.0); 5089 g_string_append_printf(buf, " gen_code time %0.1f%%\n", 5090 (double)s->code_time / tot * 100.0); 5091 g_string_append_printf(buf, "optim./code time %0.1f%%\n", 5092 (double)s->opt_time / (s->code_time ? 5093 s->code_time : 1) 5094 * 100.0); 5095 g_string_append_printf(buf, "liveness/code time %0.1f%%\n", 5096 (double)s->la_time / (s->code_time ? 5097 s->code_time : 1) * 100.0); 5098 g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", 5099 s->restore_count); 5100 g_string_append_printf(buf, " avg cycles %0.1f\n", 5101 s->restore_count ? 
5102 (double)s->restore_time / s->restore_count : 0); 5103 } 5104 #else 5105 void tcg_dump_info(GString *buf) 5106 { 5107 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 5108 } 5109 #endif 5110 5111 #ifdef ELF_HOST_MACHINE 5112 /* In order to use this feature, the backend needs to do three things: 5113 5114 (1) Define ELF_HOST_MACHINE to indicate both what value to 5115 put into the ELF image and to indicate support for the feature. 5116 5117 (2) Define tcg_register_jit. This should create a buffer containing 5118 the contents of a .debug_frame section that describes the post- 5119 prologue unwind info for the tcg machine. 5120 5121 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 5122 */ 5123 5124 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 5125 typedef enum { 5126 JIT_NOACTION = 0, 5127 JIT_REGISTER_FN, 5128 JIT_UNREGISTER_FN 5129 } jit_actions_t; 5130 5131 struct jit_code_entry { 5132 struct jit_code_entry *next_entry; 5133 struct jit_code_entry *prev_entry; 5134 const void *symfile_addr; 5135 uint64_t symfile_size; 5136 }; 5137 5138 struct jit_descriptor { 5139 uint32_t version; 5140 uint32_t action_flag; 5141 struct jit_code_entry *relevant_entry; 5142 struct jit_code_entry *first_entry; 5143 }; 5144 5145 void __jit_debug_register_code(void) __attribute__((noinline)); 5146 void __jit_debug_register_code(void) 5147 { 5148 asm(""); 5149 } 5150 5151 /* Must statically initialize the version, because GDB may check 5152 the version before we can set it. */ 5153 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 5154 5155 /* End GDB interface. */ 5156 5157 static int find_string(const char *strtab, const char *str) 5158 { 5159 const char *p = strtab + 1; 5160 5161 while (1) { 5162 if (strcmp(p, str) == 0) { 5163 return p - strtab; 5164 } 5165 p += strlen(p) + 1; 5166 } 5167 } 5168 5169 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 5170 const void *debug_frame, 5171 size_t debug_frame_size) 5172 { 5173 struct __attribute__((packed)) DebugInfo { 5174 uint32_t len; 5175 uint16_t version; 5176 uint32_t abbrev; 5177 uint8_t ptr_size; 5178 uint8_t cu_die; 5179 uint16_t cu_lang; 5180 uintptr_t cu_low_pc; 5181 uintptr_t cu_high_pc; 5182 uint8_t fn_die; 5183 char fn_name[16]; 5184 uintptr_t fn_low_pc; 5185 uintptr_t fn_high_pc; 5186 uint8_t cu_eoc; 5187 }; 5188 5189 struct ElfImage { 5190 ElfW(Ehdr) ehdr; 5191 ElfW(Phdr) phdr; 5192 ElfW(Shdr) shdr[7]; 5193 ElfW(Sym) sym[2]; 5194 struct DebugInfo di; 5195 uint8_t da[24]; 5196 char str[80]; 5197 }; 5198 5199 struct ElfImage *img; 5200 5201 static const struct ElfImage img_template = { 5202 .ehdr = { 5203 .e_ident[EI_MAG0] = ELFMAG0, 5204 .e_ident[EI_MAG1] = ELFMAG1, 5205 .e_ident[EI_MAG2] = ELFMAG2, 5206 .e_ident[EI_MAG3] = ELFMAG3, 5207 .e_ident[EI_CLASS] = ELF_CLASS, 5208 .e_ident[EI_DATA] = ELF_DATA, 5209 .e_ident[EI_VERSION] = EV_CURRENT, 5210 .e_type = ET_EXEC, 5211 .e_machine = ELF_HOST_MACHINE, 5212 .e_version = EV_CURRENT, 5213 .e_phoff = offsetof(struct ElfImage, phdr), 5214 .e_shoff = offsetof(struct ElfImage, shdr), 5215 .e_ehsize = sizeof(ElfW(Shdr)), 5216 .e_phentsize = sizeof(ElfW(Phdr)), 5217 .e_phnum = 1, 5218 .e_shentsize = sizeof(ElfW(Shdr)), 5219 .e_shnum = ARRAY_SIZE(img->shdr), 5220 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 5221 #ifdef ELF_HOST_FLAGS 5222 .e_flags = ELF_HOST_FLAGS, 5223 #endif 5224 #ifdef ELF_OSABI 5225 .e_ident[EI_OSABI] = ELF_OSABI, 5226 #endif 5227 }, 5228 .phdr = { 5229 .p_type = PT_LOAD, 5230 .p_flags = PF_X, 5231 }, 5232 
.shdr = { 5233 [0] = { .sh_type = SHT_NULL }, 5234 /* Trick: The contents of code_gen_buffer are not present in 5235 this fake ELF file; that got allocated elsewhere. Therefore 5236 we mark .text as SHT_NOBITS (similar to .bss) so that readers 5237 will not look for contents. We can record any address. */ 5238 [1] = { /* .text */ 5239 .sh_type = SHT_NOBITS, 5240 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 5241 }, 5242 [2] = { /* .debug_info */ 5243 .sh_type = SHT_PROGBITS, 5244 .sh_offset = offsetof(struct ElfImage, di), 5245 .sh_size = sizeof(struct DebugInfo), 5246 }, 5247 [3] = { /* .debug_abbrev */ 5248 .sh_type = SHT_PROGBITS, 5249 .sh_offset = offsetof(struct ElfImage, da), 5250 .sh_size = sizeof(img->da), 5251 }, 5252 [4] = { /* .debug_frame */ 5253 .sh_type = SHT_PROGBITS, 5254 .sh_offset = sizeof(struct ElfImage), 5255 }, 5256 [5] = { /* .symtab */ 5257 .sh_type = SHT_SYMTAB, 5258 .sh_offset = offsetof(struct ElfImage, sym), 5259 .sh_size = sizeof(img->sym), 5260 .sh_info = 1, 5261 .sh_link = ARRAY_SIZE(img->shdr) - 1, 5262 .sh_entsize = sizeof(ElfW(Sym)), 5263 }, 5264 [6] = { /* .strtab */ 5265 .sh_type = SHT_STRTAB, 5266 .sh_offset = offsetof(struct ElfImage, str), 5267 .sh_size = sizeof(img->str), 5268 } 5269 }, 5270 .sym = { 5271 [1] = { /* code_gen_buffer */ 5272 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5273 .st_shndx = 1, 5274 } 5275 }, 5276 .di = { 5277 .len = sizeof(struct DebugInfo) - 4, 5278 .version = 2, 5279 .ptr_size = sizeof(void *), 5280 .cu_die = 1, 5281 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5282 .fn_die = 2, 5283 .fn_name = "code_gen_buffer" 5284 }, 5285 .da = { 5286 1, /* abbrev number (the cu) */ 5287 0x11, 1, /* DW_TAG_compile_unit, has children */ 5288 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5289 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5290 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5291 0, 0, /* end of abbrev */ 5292 2, /* abbrev number (the fn) */ 5293 0x2e, 0, /* DW_TAG_subprogram, no children */ 5294 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5295 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5296 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5297 0, 0, /* end of abbrev */ 5298 0 /* no more abbrev */ 5299 }, 5300 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5301 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5302 }; 5303 5304 /* We only need a single jit entry; statically allocate it. 
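   Registration handshake, summarized for illustration: the code
   below links one_entry into __jit_debug_descriptor, sets
   action_flag to JIT_REGISTER_FN and calls
   __jit_debug_register_code(), on which an attached GDB keeps a
   breakpoint; GDB then reads the in-memory ELF image via
   symfile_addr/symfile_size.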
*/
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
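
/*
 * Added illustrative sketch (not functional code): how a backend
 * might satisfy the three ELF_HOST_MACHINE steps documented above.
 * The DebugFrame layout is hypothetical; real backends hand-encode
 * the CIE/FDE for their particular prologue.
 *
 *     // (1) In tcg-target.h, enable the feature:
 *     #define ELF_HOST_MACHINE  EM_X86_64
 *
 *     // (2) + (3) In tcg-target.c.inc (which is #included into this
 *     // file, so the static tcg_register_jit_int() is visible):
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         static const DebugFrame debug_frame = {
 *             // CIE: code/data alignment and return column, then FDE
 *             // opcodes describing the CFA and the callee-save slots
 *             // established by tcg_target_qemu_prologue().
 *         };
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */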