/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
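
/*
 * Illustrative note (not part of the upstream source): tcg_insn_unit is
 * the backend's native instruction granule, so the same call behaves
 * differently per host.  Assuming a 1-byte unit (as on x86) versus a
 * 4-byte unit (as on AArch64):
 *
 *     tcg_out32(s, 0x90909090);   memcpy path, advances by 4 units (x86)
 *     tcg_out32(s, 0xd503201f);   direct store, advances by 1 unit (AArch64)
 *
 * Either way the code stream advances by exactly 4 bytes, which is why
 * the division (4 / TCG_TARGET_INSN_UNIT_SIZE) is exact on every host.
 */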
/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
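
/*
 * Illustrative example (not part of the upstream source): each line in
 * tcg-target-con-set.h is one of the C_* macro invocations, and the
 * macros are redefined in three phases.  A line such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * first becomes the enumerator c_o1_i2_r_r_ri in the enum above, then
 * the initializer { .args_ct_str = { "r", "r", "ri" } } when the header
 * is re-included for constraint_sets[] below, and finally the bare
 * enumerator again so that tcg_target_op_def() in tcg-target.c.inc can
 * write C_O1_I2(r, r, ri) to return that same index.
 */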
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
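
/*
 * Illustrative note (not part of the upstream source): this is the slow
 * path behind tcg_malloc(), reached when the current chunk cannot satisfy
 * the request.  Small requests bump-allocate from TCG_POOL_CHUNK_SIZE
 * chunks that are kept on a list and reused after tcg_pool_reset();
 * oversized requests get a dedicated chunk on pool_first_large, which is
 * freed at reset.  For example, the TCGLabel and TCGRelocation objects
 * allocated above come from this pool and all disappear together when
 * translation of the TB is finished.
 */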

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t. */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);
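        /*
         * Illustrative example (not part of the upstream source):
         * typemask packs one 3-bit typecode per slot, the return type
         * in bits [2:0] and argument j in bits [3j+5:3j+3] (see the
         * extract32() below).  For a helper declared as i64 f(env, i32),
         * typemask >> 3 contains two non-zero 3-bit fields, so
         * 32 - clz32(...) counts the significant bits and rounding up
         * by 3 yields nargs = 2.
         */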

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
723 */ 724 for (int i = 1; i < n; ++i) { 725 loc[i] = (TCGCallArgumentLoc){ 726 .kind = TCG_CALL_ARG_BY_REF_N, 727 .arg_idx = cum->arg_idx, 728 .tmp_subindex = i, 729 .ref_slot = cum->ref_slot + i, 730 }; 731 } 732 cum->info_in_idx += n; 733 cum->ref_slot += n; 734 } 735 736 static void init_call_layout(TCGHelperInfo *info) 737 { 738 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); 739 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); 740 unsigned typemask = info->typemask; 741 unsigned typecode; 742 TCGCumulativeArgs cum = { }; 743 744 /* 745 * Parse and place any function return value. 746 */ 747 typecode = typemask & 7; 748 switch (typecode) { 749 case dh_typecode_void: 750 info->nr_out = 0; 751 break; 752 case dh_typecode_i32: 753 case dh_typecode_s32: 754 case dh_typecode_ptr: 755 info->nr_out = 1; 756 info->out_kind = TCG_CALL_RET_NORMAL; 757 break; 758 case dh_typecode_i64: 759 case dh_typecode_s64: 760 info->nr_out = 64 / TCG_TARGET_REG_BITS; 761 info->out_kind = TCG_CALL_RET_NORMAL; 762 /* Query the last register now to trigger any assert early. */ 763 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 764 break; 765 case dh_typecode_i128: 766 info->nr_out = 128 / TCG_TARGET_REG_BITS; 767 info->out_kind = TCG_TARGET_CALL_RET_I128; 768 switch (TCG_TARGET_CALL_RET_I128) { 769 case TCG_CALL_RET_NORMAL: 770 /* Query the last register now to trigger any assert early. */ 771 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); 772 break; 773 case TCG_CALL_RET_BY_VEC: 774 /* Query the single register now to trigger any assert early. */ 775 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); 776 break; 777 case TCG_CALL_RET_BY_REF: 778 /* 779 * Allocate the first argument to the output. 780 * We don't need to store this anywhere, just make it 781 * unavailable for use in the input loop below. 782 */ 783 cum.arg_slot = 1; 784 break; 785 default: 786 qemu_build_not_reached(); 787 } 788 break; 789 default: 790 g_assert_not_reached(); 791 } 792 793 /* 794 * Parse and place function arguments. 
795 */ 796 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { 797 TCGCallArgumentKind kind; 798 TCGType type; 799 800 typecode = typemask & 7; 801 switch (typecode) { 802 case dh_typecode_i32: 803 case dh_typecode_s32: 804 type = TCG_TYPE_I32; 805 break; 806 case dh_typecode_i64: 807 case dh_typecode_s64: 808 type = TCG_TYPE_I64; 809 break; 810 case dh_typecode_ptr: 811 type = TCG_TYPE_PTR; 812 break; 813 case dh_typecode_i128: 814 type = TCG_TYPE_I128; 815 break; 816 default: 817 g_assert_not_reached(); 818 } 819 820 switch (type) { 821 case TCG_TYPE_I32: 822 switch (TCG_TARGET_CALL_ARG_I32) { 823 case TCG_CALL_ARG_EVEN: 824 layout_arg_even(&cum); 825 /* fall through */ 826 case TCG_CALL_ARG_NORMAL: 827 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 828 break; 829 case TCG_CALL_ARG_EXTEND: 830 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); 831 layout_arg_1(&cum, info, kind); 832 break; 833 default: 834 qemu_build_not_reached(); 835 } 836 break; 837 838 case TCG_TYPE_I64: 839 switch (TCG_TARGET_CALL_ARG_I64) { 840 case TCG_CALL_ARG_EVEN: 841 layout_arg_even(&cum); 842 /* fall through */ 843 case TCG_CALL_ARG_NORMAL: 844 if (TCG_TARGET_REG_BITS == 32) { 845 layout_arg_normal_n(&cum, info, 2); 846 } else { 847 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); 848 } 849 break; 850 default: 851 qemu_build_not_reached(); 852 } 853 break; 854 855 case TCG_TYPE_I128: 856 switch (TCG_TARGET_CALL_ARG_I128) { 857 case TCG_CALL_ARG_EVEN: 858 layout_arg_even(&cum); 859 /* fall through */ 860 case TCG_CALL_ARG_NORMAL: 861 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); 862 break; 863 case TCG_CALL_ARG_BY_REF: 864 layout_arg_by_ref(&cum, info); 865 break; 866 default: 867 qemu_build_not_reached(); 868 } 869 break; 870 871 default: 872 g_assert_not_reached(); 873 } 874 } 875 info->nr_in = cum.info_in_idx; 876 877 /* Validate that we didn't overrun the input array. */ 878 assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); 879 /* Validate the backend has enough argument space. */ 880 assert(cum.arg_slot <= max_reg_slots + max_stk_slots); 881 882 /* 883 * Relocate the "ref_slot" area to the end of the parameters. 884 * Minimizing this stack offset helps code size for x86, 885 * which has a signed 8-bit offset encoding. 
886 */ 887 if (cum.ref_slot != 0) { 888 int ref_base = 0; 889 890 if (cum.arg_slot > max_reg_slots) { 891 int align = __alignof(Int128) / sizeof(tcg_target_long); 892 893 ref_base = cum.arg_slot - max_reg_slots; 894 if (align > 1) { 895 ref_base = ROUND_UP(ref_base, align); 896 } 897 } 898 assert(ref_base + cum.ref_slot <= max_stk_slots); 899 900 if (ref_base != 0) { 901 for (int i = cum.info_in_idx - 1; i >= 0; --i) { 902 TCGCallArgumentLoc *loc = &info->in[i]; 903 switch (loc->kind) { 904 case TCG_CALL_ARG_BY_REF: 905 case TCG_CALL_ARG_BY_REF_N: 906 loc->ref_slot += ref_base; 907 break; 908 default: 909 break; 910 } 911 } 912 } 913 } 914 } 915 916 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 917 static void process_op_defs(TCGContext *s); 918 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 919 TCGReg reg, const char *name); 920 921 static void tcg_context_init(unsigned max_cpus) 922 { 923 TCGContext *s = &tcg_init_ctx; 924 int op, total_args, n, i; 925 TCGOpDef *def; 926 TCGArgConstraint *args_ct; 927 TCGTemp *ts; 928 929 memset(s, 0, sizeof(*s)); 930 s->nb_globals = 0; 931 932 /* Count total number of arguments and allocate the corresponding 933 space */ 934 total_args = 0; 935 for(op = 0; op < NB_OPS; op++) { 936 def = &tcg_op_defs[op]; 937 n = def->nb_iargs + def->nb_oargs; 938 total_args += n; 939 } 940 941 args_ct = g_new0(TCGArgConstraint, total_args); 942 943 for(op = 0; op < NB_OPS; op++) { 944 def = &tcg_op_defs[op]; 945 def->args_ct = args_ct; 946 n = def->nb_iargs + def->nb_oargs; 947 args_ct += n; 948 } 949 950 /* Register helpers. */ 951 /* Use g_direct_hash/equal for direct pointer comparisons on func. */ 952 helper_table = g_hash_table_new(NULL, NULL); 953 954 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 955 init_call_layout(&all_helpers[i]); 956 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 957 (gpointer)&all_helpers[i]); 958 } 959 960 #ifdef CONFIG_TCG_INTERPRETER 961 init_ffi_layouts(); 962 #endif 963 964 tcg_target_init(s); 965 process_op_defs(s); 966 967 /* Reverse the order of the saved registers, assuming they're all at 968 the start of tcg_target_reg_alloc_order. */ 969 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 970 int r = tcg_target_reg_alloc_order[n]; 971 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 972 break; 973 } 974 } 975 for (i = 0; i < n; ++i) { 976 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 977 } 978 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 979 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 980 } 981 982 alloc_tcg_plugin_context(s); 983 984 tcg_ctx = s; 985 /* 986 * In user-mode we simply share the init context among threads, since we 987 * use a single region. See the documentation tcg_region_init() for the 988 * reasoning behind this. 989 * In softmmu we will have at most max_cpus TCG threads. 
990 */ 991 #ifdef CONFIG_USER_ONLY 992 tcg_ctxs = &tcg_ctx; 993 tcg_cur_ctxs = 1; 994 tcg_max_ctxs = 1; 995 #else 996 tcg_max_ctxs = max_cpus; 997 tcg_ctxs = g_new0(TCGContext *, max_cpus); 998 #endif 999 1000 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1001 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1002 cpu_env = temp_tcgv_ptr(ts); 1003 } 1004 1005 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus) 1006 { 1007 tcg_context_init(max_cpus); 1008 tcg_region_init(tb_size, splitwx, max_cpus); 1009 } 1010 1011 /* 1012 * Allocate TBs right before their corresponding translated code, making 1013 * sure that TBs and code are on different cache lines. 1014 */ 1015 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1016 { 1017 uintptr_t align = qemu_icache_linesize; 1018 TranslationBlock *tb; 1019 void *next; 1020 1021 retry: 1022 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1023 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1024 1025 if (unlikely(next > s->code_gen_highwater)) { 1026 if (tcg_region_alloc(s)) { 1027 return NULL; 1028 } 1029 goto retry; 1030 } 1031 qatomic_set(&s->code_gen_ptr, next); 1032 s->data_gen_ptr = NULL; 1033 return tb; 1034 } 1035 1036 void tcg_prologue_init(TCGContext *s) 1037 { 1038 size_t prologue_size; 1039 1040 s->code_ptr = s->code_gen_ptr; 1041 s->code_buf = s->code_gen_ptr; 1042 s->data_gen_ptr = NULL; 1043 1044 #ifndef CONFIG_TCG_INTERPRETER 1045 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 1046 #endif 1047 1048 #ifdef TCG_TARGET_NEED_POOL_LABELS 1049 s->pool_labels = NULL; 1050 #endif 1051 1052 qemu_thread_jit_write(); 1053 /* Generate the prologue. */ 1054 tcg_target_qemu_prologue(s); 1055 1056 #ifdef TCG_TARGET_NEED_POOL_LABELS 1057 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 1058 { 1059 int result = tcg_out_pool_finalize(s); 1060 tcg_debug_assert(result == 0); 1061 } 1062 #endif 1063 1064 prologue_size = tcg_current_code_size(s); 1065 perf_report_prologue(s->code_gen_ptr, prologue_size); 1066 1067 #ifndef CONFIG_TCG_INTERPRETER 1068 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 1069 (uintptr_t)s->code_buf, prologue_size); 1070 #endif 1071 1072 #ifdef DEBUG_DISAS 1073 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1074 FILE *logfile = qemu_log_trylock(); 1075 if (logfile) { 1076 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 1077 if (s->data_gen_ptr) { 1078 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 1079 size_t data_size = prologue_size - code_size; 1080 size_t i; 1081 1082 disas(logfile, s->code_gen_ptr, code_size); 1083 1084 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1085 if (sizeof(tcg_target_ulong) == 8) { 1086 fprintf(logfile, 1087 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1088 (uintptr_t)s->data_gen_ptr + i, 1089 *(uint64_t *)(s->data_gen_ptr + i)); 1090 } else { 1091 fprintf(logfile, 1092 "0x%08" PRIxPTR ": .long 0x%08x\n", 1093 (uintptr_t)s->data_gen_ptr + i, 1094 *(uint32_t *)(s->data_gen_ptr + i)); 1095 } 1096 } 1097 } else { 1098 disas(logfile, s->code_gen_ptr, prologue_size); 1099 } 1100 fprintf(logfile, "\n"); 1101 qemu_log_unlock(logfile); 1102 } 1103 } 1104 #endif 1105 1106 #ifndef CONFIG_TCG_INTERPRETER 1107 /* 1108 * Assert that goto_ptr is implemented completely, setting an epilogue. 1109 * For tci, we use NULL as the signal to return from the interpreter, 1110 * so skip this check. 
1111 */ 1112 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1113 #endif 1114 1115 tcg_region_prologue_set(s); 1116 } 1117 1118 void tcg_func_start(TCGContext *s) 1119 { 1120 tcg_pool_reset(s); 1121 s->nb_temps = s->nb_globals; 1122 1123 /* No temps have been previously allocated for size or locality. */ 1124 memset(s->free_temps, 0, sizeof(s->free_temps)); 1125 1126 /* No constant temps have been previously allocated. */ 1127 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1128 if (s->const_table[i]) { 1129 g_hash_table_remove_all(s->const_table[i]); 1130 } 1131 } 1132 1133 s->nb_ops = 0; 1134 s->nb_labels = 0; 1135 s->current_frame_offset = s->frame_start; 1136 1137 #ifdef CONFIG_DEBUG_TCG 1138 s->goto_tb_issue_mask = 0; 1139 #endif 1140 1141 QTAILQ_INIT(&s->ops); 1142 QTAILQ_INIT(&s->free_ops); 1143 QSIMPLEQ_INIT(&s->labels); 1144 } 1145 1146 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1147 { 1148 int n = s->nb_temps++; 1149 1150 if (n >= TCG_MAX_TEMPS) { 1151 tcg_raise_tb_overflow(s); 1152 } 1153 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1154 } 1155 1156 static TCGTemp *tcg_global_alloc(TCGContext *s) 1157 { 1158 TCGTemp *ts; 1159 1160 tcg_debug_assert(s->nb_globals == s->nb_temps); 1161 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1162 s->nb_globals++; 1163 ts = tcg_temp_alloc(s); 1164 ts->kind = TEMP_GLOBAL; 1165 1166 return ts; 1167 } 1168 1169 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1170 TCGReg reg, const char *name) 1171 { 1172 TCGTemp *ts; 1173 1174 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 1175 tcg_abort(); 1176 } 1177 1178 ts = tcg_global_alloc(s); 1179 ts->base_type = type; 1180 ts->type = type; 1181 ts->kind = TEMP_FIXED; 1182 ts->reg = reg; 1183 ts->name = name; 1184 tcg_regset_set_reg(s->reserved_regs, reg); 1185 1186 return ts; 1187 } 1188 1189 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1190 { 1191 s->frame_start = start; 1192 s->frame_end = start + size; 1193 s->frame_temp 1194 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1195 } 1196 1197 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 1198 intptr_t offset, const char *name) 1199 { 1200 TCGContext *s = tcg_ctx; 1201 TCGTemp *base_ts = tcgv_ptr_temp(base); 1202 TCGTemp *ts = tcg_global_alloc(s); 1203 int indirect_reg = 0; 1204 1205 switch (base_ts->kind) { 1206 case TEMP_FIXED: 1207 break; 1208 case TEMP_GLOBAL: 1209 /* We do not support double-indirect registers. */ 1210 tcg_debug_assert(!base_ts->indirect_reg); 1211 base_ts->indirect_base = 1; 1212 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1213 ? 

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type. */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            goto done;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }

 done:
#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
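
/*
 * Illustrative note (not part of the upstream source): a temp whose base
 * type is wider than a host register is backed by consecutive TCGTemp
 * entries, one per host word.  E.g. TCG_TYPE_I128 on a 64-bit host
 * allocates n = 2 entries, the first with temp_subindex 0 and the second
 * with temp_subindex 1, both with type TCG_TYPE_REG and base_type
 * TCG_TYPE_I128.  On a 32-bit host the same applies to TCG_TYPE_I64
 * (n = 2), and TCG_TYPE_I128 would use n = 4.
 */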

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_EBB:
    case TEMP_TB:
        break;
    default:
        g_assert_not_reached();
    }

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

#if defined(CONFIG_DEBUG_TCG)
    assert(s->temps_in_use > 0);
    s->temps_in_use--;
#endif

    if (ts->kind == TEMP_EBB) {
        int idx = temp_idx(ts);
        set_bit(idx, s->free_temps[ts->base_type].l);
    }
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
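
/*
 * Illustrative example (not part of the upstream source): on a 32-bit
 * little-endian host, tcg_constant_internal(TCG_TYPE_I64,
 * 0x1122334455667788) creates two TEMP_CONST halves.  ts[0] (the low
 * part, since HOST_BIG_ENDIAN == 0) keeps the full 64-bit value so the
 * g_int64_hash lookup keyed on &ts[0].val still matches, while ts[1]
 * holds the high word 0x11223344; code generation truncates the low
 * part to 0x55667788 when it is actually used.
 */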
1491 */ 1492 s->temps_in_use = 0; 1493 return 1; 1494 } 1495 return 0; 1496 } 1497 #endif 1498 1499 /* Return true if OP may appear in the opcode stream. 1500 Test the runtime variable that controls each opcode. */ 1501 bool tcg_op_supported(TCGOpcode op) 1502 { 1503 const bool have_vec 1504 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; 1505 1506 switch (op) { 1507 case INDEX_op_discard: 1508 case INDEX_op_set_label: 1509 case INDEX_op_call: 1510 case INDEX_op_br: 1511 case INDEX_op_mb: 1512 case INDEX_op_insn_start: 1513 case INDEX_op_exit_tb: 1514 case INDEX_op_goto_tb: 1515 case INDEX_op_goto_ptr: 1516 case INDEX_op_qemu_ld_i32: 1517 case INDEX_op_qemu_st_i32: 1518 case INDEX_op_qemu_ld_i64: 1519 case INDEX_op_qemu_st_i64: 1520 return true; 1521 1522 case INDEX_op_qemu_st8_i32: 1523 return TCG_TARGET_HAS_qemu_st8_i32; 1524 1525 case INDEX_op_mov_i32: 1526 case INDEX_op_setcond_i32: 1527 case INDEX_op_brcond_i32: 1528 case INDEX_op_ld8u_i32: 1529 case INDEX_op_ld8s_i32: 1530 case INDEX_op_ld16u_i32: 1531 case INDEX_op_ld16s_i32: 1532 case INDEX_op_ld_i32: 1533 case INDEX_op_st8_i32: 1534 case INDEX_op_st16_i32: 1535 case INDEX_op_st_i32: 1536 case INDEX_op_add_i32: 1537 case INDEX_op_sub_i32: 1538 case INDEX_op_mul_i32: 1539 case INDEX_op_and_i32: 1540 case INDEX_op_or_i32: 1541 case INDEX_op_xor_i32: 1542 case INDEX_op_shl_i32: 1543 case INDEX_op_shr_i32: 1544 case INDEX_op_sar_i32: 1545 return true; 1546 1547 case INDEX_op_movcond_i32: 1548 return TCG_TARGET_HAS_movcond_i32; 1549 case INDEX_op_div_i32: 1550 case INDEX_op_divu_i32: 1551 return TCG_TARGET_HAS_div_i32; 1552 case INDEX_op_rem_i32: 1553 case INDEX_op_remu_i32: 1554 return TCG_TARGET_HAS_rem_i32; 1555 case INDEX_op_div2_i32: 1556 case INDEX_op_divu2_i32: 1557 return TCG_TARGET_HAS_div2_i32; 1558 case INDEX_op_rotl_i32: 1559 case INDEX_op_rotr_i32: 1560 return TCG_TARGET_HAS_rot_i32; 1561 case INDEX_op_deposit_i32: 1562 return TCG_TARGET_HAS_deposit_i32; 1563 case INDEX_op_extract_i32: 1564 return TCG_TARGET_HAS_extract_i32; 1565 case INDEX_op_sextract_i32: 1566 return TCG_TARGET_HAS_sextract_i32; 1567 case INDEX_op_extract2_i32: 1568 return TCG_TARGET_HAS_extract2_i32; 1569 case INDEX_op_add2_i32: 1570 return TCG_TARGET_HAS_add2_i32; 1571 case INDEX_op_sub2_i32: 1572 return TCG_TARGET_HAS_sub2_i32; 1573 case INDEX_op_mulu2_i32: 1574 return TCG_TARGET_HAS_mulu2_i32; 1575 case INDEX_op_muls2_i32: 1576 return TCG_TARGET_HAS_muls2_i32; 1577 case INDEX_op_muluh_i32: 1578 return TCG_TARGET_HAS_muluh_i32; 1579 case INDEX_op_mulsh_i32: 1580 return TCG_TARGET_HAS_mulsh_i32; 1581 case INDEX_op_ext8s_i32: 1582 return TCG_TARGET_HAS_ext8s_i32; 1583 case INDEX_op_ext16s_i32: 1584 return TCG_TARGET_HAS_ext16s_i32; 1585 case INDEX_op_ext8u_i32: 1586 return TCG_TARGET_HAS_ext8u_i32; 1587 case INDEX_op_ext16u_i32: 1588 return TCG_TARGET_HAS_ext16u_i32; 1589 case INDEX_op_bswap16_i32: 1590 return TCG_TARGET_HAS_bswap16_i32; 1591 case INDEX_op_bswap32_i32: 1592 return TCG_TARGET_HAS_bswap32_i32; 1593 case INDEX_op_not_i32: 1594 return TCG_TARGET_HAS_not_i32; 1595 case INDEX_op_neg_i32: 1596 return TCG_TARGET_HAS_neg_i32; 1597 case INDEX_op_andc_i32: 1598 return TCG_TARGET_HAS_andc_i32; 1599 case INDEX_op_orc_i32: 1600 return TCG_TARGET_HAS_orc_i32; 1601 case INDEX_op_eqv_i32: 1602 return TCG_TARGET_HAS_eqv_i32; 1603 case INDEX_op_nand_i32: 1604 return TCG_TARGET_HAS_nand_i32; 1605 case INDEX_op_nor_i32: 1606 return TCG_TARGET_HAS_nor_i32; 1607 case INDEX_op_clz_i32: 1608 return TCG_TARGET_HAS_clz_i32; 1609 case 

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}
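
/*
 * Illustrative note (not part of the upstream source): these are the
 * usual bit tricks.  d & (d - 1) clears the lowest set bit, so the
 * result is zero iff at most one register is in the set (the empty set
 * also reports as "single"), and ctz32/ctz64 of a single-bit mask such
 * as 0x10 recovers register number 4.
 */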

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, " sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, " dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, " pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}

/* we give more priority to constraints with fewer registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct = &def->args_ct[k];
    int n = ctpop64(arg_ct->regs);

    /*
     * Sort constraints of a single register first, which includes output
     * aliases (which must exactly match the input already allocated).
     */
    if (n == 1 || arg_ct->oalias) {
        return INT_MAX;
    }

    /*
     * Sort register pairs next, first then second immediately after.
     * Arbitrarily sort multiple pairs by the index of the first reg;
     * there shouldn't be many pairs.
     */
    switch (arg_ct->pair) {
    case 1:
    case 3:
        return (k + 1) * 2;
    case 2:
        return (arg_ct->pair_index + 1) * 2 - 1;
    }

    /* Finally, sort by decreasing register count. */
    assert(n > 1);
    return -n;
}
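
/*
 * Illustrative example (not part of the upstream source): for an op
 * whose input constraints are { "0", "ri" }, the aliased input "0"
 * (it must match output 0 exactly) gets priority INT_MAX and is
 * allocated first, while the general "ri" constraint gets -n, minus
 * the size of its register class, and sorts last.  sort_constraints()
 * below orders each operand group from highest to lowest priority.
 */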
2077 */ 2078 if (func == info->func) { 2079 col += ne_fprintf(f, "%s", info->name); 2080 } else { 2081 col += ne_fprintf(f, "plugin(%p)", func); 2082 } 2083 2084 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2085 for (i = 0; i < nb_oargs; i++) { 2086 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2087 op->args[i])); 2088 } 2089 for (i = 0; i < nb_iargs; i++) { 2090 TCGArg arg = op->args[nb_oargs + i]; 2091 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2092 col += ne_fprintf(f, ",%s", t); 2093 } 2094 } else { 2095 col += ne_fprintf(f, " %s ", def->name); 2096 2097 nb_oargs = def->nb_oargs; 2098 nb_iargs = def->nb_iargs; 2099 nb_cargs = def->nb_cargs; 2100 2101 if (def->flags & TCG_OPF_VECTOR) { 2102 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2103 8 << TCGOP_VECE(op)); 2104 } 2105 2106 k = 0; 2107 for (i = 0; i < nb_oargs; i++) { 2108 const char *sep = k ? "," : ""; 2109 col += ne_fprintf(f, "%s%s", sep, 2110 tcg_get_arg_str(s, buf, sizeof(buf), 2111 op->args[k++])); 2112 } 2113 for (i = 0; i < nb_iargs; i++) { 2114 const char *sep = k ? "," : ""; 2115 col += ne_fprintf(f, "%s%s", sep, 2116 tcg_get_arg_str(s, buf, sizeof(buf), 2117 op->args[k++])); 2118 } 2119 switch (c) { 2120 case INDEX_op_brcond_i32: 2121 case INDEX_op_setcond_i32: 2122 case INDEX_op_movcond_i32: 2123 case INDEX_op_brcond2_i32: 2124 case INDEX_op_setcond2_i32: 2125 case INDEX_op_brcond_i64: 2126 case INDEX_op_setcond_i64: 2127 case INDEX_op_movcond_i64: 2128 case INDEX_op_cmp_vec: 2129 case INDEX_op_cmpsel_vec: 2130 if (op->args[k] < ARRAY_SIZE(cond_name) 2131 && cond_name[op->args[k]]) { 2132 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2133 } else { 2134 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2135 } 2136 i = 1; 2137 break; 2138 case INDEX_op_qemu_ld_i32: 2139 case INDEX_op_qemu_st_i32: 2140 case INDEX_op_qemu_st8_i32: 2141 case INDEX_op_qemu_ld_i64: 2142 case INDEX_op_qemu_st_i64: 2143 { 2144 MemOpIdx oi = op->args[k++]; 2145 MemOp op = get_memop(oi); 2146 unsigned ix = get_mmuidx(oi); 2147 2148 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2149 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2150 } else { 2151 const char *s_al, *s_op; 2152 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2153 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2154 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 2155 } 2156 i = 1; 2157 } 2158 break; 2159 case INDEX_op_bswap16_i32: 2160 case INDEX_op_bswap16_i64: 2161 case INDEX_op_bswap32_i32: 2162 case INDEX_op_bswap32_i64: 2163 case INDEX_op_bswap64_i64: 2164 { 2165 TCGArg flags = op->args[k]; 2166 const char *name = NULL; 2167 2168 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2169 name = bswap_flag_name[flags]; 2170 } 2171 if (name) { 2172 col += ne_fprintf(f, ",%s", name); 2173 } else { 2174 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2175 } 2176 i = k = 1; 2177 } 2178 break; 2179 default: 2180 i = 0; 2181 break; 2182 } 2183 switch (c) { 2184 case INDEX_op_set_label: 2185 case INDEX_op_br: 2186 case INDEX_op_brcond_i32: 2187 case INDEX_op_brcond_i64: 2188 case INDEX_op_brcond2_i32: 2189 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 2190 arg_label(op->args[k])->id); 2191 i++, k++; 2192 break; 2193 default: 2194 break; 2195 } 2196 for (; i < nb_cargs; i++, k++) { 2197 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2198 op->args[k]); 2199 } 2200 } 2201 2202 if (have_prefs || op->life) { 2203 for (; col < 40; ++col) { 2204 putc(' ', f); 2205 } 2206 } 2207 2208 if (op->life) { 2209 unsigned life = op->life; 2210 2211 if (life & (SYNC_ARG * 3)) { 2212 ne_fprintf(f, " sync:"); 2213 for (i = 0; i < 2; ++i) { 2214 if (life & (SYNC_ARG << i)) { 2215 ne_fprintf(f, " %d", i); 2216 } 2217 } 2218 } 2219 life /= DEAD_ARG; 2220 if (life) { 2221 ne_fprintf(f, " dead:"); 2222 for (i = 0; life; ++i, life >>= 1) { 2223 if (life & 1) { 2224 ne_fprintf(f, " %d", i); 2225 } 2226 } 2227 } 2228 } 2229 2230 if (have_prefs) { 2231 for (i = 0; i < nb_oargs; ++i) { 2232 TCGRegSet set = output_pref(op, i); 2233 2234 if (i == 0) { 2235 ne_fprintf(f, " pref="); 2236 } else { 2237 ne_fprintf(f, ","); 2238 } 2239 if (set == 0) { 2240 ne_fprintf(f, "none"); 2241 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2242 ne_fprintf(f, "all"); 2243 #ifdef CONFIG_DEBUG_TCG 2244 } else if (tcg_regset_single(set)) { 2245 TCGReg reg = tcg_regset_first(set); 2246 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2247 #endif 2248 } else if (TCG_TARGET_NB_REGS <= 32) { 2249 ne_fprintf(f, "0x%x", (uint32_t)set); 2250 } else { 2251 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2252 } 2253 } 2254 } 2255 2256 putc('\n', f); 2257 } 2258 } 2259 2260 /* we give more priority to constraints with less registers */ 2261 static int get_constraint_priority(const TCGOpDef *def, int k) 2262 { 2263 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2264 int n = ctpop64(arg_ct->regs); 2265 2266 /* 2267 * Sort constraints of a single register first, which includes output 2268 * aliases (which must exactly match the input already allocated). 2269 */ 2270 if (n == 1 || arg_ct->oalias) { 2271 return INT_MAX; 2272 } 2273 2274 /* 2275 * Sort register pairs next, first then second immediately after. 2276 * Arbitrarily sort multiple pairs by the index of the first reg; 2277 * there shouldn't be many pairs. 2278 */ 2279 switch (arg_ct->pair) { 2280 case 1: 2281 case 3: 2282 return (k + 1) * 2; 2283 case 2: 2284 return (arg_ct->pair_index + 1) * 2 - 1; 2285 } 2286 2287 /* Finally, sort by decreasing register count. */ 2288 assert(n > 1); 2289 return -n; 2290 } 2291 2292 /* sort from highest priority to lowest */ 2293 static void sort_constraints(TCGOpDef *def, int start, int n) 2294 { 2295 int i, j; 2296 TCGArgConstraint *a = def->args_ct; 2297 2298 for (i = 0; i < n; i++) { 2299 a[start + i].sort_index = start + i; 2300 } 2301 if (n <= 1) { 2302 return; 2303 } 2304 for (i = 0; i < n - 1; i++) { 2305 for (j = i + 1; j < n; j++) { 2306 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2307 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2308 if (p1 < p2) { 2309 int tmp = a[start + i].sort_index; 2310 a[start + i].sort_index = a[start + j].sort_index; 2311 a[start + j].sort_index = tmp; 2312 } 2313 } 2314 } 2315 } 2316 2317 static void process_op_defs(TCGContext *s) 2318 { 2319 TCGOpcode op; 2320 2321 for (op = 0; op < NB_OPS; op++) { 2322 TCGOpDef *def = &tcg_op_defs[op]; 2323 const TCGTargetOpDef *tdefs; 2324 bool saw_alias_pair = false; 2325 int i, o, i2, o2, nb_args; 2326 2327 if (def->flags & TCG_OPF_NOT_PRESENT) { 2328 continue; 2329 } 2330 2331 nb_args = def->nb_iargs + def->nb_oargs; 2332 if (nb_args == 0) { 2333 continue; 2334 } 2335 2336 /* 2337 * Macro magic should make it impossible, but double-check that 2338 * the array index is in range. 
Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias. */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
2453 * 2454 * Case 1b is handled by setting the pair_index of the input to 2455 * itself, simply so it doesn't point to an unrelated argument. 2456 * Since we don't encounter the "second" during the input allocation 2457 * phase, nothing happens with the second half of the input pair. 2458 * 2459 * Case 2 is handled by setting the second input to pair=3, the 2460 * first output to pair=3, and the pair_index'es to match. 2461 */ 2462 if (saw_alias_pair) { 2463 for (i = def->nb_oargs; i < nb_args; i++) { 2464 /* 2465 * Since [0-9pm] must be alone in the constraint string, 2466 * the only way they can both be set is if the pair comes 2467 * from the output alias. 2468 */ 2469 if (!def->args_ct[i].ialias) { 2470 continue; 2471 } 2472 switch (def->args_ct[i].pair) { 2473 case 0: 2474 break; 2475 case 1: 2476 o = def->args_ct[i].alias_index; 2477 o2 = def->args_ct[o].pair_index; 2478 tcg_debug_assert(def->args_ct[o].pair == 1); 2479 tcg_debug_assert(def->args_ct[o2].pair == 2); 2480 if (def->args_ct[o2].oalias) { 2481 /* Case 1a */ 2482 i2 = def->args_ct[o2].alias_index; 2483 tcg_debug_assert(def->args_ct[i2].pair == 2); 2484 def->args_ct[i2].pair_index = i; 2485 def->args_ct[i].pair_index = i2; 2486 } else { 2487 /* Case 1b */ 2488 def->args_ct[i].pair_index = i; 2489 } 2490 break; 2491 case 2: 2492 o = def->args_ct[i].alias_index; 2493 o2 = def->args_ct[o].pair_index; 2494 tcg_debug_assert(def->args_ct[o].pair == 2); 2495 tcg_debug_assert(def->args_ct[o2].pair == 1); 2496 if (def->args_ct[o2].oalias) { 2497 /* Case 1a */ 2498 i2 = def->args_ct[o2].alias_index; 2499 tcg_debug_assert(def->args_ct[i2].pair == 1); 2500 def->args_ct[i2].pair_index = i; 2501 def->args_ct[i].pair_index = i2; 2502 } else { 2503 /* Case 2 */ 2504 def->args_ct[i].pair = 3; 2505 def->args_ct[o2].pair = 3; 2506 def->args_ct[i].pair_index = o2; 2507 def->args_ct[o2].pair_index = i; 2508 } 2509 break; 2510 default: 2511 g_assert_not_reached(); 2512 } 2513 } 2514 } 2515 2516 /* sort the constraints (XXX: this is just an heuristic) */ 2517 sort_constraints(def, 0, def->nb_oargs); 2518 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2519 } 2520 } 2521 2522 void tcg_op_remove(TCGContext *s, TCGOp *op) 2523 { 2524 TCGLabel *label; 2525 2526 switch (op->opc) { 2527 case INDEX_op_br: 2528 label = arg_label(op->args[0]); 2529 label->refs--; 2530 break; 2531 case INDEX_op_brcond_i32: 2532 case INDEX_op_brcond_i64: 2533 label = arg_label(op->args[3]); 2534 label->refs--; 2535 break; 2536 case INDEX_op_brcond2_i32: 2537 label = arg_label(op->args[5]); 2538 label->refs--; 2539 break; 2540 default: 2541 break; 2542 } 2543 2544 QTAILQ_REMOVE(&s->ops, op, link); 2545 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2546 s->nb_ops--; 2547 2548 #ifdef CONFIG_PROFILER 2549 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2550 #endif 2551 } 2552 2553 void tcg_remove_ops_after(TCGOp *op) 2554 { 2555 TCGContext *s = tcg_ctx; 2556 2557 while (true) { 2558 TCGOp *last = tcg_last_op(); 2559 if (last == op) { 2560 return; 2561 } 2562 tcg_op_remove(s, last); 2563 } 2564 } 2565 2566 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 2567 { 2568 TCGContext *s = tcg_ctx; 2569 TCGOp *op = NULL; 2570 2571 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 2572 QTAILQ_FOREACH(op, &s->free_ops, link) { 2573 if (nargs <= op->nargs) { 2574 QTAILQ_REMOVE(&s->free_ops, op, link); 2575 nargs = op->nargs; 2576 goto found; 2577 } 2578 } 2579 } 2580 2581 /* Most opcodes have 3 or 4 operands: reduce fragmentation. 
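   Rounding every allocation up to four arguments means that a recycled
   op can satisfy any typical request; see the MAX below.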
*/ 2582 nargs = MAX(4, nargs); 2583 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 2584 2585 found: 2586 memset(op, 0, offsetof(TCGOp, link)); 2587 op->opc = opc; 2588 op->nargs = nargs; 2589 2590 /* Check for bitfield overflow. */ 2591 tcg_debug_assert(op->nargs == nargs); 2592 2593 s->nb_ops++; 2594 return op; 2595 } 2596 2597 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 2598 { 2599 TCGOp *op = tcg_op_alloc(opc, nargs); 2600 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2601 return op; 2602 } 2603 2604 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 2605 TCGOpcode opc, unsigned nargs) 2606 { 2607 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2608 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2609 return new_op; 2610 } 2611 2612 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 2613 TCGOpcode opc, unsigned nargs) 2614 { 2615 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2616 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2617 return new_op; 2618 } 2619 2620 /* Reachable analysis : remove unreachable code. */ 2621 static void __attribute__((noinline)) 2622 reachable_code_pass(TCGContext *s) 2623 { 2624 TCGOp *op, *op_next, *op_prev; 2625 bool dead = false; 2626 2627 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2628 bool remove = dead; 2629 TCGLabel *label; 2630 2631 switch (op->opc) { 2632 case INDEX_op_set_label: 2633 label = arg_label(op->args[0]); 2634 2635 /* 2636 * Optimization can fold conditional branches to unconditional. 2637 * If we find a label which is preceded by an unconditional 2638 * branch to next, remove the branch. We couldn't do this when 2639 * processing the branch because any dead code between the branch 2640 * and label had not yet been removed. 2641 */ 2642 op_prev = QTAILQ_PREV(op, link); 2643 if (op_prev->opc == INDEX_op_br && 2644 label == arg_label(op_prev->args[0])) { 2645 tcg_op_remove(s, op_prev); 2646 /* Fall through means insns become live again. */ 2647 dead = false; 2648 } 2649 2650 if (label->refs == 0) { 2651 /* 2652 * While there is an occasional backward branch, virtually 2653 * all branches generated by the translators are forward. 2654 * Which means that generally we will have already removed 2655 * all references to the label that will be, and there is 2656 * little to be gained by iterating. 2657 */ 2658 remove = true; 2659 } else { 2660 /* Once we see a label, insns become live again. */ 2661 dead = false; 2662 remove = false; 2663 } 2664 break; 2665 2666 case INDEX_op_br: 2667 case INDEX_op_exit_tb: 2668 case INDEX_op_goto_ptr: 2669 /* Unconditional branches; everything following is dead. */ 2670 dead = true; 2671 break; 2672 2673 case INDEX_op_call: 2674 /* Notice noreturn helper calls, raising exceptions. */ 2675 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2676 dead = true; 2677 } 2678 break; 2679 2680 case INDEX_op_insn_start: 2681 /* Never remove -- we need to keep these for unwind. */ 2682 remove = false; 2683 break; 2684 2685 default: 2686 break; 2687 } 2688 2689 if (remove) { 2690 tcg_op_remove(s, op); 2691 } 2692 } 2693 } 2694 2695 #define TS_DEAD 1 2696 #define TS_MEM 2 2697 2698 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2699 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2700 2701 /* For liveness_pass_1, the register preferences for a given temp. */ 2702 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2703 { 2704 return ts->state_ptr; 2705 } 2706 2707 /* For liveness_pass_1, reset the preferences for a given temp to the 2708 * maximal regset for its type. 
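 * That is, a dead temp gets an empty preference set, while a live one
 * may initially use any register available for its type.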
2709 */ 2710 static inline void la_reset_pref(TCGTemp *ts) 2711 { 2712 *la_temp_pref(ts) 2713 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2714 } 2715 2716 /* liveness analysis: end of function: all temps are dead, and globals 2717 should be in memory. */ 2718 static void la_func_end(TCGContext *s, int ng, int nt) 2719 { 2720 int i; 2721 2722 for (i = 0; i < ng; ++i) { 2723 s->temps[i].state = TS_DEAD | TS_MEM; 2724 la_reset_pref(&s->temps[i]); 2725 } 2726 for (i = ng; i < nt; ++i) { 2727 s->temps[i].state = TS_DEAD; 2728 la_reset_pref(&s->temps[i]); 2729 } 2730 } 2731 2732 /* liveness analysis: end of basic block: all temps are dead, globals 2733 and local temps should be in memory. */ 2734 static void la_bb_end(TCGContext *s, int ng, int nt) 2735 { 2736 int i; 2737 2738 for (i = 0; i < nt; ++i) { 2739 TCGTemp *ts = &s->temps[i]; 2740 int state; 2741 2742 switch (ts->kind) { 2743 case TEMP_FIXED: 2744 case TEMP_GLOBAL: 2745 case TEMP_TB: 2746 state = TS_DEAD | TS_MEM; 2747 break; 2748 case TEMP_EBB: 2749 case TEMP_CONST: 2750 state = TS_DEAD; 2751 break; 2752 default: 2753 g_assert_not_reached(); 2754 } 2755 ts->state = state; 2756 la_reset_pref(ts); 2757 } 2758 } 2759 2760 /* liveness analysis: sync globals back to memory. */ 2761 static void la_global_sync(TCGContext *s, int ng) 2762 { 2763 int i; 2764 2765 for (i = 0; i < ng; ++i) { 2766 int state = s->temps[i].state; 2767 s->temps[i].state = state | TS_MEM; 2768 if (state == TS_DEAD) { 2769 /* If the global was previously dead, reset prefs. */ 2770 la_reset_pref(&s->temps[i]); 2771 } 2772 } 2773 } 2774 2775 /* 2776 * liveness analysis: conditional branch: all temps are dead unless 2777 * explicitly live-across-conditional-branch, globals and local temps 2778 * should be synced. 2779 */ 2780 static void la_bb_sync(TCGContext *s, int ng, int nt) 2781 { 2782 la_global_sync(s, ng); 2783 2784 for (int i = ng; i < nt; ++i) { 2785 TCGTemp *ts = &s->temps[i]; 2786 int state; 2787 2788 switch (ts->kind) { 2789 case TEMP_TB: 2790 state = ts->state; 2791 ts->state = state | TS_MEM; 2792 if (state != TS_DEAD) { 2793 continue; 2794 } 2795 break; 2796 case TEMP_EBB: 2797 case TEMP_CONST: 2798 continue; 2799 default: 2800 g_assert_not_reached(); 2801 } 2802 la_reset_pref(&s->temps[i]); 2803 } 2804 } 2805 2806 /* liveness analysis: sync globals back to memory and kill. */ 2807 static void la_global_kill(TCGContext *s, int ng) 2808 { 2809 int i; 2810 2811 for (i = 0; i < ng; i++) { 2812 s->temps[i].state = TS_DEAD | TS_MEM; 2813 la_reset_pref(&s->temps[i]); 2814 } 2815 } 2816 2817 /* liveness analysis: note live globals crossing calls. */ 2818 static void la_cross_call(TCGContext *s, int nt) 2819 { 2820 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2821 int i; 2822 2823 for (i = 0; i < nt; i++) { 2824 TCGTemp *ts = &s->temps[i]; 2825 if (!(ts->state & TS_DEAD)) { 2826 TCGRegSet *pset = la_temp_pref(ts); 2827 TCGRegSet set = *pset; 2828 2829 set &= mask; 2830 /* If the combination is not possible, restart. */ 2831 if (set == 0) { 2832 set = tcg_target_available_regs[ts->type] & mask; 2833 } 2834 *pset = set; 2835 } 2836 } 2837 } 2838 2839 /* 2840 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce 2841 * to TEMP_EBB, if possible. 
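 * A TEMP_TB whose every use falls within a single extended basic block
 * need not be kept live across EBB boundaries, so demoting it to
 * TEMP_EBB allows later passes to keep it in a register longer.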
*/
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
        do_not_remove_call:

                /* Output args are dead.
*/ 2959 for (i = 0; i < nb_oargs; i++) { 2960 ts = arg_temp(op->args[i]); 2961 if (ts->state & TS_DEAD) { 2962 arg_life |= DEAD_ARG << i; 2963 } 2964 if (ts->state & TS_MEM) { 2965 arg_life |= SYNC_ARG << i; 2966 } 2967 ts->state = TS_DEAD; 2968 la_reset_pref(ts); 2969 } 2970 2971 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 2972 memset(op->output_pref, 0, sizeof(op->output_pref)); 2973 2974 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2975 TCG_CALL_NO_READ_GLOBALS))) { 2976 la_global_kill(s, nb_globals); 2977 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2978 la_global_sync(s, nb_globals); 2979 } 2980 2981 /* Record arguments that die in this helper. */ 2982 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2983 ts = arg_temp(op->args[i]); 2984 if (ts->state & TS_DEAD) { 2985 arg_life |= DEAD_ARG << i; 2986 } 2987 } 2988 2989 /* For all live registers, remove call-clobbered prefs. */ 2990 la_cross_call(s, nb_temps); 2991 2992 /* 2993 * Input arguments are live for preceding opcodes. 2994 * 2995 * For those arguments that die, and will be allocated in 2996 * registers, clear the register set for that arg, to be 2997 * filled in below. For args that will be on the stack, 2998 * reset to any available reg. Process arguments in reverse 2999 * order so that if a temp is used more than once, the stack 3000 * reset to max happens before the register reset to 0. 3001 */ 3002 for (i = nb_iargs - 1; i >= 0; i--) { 3003 const TCGCallArgumentLoc *loc = &info->in[i]; 3004 ts = arg_temp(op->args[nb_oargs + i]); 3005 3006 if (ts->state & TS_DEAD) { 3007 switch (loc->kind) { 3008 case TCG_CALL_ARG_NORMAL: 3009 case TCG_CALL_ARG_EXTEND_U: 3010 case TCG_CALL_ARG_EXTEND_S: 3011 if (REG_P(loc)) { 3012 *la_temp_pref(ts) = 0; 3013 break; 3014 } 3015 /* fall through */ 3016 default: 3017 *la_temp_pref(ts) = 3018 tcg_target_available_regs[ts->type]; 3019 break; 3020 } 3021 ts->state &= ~TS_DEAD; 3022 } 3023 } 3024 3025 /* 3026 * For each input argument, add its input register to prefs. 3027 * If a temp is used once, this produces a single set bit; 3028 * if a temp is used multiple times, this produces a set. 3029 */ 3030 for (i = 0; i < nb_iargs; i++) { 3031 const TCGCallArgumentLoc *loc = &info->in[i]; 3032 ts = arg_temp(op->args[nb_oargs + i]); 3033 3034 switch (loc->kind) { 3035 case TCG_CALL_ARG_NORMAL: 3036 case TCG_CALL_ARG_EXTEND_U: 3037 case TCG_CALL_ARG_EXTEND_S: 3038 if (REG_P(loc)) { 3039 tcg_regset_set_reg(*la_temp_pref(ts), 3040 tcg_target_call_iarg_regs[loc->arg_slot]); 3041 } 3042 break; 3043 default: 3044 break; 3045 } 3046 } 3047 } 3048 break; 3049 case INDEX_op_insn_start: 3050 break; 3051 case INDEX_op_discard: 3052 /* mark the temporary as dead */ 3053 ts = arg_temp(op->args[0]); 3054 ts->state = TS_DEAD; 3055 la_reset_pref(ts); 3056 break; 3057 3058 case INDEX_op_add2_i32: 3059 opc_new = INDEX_op_add_i32; 3060 goto do_addsub2; 3061 case INDEX_op_sub2_i32: 3062 opc_new = INDEX_op_sub_i32; 3063 goto do_addsub2; 3064 case INDEX_op_add2_i64: 3065 opc_new = INDEX_op_add_i64; 3066 goto do_addsub2; 3067 case INDEX_op_sub2_i64: 3068 opc_new = INDEX_op_sub_i64; 3069 do_addsub2: 3070 nb_iargs = 4; 3071 nb_oargs = 2; 3072 /* Test if the high part of the operation is dead, but not 3073 the low part. The result can be optimized to a simple 3074 add or sub. This happens often for x86_64 guest when the 3075 cpu mode is set to 32 bit. 
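   For example, "add2_i32 lo,hi,al,ah,bl,bh" with hi dead is rewritten
   below to "add_i32 lo,al,bl".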
*/ 3076 if (arg_temp(op->args[1])->state == TS_DEAD) { 3077 if (arg_temp(op->args[0])->state == TS_DEAD) { 3078 goto do_remove; 3079 } 3080 /* Replace the opcode and adjust the args in place, 3081 leaving 3 unused args at the end. */ 3082 op->opc = opc = opc_new; 3083 op->args[1] = op->args[2]; 3084 op->args[2] = op->args[4]; 3085 /* Fall through and mark the single-word operation live. */ 3086 nb_iargs = 2; 3087 nb_oargs = 1; 3088 } 3089 goto do_not_remove; 3090 3091 case INDEX_op_mulu2_i32: 3092 opc_new = INDEX_op_mul_i32; 3093 opc_new2 = INDEX_op_muluh_i32; 3094 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3095 goto do_mul2; 3096 case INDEX_op_muls2_i32: 3097 opc_new = INDEX_op_mul_i32; 3098 opc_new2 = INDEX_op_mulsh_i32; 3099 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3100 goto do_mul2; 3101 case INDEX_op_mulu2_i64: 3102 opc_new = INDEX_op_mul_i64; 3103 opc_new2 = INDEX_op_muluh_i64; 3104 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3105 goto do_mul2; 3106 case INDEX_op_muls2_i64: 3107 opc_new = INDEX_op_mul_i64; 3108 opc_new2 = INDEX_op_mulsh_i64; 3109 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3110 goto do_mul2; 3111 do_mul2: 3112 nb_iargs = 2; 3113 nb_oargs = 2; 3114 if (arg_temp(op->args[1])->state == TS_DEAD) { 3115 if (arg_temp(op->args[0])->state == TS_DEAD) { 3116 /* Both parts of the operation are dead. */ 3117 goto do_remove; 3118 } 3119 /* The high part of the operation is dead; generate the low. */ 3120 op->opc = opc = opc_new; 3121 op->args[1] = op->args[2]; 3122 op->args[2] = op->args[3]; 3123 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3124 /* The low part of the operation is dead; generate the high. */ 3125 op->opc = opc = opc_new2; 3126 op->args[0] = op->args[1]; 3127 op->args[1] = op->args[2]; 3128 op->args[2] = op->args[3]; 3129 } else { 3130 goto do_not_remove; 3131 } 3132 /* Mark the single-word operation live. */ 3133 nb_oargs = 1; 3134 goto do_not_remove; 3135 3136 default: 3137 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3138 nb_iargs = def->nb_iargs; 3139 nb_oargs = def->nb_oargs; 3140 3141 /* Test if the operation can be removed because all 3142 its outputs are dead. We assume that nb_oargs == 0 3143 implies side effects */ 3144 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3145 for (i = 0; i < nb_oargs; i++) { 3146 if (arg_temp(op->args[i])->state != TS_DEAD) { 3147 goto do_not_remove; 3148 } 3149 } 3150 goto do_remove; 3151 } 3152 goto do_not_remove; 3153 3154 do_remove: 3155 tcg_op_remove(s, op); 3156 break; 3157 3158 do_not_remove: 3159 for (i = 0; i < nb_oargs; i++) { 3160 ts = arg_temp(op->args[i]); 3161 3162 /* Remember the preference of the uses that followed. */ 3163 if (i < ARRAY_SIZE(op->output_pref)) { 3164 op->output_pref[i] = *la_temp_pref(ts); 3165 } 3166 3167 /* Output args are dead. */ 3168 if (ts->state & TS_DEAD) { 3169 arg_life |= DEAD_ARG << i; 3170 } 3171 if (ts->state & TS_MEM) { 3172 arg_life |= SYNC_ARG << i; 3173 } 3174 ts->state = TS_DEAD; 3175 la_reset_pref(ts); 3176 } 3177 3178 /* If end of basic block, update. 
*/ 3179 if (def->flags & TCG_OPF_BB_EXIT) { 3180 la_func_end(s, nb_globals, nb_temps); 3181 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3182 la_bb_sync(s, nb_globals, nb_temps); 3183 } else if (def->flags & TCG_OPF_BB_END) { 3184 la_bb_end(s, nb_globals, nb_temps); 3185 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3186 la_global_sync(s, nb_globals); 3187 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3188 la_cross_call(s, nb_temps); 3189 } 3190 } 3191 3192 /* Record arguments that die in this opcode. */ 3193 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3194 ts = arg_temp(op->args[i]); 3195 if (ts->state & TS_DEAD) { 3196 arg_life |= DEAD_ARG << i; 3197 } 3198 } 3199 3200 /* Input arguments are live for preceding opcodes. */ 3201 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3202 ts = arg_temp(op->args[i]); 3203 if (ts->state & TS_DEAD) { 3204 /* For operands that were dead, initially allow 3205 all regs for the type. */ 3206 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3207 ts->state &= ~TS_DEAD; 3208 } 3209 } 3210 3211 /* Incorporate constraints for this operand. */ 3212 switch (opc) { 3213 case INDEX_op_mov_i32: 3214 case INDEX_op_mov_i64: 3215 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3216 have proper constraints. That said, special case 3217 moves to propagate preferences backward. */ 3218 if (IS_DEAD_ARG(1)) { 3219 *la_temp_pref(arg_temp(op->args[0])) 3220 = *la_temp_pref(arg_temp(op->args[1])); 3221 } 3222 break; 3223 3224 default: 3225 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3226 const TCGArgConstraint *ct = &def->args_ct[i]; 3227 TCGRegSet set, *pset; 3228 3229 ts = arg_temp(op->args[i]); 3230 pset = la_temp_pref(ts); 3231 set = *pset; 3232 3233 set &= ct->regs; 3234 if (ct->ialias) { 3235 set &= output_pref(op, ct->alias_index); 3236 } 3237 /* If the combination is not possible, restart. */ 3238 if (set == 0) { 3239 set = ct->regs; 3240 } 3241 *pset = set; 3242 } 3243 break; 3244 } 3245 break; 3246 } 3247 op->life = arg_life; 3248 } 3249 } 3250 3251 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3252 static bool __attribute__((noinline)) 3253 liveness_pass_2(TCGContext *s) 3254 { 3255 int nb_globals = s->nb_globals; 3256 int nb_temps, i; 3257 bool changes = false; 3258 TCGOp *op, *op_next; 3259 3260 /* Create a temporary for each indirect global. */ 3261 for (i = 0; i < nb_globals; ++i) { 3262 TCGTemp *its = &s->temps[i]; 3263 if (its->indirect_reg) { 3264 TCGTemp *dts = tcg_temp_alloc(s); 3265 dts->type = its->type; 3266 dts->base_type = its->base_type; 3267 dts->temp_subindex = its->temp_subindex; 3268 dts->kind = TEMP_EBB; 3269 its->state_ptr = dts; 3270 } else { 3271 its->state_ptr = NULL; 3272 } 3273 /* All globals begin dead. */ 3274 its->state = TS_DEAD; 3275 } 3276 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3277 TCGTemp *its = &s->temps[i]; 3278 its->state_ptr = NULL; 3279 its->state = TS_DEAD; 3280 } 3281 3282 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3283 TCGOpcode opc = op->opc; 3284 const TCGOpDef *def = &tcg_op_defs[opc]; 3285 TCGLifeData arg_life = op->life; 3286 int nb_iargs, nb_oargs, call_flags; 3287 TCGTemp *arg_ts, *dir_ts; 3288 3289 if (opc == INDEX_op_call) { 3290 nb_oargs = TCGOP_CALLO(op); 3291 nb_iargs = TCGOP_CALLI(op); 3292 call_flags = tcg_call_flags(op); 3293 } else { 3294 nb_iargs = def->nb_iargs; 3295 nb_oargs = def->nb_oargs; 3296 3297 /* Set flags similar to how calls require. 
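   A conditional branch or an op with side effects only needs globals
   synced, like a call that reads them; an unconditional BB end needs
   them saved, like a call that may write them.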
*/ 3298 if (def->flags & TCG_OPF_COND_BRANCH) { 3299 /* Like reading globals: sync_globals */ 3300 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3301 } else if (def->flags & TCG_OPF_BB_END) { 3302 /* Like writing globals: save_globals */ 3303 call_flags = 0; 3304 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3305 /* Like reading globals: sync_globals */ 3306 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3307 } else { 3308 /* No effect on globals. */ 3309 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3310 TCG_CALL_NO_WRITE_GLOBALS); 3311 } 3312 } 3313 3314 /* Make sure that input arguments are available. */ 3315 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3316 arg_ts = arg_temp(op->args[i]); 3317 dir_ts = arg_ts->state_ptr; 3318 if (dir_ts && arg_ts->state == TS_DEAD) { 3319 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3320 ? INDEX_op_ld_i32 3321 : INDEX_op_ld_i64); 3322 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3323 3324 lop->args[0] = temp_arg(dir_ts); 3325 lop->args[1] = temp_arg(arg_ts->mem_base); 3326 lop->args[2] = arg_ts->mem_offset; 3327 3328 /* Loaded, but synced with memory. */ 3329 arg_ts->state = TS_MEM; 3330 } 3331 } 3332 3333 /* Perform input replacement, and mark inputs that became dead. 3334 No action is required except keeping temp_state up to date 3335 so that we reload when needed. */ 3336 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3337 arg_ts = arg_temp(op->args[i]); 3338 dir_ts = arg_ts->state_ptr; 3339 if (dir_ts) { 3340 op->args[i] = temp_arg(dir_ts); 3341 changes = true; 3342 if (IS_DEAD_ARG(i)) { 3343 arg_ts->state = TS_DEAD; 3344 } 3345 } 3346 } 3347 3348 /* Liveness analysis should ensure that the following are 3349 all correct, for call sites and basic block end points. */ 3350 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3351 /* Nothing to do */ 3352 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3353 for (i = 0; i < nb_globals; ++i) { 3354 /* Liveness should see that globals are synced back, 3355 that is, either TS_DEAD or TS_MEM. */ 3356 arg_ts = &s->temps[i]; 3357 tcg_debug_assert(arg_ts->state_ptr == 0 3358 || arg_ts->state != 0); 3359 } 3360 } else { 3361 for (i = 0; i < nb_globals; ++i) { 3362 /* Liveness should see that globals are saved back, 3363 that is, TS_DEAD, waiting to be reloaded. */ 3364 arg_ts = &s->temps[i]; 3365 tcg_debug_assert(arg_ts->state_ptr == 0 3366 || arg_ts->state == TS_DEAD); 3367 } 3368 } 3369 3370 /* Outputs become available. */ 3371 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3372 arg_ts = arg_temp(op->args[0]); 3373 dir_ts = arg_ts->state_ptr; 3374 if (dir_ts) { 3375 op->args[0] = temp_arg(dir_ts); 3376 changes = true; 3377 3378 /* The output is now live and modified. */ 3379 arg_ts->state = 0; 3380 3381 if (NEED_SYNC_ARG(0)) { 3382 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3383 ? 
INDEX_op_st_i32 3384 : INDEX_op_st_i64); 3385 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3386 TCGTemp *out_ts = dir_ts; 3387 3388 if (IS_DEAD_ARG(0)) { 3389 out_ts = arg_temp(op->args[1]); 3390 arg_ts->state = TS_DEAD; 3391 tcg_op_remove(s, op); 3392 } else { 3393 arg_ts->state = TS_MEM; 3394 } 3395 3396 sop->args[0] = temp_arg(out_ts); 3397 sop->args[1] = temp_arg(arg_ts->mem_base); 3398 sop->args[2] = arg_ts->mem_offset; 3399 } else { 3400 tcg_debug_assert(!IS_DEAD_ARG(0)); 3401 } 3402 } 3403 } else { 3404 for (i = 0; i < nb_oargs; i++) { 3405 arg_ts = arg_temp(op->args[i]); 3406 dir_ts = arg_ts->state_ptr; 3407 if (!dir_ts) { 3408 continue; 3409 } 3410 op->args[i] = temp_arg(dir_ts); 3411 changes = true; 3412 3413 /* The output is now live and modified. */ 3414 arg_ts->state = 0; 3415 3416 /* Sync outputs upon their last write. */ 3417 if (NEED_SYNC_ARG(i)) { 3418 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3419 ? INDEX_op_st_i32 3420 : INDEX_op_st_i64); 3421 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3422 3423 sop->args[0] = temp_arg(dir_ts); 3424 sop->args[1] = temp_arg(arg_ts->mem_base); 3425 sop->args[2] = arg_ts->mem_offset; 3426 3427 arg_ts->state = TS_MEM; 3428 } 3429 /* Drop outputs that are dead. */ 3430 if (IS_DEAD_ARG(i)) { 3431 arg_ts->state = TS_DEAD; 3432 } 3433 } 3434 } 3435 } 3436 3437 return changes; 3438 } 3439 3440 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3441 { 3442 intptr_t off; 3443 int size, align; 3444 3445 /* When allocating an object, look at the full type. */ 3446 size = tcg_type_size(ts->base_type); 3447 switch (ts->base_type) { 3448 case TCG_TYPE_I32: 3449 align = 4; 3450 break; 3451 case TCG_TYPE_I64: 3452 case TCG_TYPE_V64: 3453 align = 8; 3454 break; 3455 case TCG_TYPE_I128: 3456 case TCG_TYPE_V128: 3457 case TCG_TYPE_V256: 3458 /* 3459 * Note that we do not require aligned storage for V256, 3460 * and that we provide alignment for I128 to match V128, 3461 * even if that's above what the host ABI requires. 3462 */ 3463 align = 16; 3464 break; 3465 default: 3466 g_assert_not_reached(); 3467 } 3468 3469 /* 3470 * Assume the stack is sufficiently aligned. 3471 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3472 * and do not require 16 byte vector alignment. This seems slightly 3473 * easier than fully parameterizing the above switch statement. 3474 */ 3475 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3476 off = ROUND_UP(s->current_frame_offset, align); 3477 3478 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3479 if (off + size > s->frame_end) { 3480 tcg_raise_tb_overflow(s); 3481 } 3482 s->current_frame_offset = off + size; 3483 #if defined(__sparc__) 3484 off += TCG_TARGET_STACK_BIAS; 3485 #endif 3486 3487 /* If the object was subdivided, assign memory to all the parts. */ 3488 if (ts->base_type != ts->type) { 3489 int part_size = tcg_type_size(ts->type); 3490 int part_count = size / part_size; 3491 3492 /* 3493 * Each part is allocated sequentially in tcg_temp_new_internal. 3494 * Jump back to the first part by subtracting the current index. 3495 */ 3496 ts -= ts->temp_subindex; 3497 for (int i = 0; i < part_count; ++i) { 3498 ts[i].mem_offset = off + i * part_size; 3499 ts[i].mem_base = s->frame_temp; 3500 ts[i].mem_allocated = 1; 3501 } 3502 } else { 3503 ts->mem_offset = off; 3504 ts->mem_base = s->frame_temp; 3505 ts->mem_allocated = 1; 3506 } 3507 } 3508 3509 /* Assign @reg to @ts, and update reg_to_temp[]. 
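   Any previous assignment of @ts to another register is dropped first,
   keeping the reg_to_temp[] mapping one-to-one.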
*/ 3510 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 3511 { 3512 if (ts->val_type == TEMP_VAL_REG) { 3513 TCGReg old = ts->reg; 3514 tcg_debug_assert(s->reg_to_temp[old] == ts); 3515 if (old == reg) { 3516 return; 3517 } 3518 s->reg_to_temp[old] = NULL; 3519 } 3520 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3521 s->reg_to_temp[reg] = ts; 3522 ts->val_type = TEMP_VAL_REG; 3523 ts->reg = reg; 3524 } 3525 3526 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 3527 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 3528 { 3529 tcg_debug_assert(type != TEMP_VAL_REG); 3530 if (ts->val_type == TEMP_VAL_REG) { 3531 TCGReg reg = ts->reg; 3532 tcg_debug_assert(s->reg_to_temp[reg] == ts); 3533 s->reg_to_temp[reg] = NULL; 3534 } 3535 ts->val_type = type; 3536 } 3537 3538 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3539 3540 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3541 mark it free; otherwise mark it dead. */ 3542 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3543 { 3544 TCGTempVal new_type; 3545 3546 switch (ts->kind) { 3547 case TEMP_FIXED: 3548 return; 3549 case TEMP_GLOBAL: 3550 case TEMP_TB: 3551 new_type = TEMP_VAL_MEM; 3552 break; 3553 case TEMP_EBB: 3554 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3555 break; 3556 case TEMP_CONST: 3557 new_type = TEMP_VAL_CONST; 3558 break; 3559 default: 3560 g_assert_not_reached(); 3561 } 3562 set_temp_val_nonreg(s, ts, new_type); 3563 } 3564 3565 /* Mark a temporary as dead. */ 3566 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3567 { 3568 temp_free_or_dead(s, ts, 1); 3569 } 3570 3571 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3572 registers needs to be allocated to store a constant. If 'free_or_dead' 3573 is non-zero, subsequently release the temporary; if it is positive, the 3574 temp is dead; if it is negative, the temp is free. */ 3575 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3576 TCGRegSet preferred_regs, int free_or_dead) 3577 { 3578 if (!temp_readonly(ts) && !ts->mem_coherent) { 3579 if (!ts->mem_allocated) { 3580 temp_allocate_frame(s, ts); 3581 } 3582 switch (ts->val_type) { 3583 case TEMP_VAL_CONST: 3584 /* If we're going to free the temp immediately, then we won't 3585 require it later in a register, so attempt to store the 3586 constant to memory directly. */ 3587 if (free_or_dead 3588 && tcg_out_sti(s, ts->type, ts->val, 3589 ts->mem_base->reg, ts->mem_offset)) { 3590 break; 3591 } 3592 temp_load(s, ts, tcg_target_available_regs[ts->type], 3593 allocated_regs, preferred_regs); 3594 /* fallthrough */ 3595 3596 case TEMP_VAL_REG: 3597 tcg_out_st(s, ts->type, ts->reg, 3598 ts->mem_base->reg, ts->mem_offset); 3599 break; 3600 3601 case TEMP_VAL_MEM: 3602 break; 3603 3604 case TEMP_VAL_DEAD: 3605 default: 3606 tcg_abort(); 3607 } 3608 ts->mem_coherent = 1; 3609 } 3610 if (free_or_dead) { 3611 temp_free_or_dead(s, ts, free_or_dead); 3612 } 3613 } 3614 3615 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3616 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3617 { 3618 TCGTemp *ts = s->reg_to_temp[reg]; 3619 if (ts != NULL) { 3620 temp_sync(s, ts, allocated_regs, 0, -1); 3621 } 3622 } 3623 3624 /** 3625 * tcg_reg_alloc: 3626 * @required_regs: Set of registers in which we must allocate. 
3627 * @allocated_regs: Set of registers which must be avoided. 3628 * @preferred_regs: Set of registers we should prefer. 3629 * @rev: True if we search the registers in "indirect" order. 3630 * 3631 * The allocated register must be in @required_regs & ~@allocated_regs, 3632 * but if we can put it in @preferred_regs we may save a move later. 3633 */ 3634 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3635 TCGRegSet allocated_regs, 3636 TCGRegSet preferred_regs, bool rev) 3637 { 3638 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3639 TCGRegSet reg_ct[2]; 3640 const int *order; 3641 3642 reg_ct[1] = required_regs & ~allocated_regs; 3643 tcg_debug_assert(reg_ct[1] != 0); 3644 reg_ct[0] = reg_ct[1] & preferred_regs; 3645 3646 /* Skip the preferred_regs option if it cannot be satisfied, 3647 or if the preference made no difference. */ 3648 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3649 3650 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3651 3652 /* Try free registers, preferences first. */ 3653 for (j = f; j < 2; j++) { 3654 TCGRegSet set = reg_ct[j]; 3655 3656 if (tcg_regset_single(set)) { 3657 /* One register in the set. */ 3658 TCGReg reg = tcg_regset_first(set); 3659 if (s->reg_to_temp[reg] == NULL) { 3660 return reg; 3661 } 3662 } else { 3663 for (i = 0; i < n; i++) { 3664 TCGReg reg = order[i]; 3665 if (s->reg_to_temp[reg] == NULL && 3666 tcg_regset_test_reg(set, reg)) { 3667 return reg; 3668 } 3669 } 3670 } 3671 } 3672 3673 /* We must spill something. */ 3674 for (j = f; j < 2; j++) { 3675 TCGRegSet set = reg_ct[j]; 3676 3677 if (tcg_regset_single(set)) { 3678 /* One register in the set. */ 3679 TCGReg reg = tcg_regset_first(set); 3680 tcg_reg_free(s, reg, allocated_regs); 3681 return reg; 3682 } else { 3683 for (i = 0; i < n; i++) { 3684 TCGReg reg = order[i]; 3685 if (tcg_regset_test_reg(set, reg)) { 3686 tcg_reg_free(s, reg, allocated_regs); 3687 return reg; 3688 } 3689 } 3690 } 3691 } 3692 3693 tcg_abort(); 3694 } 3695 3696 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 3697 TCGRegSet allocated_regs, 3698 TCGRegSet preferred_regs, bool rev) 3699 { 3700 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3701 TCGRegSet reg_ct[2]; 3702 const int *order; 3703 3704 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 3705 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 3706 tcg_debug_assert(reg_ct[1] != 0); 3707 reg_ct[0] = reg_ct[1] & preferred_regs; 3708 3709 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3710 3711 /* 3712 * Skip the preferred_regs option if it cannot be satisfied, 3713 * or if the preference made no difference. 3714 */ 3715 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3716 3717 /* 3718 * Minimize the number of flushes by looking for 2 free registers first, 3719 * then a single flush, then two flushes. 3720 */ 3721 for (fmin = 2; fmin >= 0; fmin--) { 3722 for (j = k; j < 2; j++) { 3723 TCGRegSet set = reg_ct[j]; 3724 3725 for (i = 0; i < n; i++) { 3726 TCGReg reg = order[i]; 3727 3728 if (tcg_regset_test_reg(set, reg)) { 3729 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 3730 if (f >= fmin) { 3731 tcg_reg_free(s, reg, allocated_regs); 3732 tcg_reg_free(s, reg + 1, allocated_regs); 3733 return reg; 3734 } 3735 } 3736 } 3737 } 3738 } 3739 tcg_abort(); 3740 } 3741 3742 /* Make sure the temporary is in a register. If needed, allocate the register 3743 from DESIRED while avoiding ALLOCATED. 
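   A constant is materialized with movi (or dupi for vector types) and a
   value in memory is reloaded with a plain load; either way the chosen
   register is recorded via set_temp_val_reg().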
*/
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * so do it generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register.
*/ 3847 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3848 break; 3849 default: 3850 g_assert_not_reached(); 3851 } 3852 } 3853 3854 save_globals(s, allocated_regs); 3855 } 3856 3857 /* 3858 * At a conditional branch, we assume all temporaries are dead unless 3859 * explicitly live-across-conditional-branch; all globals and local 3860 * temps are synced to their location. 3861 */ 3862 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3863 { 3864 sync_globals(s, allocated_regs); 3865 3866 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3867 TCGTemp *ts = &s->temps[i]; 3868 /* 3869 * The liveness analysis already ensures that temps are dead. 3870 * Keep tcg_debug_asserts for safety. 3871 */ 3872 switch (ts->kind) { 3873 case TEMP_TB: 3874 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3875 break; 3876 case TEMP_EBB: 3877 case TEMP_CONST: 3878 break; 3879 default: 3880 g_assert_not_reached(); 3881 } 3882 } 3883 } 3884 3885 /* 3886 * Specialized code generation for INDEX_op_mov_* with a constant. 3887 */ 3888 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3889 tcg_target_ulong val, TCGLifeData arg_life, 3890 TCGRegSet preferred_regs) 3891 { 3892 /* ENV should not be modified. */ 3893 tcg_debug_assert(!temp_readonly(ots)); 3894 3895 /* The movi is not explicitly generated here. */ 3896 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 3897 ots->val = val; 3898 ots->mem_coherent = 0; 3899 if (NEED_SYNC_ARG(0)) { 3900 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3901 } else if (IS_DEAD_ARG(0)) { 3902 temp_dead(s, ots); 3903 } 3904 } 3905 3906 /* 3907 * Specialized code generation for INDEX_op_mov_*. 3908 */ 3909 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3910 { 3911 const TCGLifeData arg_life = op->life; 3912 TCGRegSet allocated_regs, preferred_regs; 3913 TCGTemp *ts, *ots; 3914 TCGType otype, itype; 3915 TCGReg oreg, ireg; 3916 3917 allocated_regs = s->reserved_regs; 3918 preferred_regs = output_pref(op, 0); 3919 ots = arg_temp(op->args[0]); 3920 ts = arg_temp(op->args[1]); 3921 3922 /* ENV should not be modified. */ 3923 tcg_debug_assert(!temp_readonly(ots)); 3924 3925 /* Note that otype != itype for no-op truncation. */ 3926 otype = ots->type; 3927 itype = ts->type; 3928 3929 if (ts->val_type == TEMP_VAL_CONST) { 3930 /* propagate constant or generate sti */ 3931 tcg_target_ulong val = ts->val; 3932 if (IS_DEAD_ARG(1)) { 3933 temp_dead(s, ts); 3934 } 3935 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3936 return; 3937 } 3938 3939 /* If the source value is in memory we're going to be forced 3940 to have it in a register in order to perform the copy. Copy 3941 the SOURCE value into its own register first, that way we 3942 don't have to reload SOURCE the next time it is used. */ 3943 if (ts->val_type == TEMP_VAL_MEM) { 3944 temp_load(s, ts, tcg_target_available_regs[itype], 3945 allocated_regs, preferred_regs); 3946 } 3947 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3948 ireg = ts->reg; 3949 3950 if (IS_DEAD_ARG(0)) { 3951 /* mov to a non-saved dead register makes no sense (even with 3952 liveness analysis disabled). 
*/
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
        return;
    }

    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
        /*
         * The mov can be suppressed.  Kill input first, so that it
         * is unlinked from reg_to_temp, then set the output to the
         * reg that we saved from the input.
         */
        temp_dead(s, ts);
        oreg = ireg;
    } else {
        if (ots->val_type == TEMP_VAL_REG) {
            oreg = ots->reg;
        } else {
            /* Make sure to not spill the input register during allocation. */
            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                 allocated_regs | ((TCGRegSet)1 << ireg),
                                 preferred_regs, ots->indirect_base);
        }
        if (!tcg_out_mov(s, otype, oreg, ireg)) {
            /*
             * Cross register class move not supported.
             * Store the source register into the destination slot
             * and leave the destination temp as TEMP_VAL_MEM.
             */
            assert(!temp_readonly(ots));
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ots);
            }
            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
            set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
            ots->mem_coherent = 1;
            return;
        }
    }
    set_temp_val_reg(s, ots, oreg);
    ots->mem_coherent = 0;

    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, allocated_regs, 0, 0);
    }
}

/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
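 * (For example, an input sitting in an integer register may first be
 * moved, or spilled and reloaded, so that a true vector register can
 * receive the dup.)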

/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi. */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
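
/*
 * Editor's note: worked example for lowpart_ofs above.  With
 * itype == TCG_TYPE_I64 (tcg_type_size() == 8) and vece == MO_16
 * (element size 1 << MO_16 == 2), a big-endian host stores the least
 * significant element last, so lowpart_ofs == 8 - 2 == 6; little-endian
 * hosts keep it at 0 because the low element sits at the lowest address.
 */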

static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;
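
        /*
         * Editor's note: a register pair is always two adjacent host
         * registers (lo, lo + 1), which is why case 1 above insists on
         * ts->reg < TCG_TARGET_NB_REGS - 1 before reusing an existing
         * pair in place.  Sketch of a fresh allocation:
         *
         *     TCGReg lo = tcg_reg_alloc_pair(s, i_required_regs,
         *                                    i_allocated_regs, 0,
         *                                    ts->indirect_base);
         *     TCGReg hi = lo + 1;
         */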

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }
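
    /*
     * Editor's note (worked example, hypothetical registers): for an op
     * whose output aliases input 0, e.g. a backend constraint set like
     * C_O1_I2(r, 0, r), the input handling above reuses the input's
     * register for the output when the input dies at this op.  If the
     * input lives on, a fresh register is allocated and the value copied:
     *
     *     reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
     *                         i_preferred_regs, ts->indirect_base);
     *     tcg_out_mov(s, ts->type, reg, ts->reg);
     */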

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /*
             * Sync globals if the op has side effects and might trigger
             * an exception.
             */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
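
/*
 * Editor's note: the vector path above recovers the operation type from
 * the op's VECL field; TCG_TYPE_V64, TCG_TYPE_V128 and TCG_TYPE_V256 are
 * consecutive enumerators, so the mapping is a simple offset:
 *
 *     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
 *     vecl 0 -> TCG_TYPE_V64, 1 -> TCG_TYPE_V128, 2 -> TCG_TYPE_V256
 */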

static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
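
/*
 * Editor's note: worked example for the immediate promotion above.
 * deposit64(lo, 32, 32, hi) places 'hi' into bits [63:32]:
 *
 *     deposit64(0x11111111, 32, 32, 0x22222222) == 0x2222222211111111
 *
 * dup_const(vece, c) replicates the low (1 << vece) bytes of c across
 * 64 bits:
 *
 *     dup_const(MO_8,  0x5a)       == 0x5a5a5a5a5a5a5a5a
 *     dup_const(MO_32, 0x12345678) == 0x1234567812345678
 *
 * A value equal to its own MO_8 replication is emitted with the
 * narrowest element size, which gives the backend the most freedom.
 */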

static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}

static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               TCG_TARGET_CALL_STACK_OFFSET +
               stk_slot * sizeof(tcg_target_long));
}

static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
                            TCGTemp *ts, TCGRegSet *allocated_regs)
{
    if (REG_P(l)) {
        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
        load_arg_reg(s, reg, ts, *allocated_regs);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
                     ts, *allocated_regs);
    }
}

static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
                         intptr_t ref_off, TCGRegSet *allocated_regs)
{
    TCGReg reg;
    int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    if (stk_slot < 0) {
        reg = tcg_target_call_iarg_regs[arg_slot];
        tcg_reg_free(s, reg, *allocated_regs);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
                            *allocated_regs, 0, false);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
                   TCG_TARGET_CALL_STACK_OFFSET
                   + stk_slot * sizeof(tcg_target_long));
    }
}
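
/*
 * Editor's note: argument slots form one flat sequence; the first
 * ARRAY_SIZE(tcg_target_call_iarg_regs) slots are registers and the
 * rest spill to the stack.  Worked example with hypothetical numbers
 * (8 argument registers, 8-byte tcg_target_long): arg_slot 10 gives
 *
 *     stk_slot    = 10 - 8 = 2
 *     byte offset = TCG_TARGET_CALL_STACK_OFFSET + 2 * 8
 */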

static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers. */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers. */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed. */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
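
/*
 * Editor's note: usage sketch for the two macros above, accumulating one
 * context's counters into a zeroed total (hypothetical variables):
 *
 *     TCGProfile total = {};
 *     PROF_ADD(&total, &s->prof, tb_count);        accumulate a sum
 *     PROF_MAX(&total, &s->prof, op_count_max);    track a maximum
 */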

/* Pass in a zeroed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif
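
/*
 * Editor's note (CONFIG_PROFILER builds only): callers are expected to
 * take a consistent snapshot rather than read the per-thread contexts
 * directly:
 *
 *     TCGProfile prof = {};
 *     tcg_profile_snapshot(&prof, true, false);    counters, no op table
 */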

int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
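
        /*
         * Editor's note: when the guest word is wider than the host word
         * (TARGET_LONG_BITS > TCG_TARGET_REG_BITS), each insn-start word
         * arrives as two 32-bit arguments and is reassembled above with
         *
         *     a = deposit64(low_half, 32, 32, high_half);
         *
         * The assert on gen_insn_end_off catches truncation: the offsets
         * are stored as 16-bit values, hence also the UINT16_MAX check
         * at the bottom of this loop.
         */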

        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /*
             * Note: in order to speed up the code, it would be much
             * faster to have specialized register allocator functions for
             * some common argument patterns.
             */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off. */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block. */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* Flush instruction cache. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
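
/*
 * Editor's note: caller-side sketch of the contract above (hypothetical
 * caller; the real restart logic lives in the translator):
 *
 *     int size = tcg_gen_code(s, tb, pc_start);
 *     if (size < 0) {
 *         -1: code buffer overflow; -2: TB too large for the 16-bit
 *         insn offsets or unresolved relocations; regenerate with a
 *         smaller TB after flushing the buffer.
 *     }
 */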

#ifdef CONFIG_PROFILER
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte  %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/*
 * In order to use this feature, the backend needs to do three things:
 *
 * (1) Define ELF_HOST_MACHINE to indicate both what value to
 *     put into the ELF image and to indicate support for the feature.
 *
 * (2) Define tcg_register_jit.  This should create a buffer containing
 *     the contents of a .debug_frame section that describes the post-
 *     prologue unwind info for the tcg machine.
 *
 * (3) Call tcg_register_jit_int, with the constructed .debug_frame.
 */

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
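
/*
 * Editor's note: find_string() returns the byte offset of an entry in a
 * strtab-style blob; the entry must be present or the loop never
 * terminates.  Worked example against a table shaped like img->str below:
 *
 *     static const char str[] = "\0" ".text\0" ".debug_info\0";
 *     find_string(str, ".text")        == 1
 *     find_string(str, ".debug_info")  == 7
 */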

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /*
             * Trick: The contents of code_gen_buffer are not present in
             * this fake ELF file; that got allocated elsewhere.  Therefore
             * we mark .text as SHT_NOBITS (similar to .bss) so that readers
             * will not look for contents.  We can record any address.
             */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /*
     * Enable this block to be able to debug the ELF image file creation.
     * One can use readelf, objdump, or other inspection utilities.
     */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* fwrite returns the number of items written, here 1. */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif