/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
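
/*
 * Illustrative note: the DebugFrame structures above carry .debug_frame
 * unwind data for the generated code.  tcg_register_jit_int() wraps the
 * code buffer and this unwind info into a minimal in-memory ELF image and
 * announces it through the standard GDB JIT interface, conceptually:
 *
 *     entry.symfile_addr = elf_image;         // jit_code_entry fields per
 *     entry.symfile_size = elf_image_size;    // the GDB JIT spec, not QEMU
 *     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
 *     __jit_debug_register_code();
 */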

/* Forward declarations for functions declared and used in tcg-target.c.inc.
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}
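
/*
 * Illustrative note: the tcg_outN/tcg_patchN helpers adapt emission to the
 * host's instruction unit size.  On a byte-stream host such as x86
 * (TCG_TARGET_INSN_UNIT_SIZE == 1), tcg_out32() below takes the memcpy
 * path and advances code_ptr by four units; on a fixed-width host with
 * 4-byte units it is a single "*s->code_ptr++ = v" store.  A backend
 * emitting one 32-bit opcode would simply do, e.g.:
 *
 *     tcg_out32(s, insn);     // 'insn' being some encoded host opcode
 */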

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
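
/*
 * Illustrative note: with a split-wx buffer the translated code is mapped
 * twice, writable at the code_ptr address used during generation and
 * executable at an offset of tcg_splitwx_diff.  Writes always go through
 * the RW view; tcg_splitwx_to_rx() converts to the RX view whenever an
 * address will be executed or used pc-relative, as in tcg_out_label()
 * and get_jmp_target_addr() above.
 */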

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
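
/*
 * Illustrative note: tcg-target-con-set.h is an X-macro header included
 * three times with different C_O*_I* definitions -- once to build the
 * TCGConstraintSetIndex enum, once to build constraint_sets[], and once
 * (below) so tcg_target_op_def() can return enum values.  A hypothetical
 * entry "C_O1_I2(r, r, ri)" expands, in order, to:
 *
 *     c_o1_i2_r_r_ri,                            enum member
 *     { .args_ct_str = { "r", "r", "ri" } },     table entry
 *     c_o1_i2_r_r_ri                             returned enumerator
 */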

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;
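
    /*
     * Note: the struct assignment above copies raw pointers, so each
     * temps[i].mem_base still points into tcg_init_ctx.temps[].  The loop
     * below rebases those pointers onto this context's own temps[] array.
     */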

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
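
/*
 * Illustrative note: info->typemask packs one 3-bit dh_typecode per slot:
 * the return type lives in bits [2:0] and argument j in the 3 bits starting
 * at (j + 1) * 3.  Hence init_ffi_layouts() below hashes on the whole
 * typemask and recovers argument types with
 * extract32(typemask, (j + 1) * 3, 3).
 */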

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
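
/*
 * Illustrative note: for a type wider than the host register,
 * tcg_temp_new_internal() above allocates consecutive TCGTemps sharing one
 * base_type, e.g. TCG_TYPE_I128 on a 64-bit host yields two TCG_TYPE_REG
 * temps with temp_subindex 0 and 1.  Callers hold only the subindex-0
 * temp and the pieces travel together through register allocation.
 */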

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
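
/*
 * Illustrative note: tcg_constant_internal() above interns constants per
 * (type, value) in s->const_table, so repeated requests for the same
 * constant share one read-only TEMP_CONST temp that is never freed, e.g.
 * via the tcg_constant_i32(1) wrapper.  By contrast, the legacy
 * tcg_const_i32() path allocates a fresh mutable temp and emits a movi
 * into it.
 */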

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
2041 */ 2042 if (func == info->func) { 2043 col += ne_fprintf(f, "%s", info->name); 2044 } else { 2045 col += ne_fprintf(f, "plugin(%p)", func); 2046 } 2047 2048 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 2049 for (i = 0; i < nb_oargs; i++) { 2050 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2051 op->args[i])); 2052 } 2053 for (i = 0; i < nb_iargs; i++) { 2054 TCGArg arg = op->args[nb_oargs + i]; 2055 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2056 col += ne_fprintf(f, ",%s", t); 2057 } 2058 } else { 2059 col += ne_fprintf(f, " %s ", def->name); 2060 2061 nb_oargs = def->nb_oargs; 2062 nb_iargs = def->nb_iargs; 2063 nb_cargs = def->nb_cargs; 2064 2065 if (def->flags & TCG_OPF_VECTOR) { 2066 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 2067 8 << TCGOP_VECE(op)); 2068 } 2069 2070 k = 0; 2071 for (i = 0; i < nb_oargs; i++) { 2072 const char *sep = k ? "," : ""; 2073 col += ne_fprintf(f, "%s%s", sep, 2074 tcg_get_arg_str(s, buf, sizeof(buf), 2075 op->args[k++])); 2076 } 2077 for (i = 0; i < nb_iargs; i++) { 2078 const char *sep = k ? "," : ""; 2079 col += ne_fprintf(f, "%s%s", sep, 2080 tcg_get_arg_str(s, buf, sizeof(buf), 2081 op->args[k++])); 2082 } 2083 switch (c) { 2084 case INDEX_op_brcond_i32: 2085 case INDEX_op_setcond_i32: 2086 case INDEX_op_movcond_i32: 2087 case INDEX_op_brcond2_i32: 2088 case INDEX_op_setcond2_i32: 2089 case INDEX_op_brcond_i64: 2090 case INDEX_op_setcond_i64: 2091 case INDEX_op_movcond_i64: 2092 case INDEX_op_cmp_vec: 2093 case INDEX_op_cmpsel_vec: 2094 if (op->args[k] < ARRAY_SIZE(cond_name) 2095 && cond_name[op->args[k]]) { 2096 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 2097 } else { 2098 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 2099 } 2100 i = 1; 2101 break; 2102 case INDEX_op_qemu_ld_i32: 2103 case INDEX_op_qemu_st_i32: 2104 case INDEX_op_qemu_st8_i32: 2105 case INDEX_op_qemu_ld_i64: 2106 case INDEX_op_qemu_st_i64: 2107 { 2108 MemOpIdx oi = op->args[k++]; 2109 MemOp op = get_memop(oi); 2110 unsigned ix = get_mmuidx(oi); 2111 2112 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2113 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 2114 } else { 2115 const char *s_al, *s_op; 2116 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2117 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2118 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 2119 } 2120 i = 1; 2121 } 2122 break; 2123 case INDEX_op_bswap16_i32: 2124 case INDEX_op_bswap16_i64: 2125 case INDEX_op_bswap32_i32: 2126 case INDEX_op_bswap32_i64: 2127 case INDEX_op_bswap64_i64: 2128 { 2129 TCGArg flags = op->args[k]; 2130 const char *name = NULL; 2131 2132 if (flags < ARRAY_SIZE(bswap_flag_name)) { 2133 name = bswap_flag_name[flags]; 2134 } 2135 if (name) { 2136 col += ne_fprintf(f, ",%s", name); 2137 } else { 2138 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 2139 } 2140 i = k = 1; 2141 } 2142 break; 2143 default: 2144 i = 0; 2145 break; 2146 } 2147 switch (c) { 2148 case INDEX_op_set_label: 2149 case INDEX_op_br: 2150 case INDEX_op_brcond_i32: 2151 case INDEX_op_brcond_i64: 2152 case INDEX_op_brcond2_i32: 2153 col += ne_fprintf(f, "%s$L%d", k ? 
"," : "", 2154 arg_label(op->args[k])->id); 2155 i++, k++; 2156 break; 2157 case INDEX_op_mb: 2158 { 2159 TCGBar membar = op->args[k]; 2160 const char *b_op, *m_op; 2161 2162 switch (membar & TCG_BAR_SC) { 2163 case 0: 2164 b_op = "none"; 2165 break; 2166 case TCG_BAR_LDAQ: 2167 b_op = "acq"; 2168 break; 2169 case TCG_BAR_STRL: 2170 b_op = "rel"; 2171 break; 2172 case TCG_BAR_SC: 2173 b_op = "seq"; 2174 break; 2175 default: 2176 g_assert_not_reached(); 2177 } 2178 2179 switch (membar & TCG_MO_ALL) { 2180 case 0: 2181 m_op = "none"; 2182 break; 2183 case TCG_MO_LD_LD: 2184 m_op = "rr"; 2185 break; 2186 case TCG_MO_LD_ST: 2187 m_op = "rw"; 2188 break; 2189 case TCG_MO_ST_LD: 2190 m_op = "wr"; 2191 break; 2192 case TCG_MO_ST_ST: 2193 m_op = "ww"; 2194 break; 2195 case TCG_MO_LD_LD | TCG_MO_LD_ST: 2196 m_op = "rr+rw"; 2197 break; 2198 case TCG_MO_LD_LD | TCG_MO_ST_LD: 2199 m_op = "rr+wr"; 2200 break; 2201 case TCG_MO_LD_LD | TCG_MO_ST_ST: 2202 m_op = "rr+ww"; 2203 break; 2204 case TCG_MO_LD_ST | TCG_MO_ST_LD: 2205 m_op = "rw+wr"; 2206 break; 2207 case TCG_MO_LD_ST | TCG_MO_ST_ST: 2208 m_op = "rw+ww"; 2209 break; 2210 case TCG_MO_ST_LD | TCG_MO_ST_ST: 2211 m_op = "wr+ww"; 2212 break; 2213 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD: 2214 m_op = "rr+rw+wr"; 2215 break; 2216 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST: 2217 m_op = "rr+rw+ww"; 2218 break; 2219 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST: 2220 m_op = "rr+wr+ww"; 2221 break; 2222 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST: 2223 m_op = "rw+wr+ww"; 2224 break; 2225 case TCG_MO_ALL: 2226 m_op = "all"; 2227 break; 2228 default: 2229 g_assert_not_reached(); 2230 } 2231 2232 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op); 2233 i++, k++; 2234 } 2235 break; 2236 default: 2237 break; 2238 } 2239 for (; i < nb_cargs; i++, k++) { 2240 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 2241 op->args[k]); 2242 } 2243 } 2244 2245 if (have_prefs || op->life) { 2246 for (; col < 40; ++col) { 2247 putc(' ', f); 2248 } 2249 } 2250 2251 if (op->life) { 2252 unsigned life = op->life; 2253 2254 if (life & (SYNC_ARG * 3)) { 2255 ne_fprintf(f, " sync:"); 2256 for (i = 0; i < 2; ++i) { 2257 if (life & (SYNC_ARG << i)) { 2258 ne_fprintf(f, " %d", i); 2259 } 2260 } 2261 } 2262 life /= DEAD_ARG; 2263 if (life) { 2264 ne_fprintf(f, " dead:"); 2265 for (i = 0; life; ++i, life >>= 1) { 2266 if (life & 1) { 2267 ne_fprintf(f, " %d", i); 2268 } 2269 } 2270 } 2271 } 2272 2273 if (have_prefs) { 2274 for (i = 0; i < nb_oargs; ++i) { 2275 TCGRegSet set = output_pref(op, i); 2276 2277 if (i == 0) { 2278 ne_fprintf(f, " pref="); 2279 } else { 2280 ne_fprintf(f, ","); 2281 } 2282 if (set == 0) { 2283 ne_fprintf(f, "none"); 2284 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2285 ne_fprintf(f, "all"); 2286 #ifdef CONFIG_DEBUG_TCG 2287 } else if (tcg_regset_single(set)) { 2288 TCGReg reg = tcg_regset_first(set); 2289 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2290 #endif 2291 } else if (TCG_TARGET_NB_REGS <= 32) { 2292 ne_fprintf(f, "0x%x", (uint32_t)set); 2293 } else { 2294 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2295 } 2296 } 2297 } 2298 2299 putc('\n', f); 2300 } 2301 } 2302 2303 /* we give more priority to constraints with less registers */ 2304 static int get_constraint_priority(const TCGOpDef *def, int k) 2305 { 2306 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2307 int n = ctpop64(arg_ct->regs); 2308 2309 /* 2310 * Sort constraints of a single register first, which includes output 2311 * aliases (which must exactly match the input already allocated). 2312 */ 2313 if (n == 1 || arg_ct->oalias) { 2314 return INT_MAX; 2315 } 2316 2317 /* 2318 * Sort register pairs next, first then second immediately after. 2319 * Arbitrarily sort multiple pairs by the index of the first reg; 2320 * there shouldn't be many pairs. 2321 */ 2322 switch (arg_ct->pair) { 2323 case 1: 2324 case 3: 2325 return (k + 1) * 2; 2326 case 2: 2327 return (arg_ct->pair_index + 1) * 2 - 1; 2328 } 2329 2330 /* Finally, sort by decreasing register count. */ 2331 assert(n > 1); 2332 return -n; 2333 } 2334 2335 /* sort from highest priority to lowest */ 2336 static void sort_constraints(TCGOpDef *def, int start, int n) 2337 { 2338 int i, j; 2339 TCGArgConstraint *a = def->args_ct; 2340 2341 for (i = 0; i < n; i++) { 2342 a[start + i].sort_index = start + i; 2343 } 2344 if (n <= 1) { 2345 return; 2346 } 2347 for (i = 0; i < n - 1; i++) { 2348 for (j = i + 1; j < n; j++) { 2349 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2350 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2351 if (p1 < p2) { 2352 int tmp = a[start + i].sort_index; 2353 a[start + i].sort_index = a[start + j].sort_index; 2354 a[start + j].sort_index = tmp; 2355 } 2356 } 2357 } 2358 } 2359 2360 static void process_op_defs(TCGContext *s) 2361 { 2362 TCGOpcode op; 2363 2364 for (op = 0; op < NB_OPS; op++) { 2365 TCGOpDef *def = &tcg_op_defs[op]; 2366 const TCGTargetOpDef *tdefs; 2367 bool saw_alias_pair = false; 2368 int i, o, i2, o2, nb_args; 2369 2370 if (def->flags & TCG_OPF_NOT_PRESENT) { 2371 continue; 2372 } 2373 2374 nb_args = def->nb_iargs + def->nb_oargs; 2375 if (nb_args == 0) { 2376 continue; 2377 } 2378 2379 /* 2380 * Macro magic should make it impossible, but double-check that 2381 * the array index is in range. 
        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias. */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

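        /*
         * An illustrative alias/pair datapoint (taken from the i386
         * backend, cited only as an example): add2_i32 uses
         * C_O2_I4(r, r, 0, 1, re, re), so inputs 2 and 3 carry "0" and
         * "1" and were linked above to outputs 0 and 1 via ialias/oalias.
         */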
        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* Sort the constraints (XXX: this is just a heuristic). */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}

static void remove_label_use(TCGOp *op, int idx)
{
    TCGLabel *label = arg_label(op->args[idx]);
    TCGLabelUse *use;

    QSIMPLEQ_FOREACH(use, &label->branches, next) {
        if (use->op == op) {
            QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
            return;
        }
    }
    g_assert_not_reached();
}

void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    switch (op->opc) {
    case INDEX_op_br:
        remove_label_use(op, 0);
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        remove_label_use(op, 3);
        break;
    case INDEX_op_brcond2_i32:
        remove_label_use(op, 5);
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}

void tcg_remove_ops_after(TCGOp *op)
{
    TCGContext *s = tcg_ctx;

    while (true) {
        TCGOp *last = tcg_last_op();
        if (last == op) {
            return;
        }
        tcg_op_remove(s, last);
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
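            /*
             * First-fit reuse of an op recycled by tcg_op_remove():
             * any op whose allocation is at least nargs will do, and
             * keeping op->nargs at the larger value preserves the full
             * allocation size for later reuse.
             */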
2625 if (nargs <= op->nargs) { 2626 QTAILQ_REMOVE(&s->free_ops, op, link); 2627 nargs = op->nargs; 2628 goto found; 2629 } 2630 } 2631 } 2632 2633 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ 2634 nargs = MAX(4, nargs); 2635 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 2636 2637 found: 2638 memset(op, 0, offsetof(TCGOp, link)); 2639 op->opc = opc; 2640 op->nargs = nargs; 2641 2642 /* Check for bitfield overflow. */ 2643 tcg_debug_assert(op->nargs == nargs); 2644 2645 s->nb_ops++; 2646 return op; 2647 } 2648 2649 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 2650 { 2651 TCGOp *op = tcg_op_alloc(opc, nargs); 2652 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2653 return op; 2654 } 2655 2656 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 2657 TCGOpcode opc, unsigned nargs) 2658 { 2659 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2660 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2661 return new_op; 2662 } 2663 2664 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 2665 TCGOpcode opc, unsigned nargs) 2666 { 2667 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2668 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2669 return new_op; 2670 } 2671 2672 static void move_label_uses(TCGLabel *to, TCGLabel *from) 2673 { 2674 TCGLabelUse *u; 2675 2676 QSIMPLEQ_FOREACH(u, &from->branches, next) { 2677 TCGOp *op = u->op; 2678 switch (op->opc) { 2679 case INDEX_op_br: 2680 op->args[0] = label_arg(to); 2681 break; 2682 case INDEX_op_brcond_i32: 2683 case INDEX_op_brcond_i64: 2684 op->args[3] = label_arg(to); 2685 break; 2686 case INDEX_op_brcond2_i32: 2687 op->args[5] = label_arg(to); 2688 break; 2689 default: 2690 g_assert_not_reached(); 2691 } 2692 } 2693 2694 QSIMPLEQ_CONCAT(&to->branches, &from->branches); 2695 } 2696 2697 /* Reachable analysis : remove unreachable code. */ 2698 static void __attribute__((noinline)) 2699 reachable_code_pass(TCGContext *s) 2700 { 2701 TCGOp *op, *op_next, *op_prev; 2702 bool dead = false; 2703 2704 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2705 bool remove = dead; 2706 TCGLabel *label; 2707 2708 switch (op->opc) { 2709 case INDEX_op_set_label: 2710 label = arg_label(op->args[0]); 2711 2712 /* 2713 * Note that the first op in the TB is always a load, 2714 * so there is always something before a label. 2715 */ 2716 op_prev = QTAILQ_PREV(op, link); 2717 2718 /* 2719 * If we find two sequential labels, move all branches to 2720 * reference the second label and remove the first label. 2721 * Do this before branch to next optimization, so that the 2722 * middle label is out of the way. 2723 */ 2724 if (op_prev->opc == INDEX_op_set_label) { 2725 move_label_uses(label, arg_label(op_prev->args[0])); 2726 tcg_op_remove(s, op_prev); 2727 op_prev = QTAILQ_PREV(op, link); 2728 } 2729 2730 /* 2731 * Optimization can fold conditional branches to unconditional. 2732 * If we find a label which is preceded by an unconditional 2733 * branch to next, remove the branch. We couldn't do this when 2734 * processing the branch because any dead code between the branch 2735 * and label had not yet been removed. 2736 */ 2737 if (op_prev->opc == INDEX_op_br && 2738 label == arg_label(op_prev->args[0])) { 2739 tcg_op_remove(s, op_prev); 2740 /* Fall through means insns become live again. */ 2741 dead = false; 2742 } 2743 2744 if (QSIMPLEQ_EMPTY(&label->branches)) { 2745 /* 2746 * While there is an occasional backward branch, virtually 2747 * all branches generated by the translators are forward. 
2748 * Which means that generally we will have already removed 2749 * all references to the label that will be, and there is 2750 * little to be gained by iterating. 2751 */ 2752 remove = true; 2753 } else { 2754 /* Once we see a label, insns become live again. */ 2755 dead = false; 2756 remove = false; 2757 } 2758 break; 2759 2760 case INDEX_op_br: 2761 case INDEX_op_exit_tb: 2762 case INDEX_op_goto_ptr: 2763 /* Unconditional branches; everything following is dead. */ 2764 dead = true; 2765 break; 2766 2767 case INDEX_op_call: 2768 /* Notice noreturn helper calls, raising exceptions. */ 2769 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2770 dead = true; 2771 } 2772 break; 2773 2774 case INDEX_op_insn_start: 2775 /* Never remove -- we need to keep these for unwind. */ 2776 remove = false; 2777 break; 2778 2779 default: 2780 break; 2781 } 2782 2783 if (remove) { 2784 tcg_op_remove(s, op); 2785 } 2786 } 2787 } 2788 2789 #define TS_DEAD 1 2790 #define TS_MEM 2 2791 2792 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2793 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2794 2795 /* For liveness_pass_1, the register preferences for a given temp. */ 2796 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2797 { 2798 return ts->state_ptr; 2799 } 2800 2801 /* For liveness_pass_1, reset the preferences for a given temp to the 2802 * maximal regset for its type. 2803 */ 2804 static inline void la_reset_pref(TCGTemp *ts) 2805 { 2806 *la_temp_pref(ts) 2807 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2808 } 2809 2810 /* liveness analysis: end of function: all temps are dead, and globals 2811 should be in memory. */ 2812 static void la_func_end(TCGContext *s, int ng, int nt) 2813 { 2814 int i; 2815 2816 for (i = 0; i < ng; ++i) { 2817 s->temps[i].state = TS_DEAD | TS_MEM; 2818 la_reset_pref(&s->temps[i]); 2819 } 2820 for (i = ng; i < nt; ++i) { 2821 s->temps[i].state = TS_DEAD; 2822 la_reset_pref(&s->temps[i]); 2823 } 2824 } 2825 2826 /* liveness analysis: end of basic block: all temps are dead, globals 2827 and local temps should be in memory. */ 2828 static void la_bb_end(TCGContext *s, int ng, int nt) 2829 { 2830 int i; 2831 2832 for (i = 0; i < nt; ++i) { 2833 TCGTemp *ts = &s->temps[i]; 2834 int state; 2835 2836 switch (ts->kind) { 2837 case TEMP_FIXED: 2838 case TEMP_GLOBAL: 2839 case TEMP_TB: 2840 state = TS_DEAD | TS_MEM; 2841 break; 2842 case TEMP_EBB: 2843 case TEMP_CONST: 2844 state = TS_DEAD; 2845 break; 2846 default: 2847 g_assert_not_reached(); 2848 } 2849 ts->state = state; 2850 la_reset_pref(ts); 2851 } 2852 } 2853 2854 /* liveness analysis: sync globals back to memory. */ 2855 static void la_global_sync(TCGContext *s, int ng) 2856 { 2857 int i; 2858 2859 for (i = 0; i < ng; ++i) { 2860 int state = s->temps[i].state; 2861 s->temps[i].state = state | TS_MEM; 2862 if (state == TS_DEAD) { 2863 /* If the global was previously dead, reset prefs. */ 2864 la_reset_pref(&s->temps[i]); 2865 } 2866 } 2867 } 2868 2869 /* 2870 * liveness analysis: conditional branch: all temps are dead unless 2871 * explicitly live-across-conditional-branch, globals and local temps 2872 * should be synced. 
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_TB:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                continue;
            }
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart. */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis: update the opc_arg_life array to tell whether a
   given input argument is dead.  Instructions updating dead
   temporaries are removed.
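   For example (an illustrative sketch, not taken from a real TB): in

       add_i32 tmp3,tmp1,tmp2

   where tmp3 is not used by any later op, the output is dead at the
   add, so the whole op is deleted via the do_remove path below.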
*/ 3005 static void __attribute__((noinline)) 3006 liveness_pass_1(TCGContext *s) 3007 { 3008 int nb_globals = s->nb_globals; 3009 int nb_temps = s->nb_temps; 3010 TCGOp *op, *op_prev; 3011 TCGRegSet *prefs; 3012 int i; 3013 3014 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 3015 for (i = 0; i < nb_temps; ++i) { 3016 s->temps[i].state_ptr = prefs + i; 3017 } 3018 3019 /* ??? Should be redundant with the exit_tb that ends the TB. */ 3020 la_func_end(s, nb_globals, nb_temps); 3021 3022 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 3023 int nb_iargs, nb_oargs; 3024 TCGOpcode opc_new, opc_new2; 3025 bool have_opc_new2; 3026 TCGLifeData arg_life = 0; 3027 TCGTemp *ts; 3028 TCGOpcode opc = op->opc; 3029 const TCGOpDef *def = &tcg_op_defs[opc]; 3030 3031 switch (opc) { 3032 case INDEX_op_call: 3033 { 3034 const TCGHelperInfo *info = tcg_call_info(op); 3035 int call_flags = tcg_call_flags(op); 3036 3037 nb_oargs = TCGOP_CALLO(op); 3038 nb_iargs = TCGOP_CALLI(op); 3039 3040 /* pure functions can be removed if their result is unused */ 3041 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 3042 for (i = 0; i < nb_oargs; i++) { 3043 ts = arg_temp(op->args[i]); 3044 if (ts->state != TS_DEAD) { 3045 goto do_not_remove_call; 3046 } 3047 } 3048 goto do_remove; 3049 } 3050 do_not_remove_call: 3051 3052 /* Output args are dead. */ 3053 for (i = 0; i < nb_oargs; i++) { 3054 ts = arg_temp(op->args[i]); 3055 if (ts->state & TS_DEAD) { 3056 arg_life |= DEAD_ARG << i; 3057 } 3058 if (ts->state & TS_MEM) { 3059 arg_life |= SYNC_ARG << i; 3060 } 3061 ts->state = TS_DEAD; 3062 la_reset_pref(ts); 3063 } 3064 3065 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 3066 memset(op->output_pref, 0, sizeof(op->output_pref)); 3067 3068 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 3069 TCG_CALL_NO_READ_GLOBALS))) { 3070 la_global_kill(s, nb_globals); 3071 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 3072 la_global_sync(s, nb_globals); 3073 } 3074 3075 /* Record arguments that die in this helper. */ 3076 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3077 ts = arg_temp(op->args[i]); 3078 if (ts->state & TS_DEAD) { 3079 arg_life |= DEAD_ARG << i; 3080 } 3081 } 3082 3083 /* For all live registers, remove call-clobbered prefs. */ 3084 la_cross_call(s, nb_temps); 3085 3086 /* 3087 * Input arguments are live for preceding opcodes. 3088 * 3089 * For those arguments that die, and will be allocated in 3090 * registers, clear the register set for that arg, to be 3091 * filled in below. For args that will be on the stack, 3092 * reset to any available reg. Process arguments in reverse 3093 * order so that if a temp is used more than once, the stack 3094 * reset to max happens before the register reset to 0. 3095 */ 3096 for (i = nb_iargs - 1; i >= 0; i--) { 3097 const TCGCallArgumentLoc *loc = &info->in[i]; 3098 ts = arg_temp(op->args[nb_oargs + i]); 3099 3100 if (ts->state & TS_DEAD) { 3101 switch (loc->kind) { 3102 case TCG_CALL_ARG_NORMAL: 3103 case TCG_CALL_ARG_EXTEND_U: 3104 case TCG_CALL_ARG_EXTEND_S: 3105 if (REG_P(loc)) { 3106 *la_temp_pref(ts) = 0; 3107 break; 3108 } 3109 /* fall through */ 3110 default: 3111 *la_temp_pref(ts) = 3112 tcg_target_available_regs[ts->type]; 3113 break; 3114 } 3115 ts->state &= ~TS_DEAD; 3116 } 3117 } 3118 3119 /* 3120 * For each input argument, add its input register to prefs. 3121 * If a temp is used once, this produces a single set bit; 3122 * if a temp is used multiple times, this produces a set. 
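             *
             * E.g. (illustrative, host-dependent): a temp passed as
             * both the first and the second integer argument collects
             * both registers from tcg_target_call_iarg_regs[] in its
             * preference set -- say {rdi, rsi} on an x86_64-like host.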
3123 */ 3124 for (i = 0; i < nb_iargs; i++) { 3125 const TCGCallArgumentLoc *loc = &info->in[i]; 3126 ts = arg_temp(op->args[nb_oargs + i]); 3127 3128 switch (loc->kind) { 3129 case TCG_CALL_ARG_NORMAL: 3130 case TCG_CALL_ARG_EXTEND_U: 3131 case TCG_CALL_ARG_EXTEND_S: 3132 if (REG_P(loc)) { 3133 tcg_regset_set_reg(*la_temp_pref(ts), 3134 tcg_target_call_iarg_regs[loc->arg_slot]); 3135 } 3136 break; 3137 default: 3138 break; 3139 } 3140 } 3141 } 3142 break; 3143 case INDEX_op_insn_start: 3144 break; 3145 case INDEX_op_discard: 3146 /* mark the temporary as dead */ 3147 ts = arg_temp(op->args[0]); 3148 ts->state = TS_DEAD; 3149 la_reset_pref(ts); 3150 break; 3151 3152 case INDEX_op_add2_i32: 3153 opc_new = INDEX_op_add_i32; 3154 goto do_addsub2; 3155 case INDEX_op_sub2_i32: 3156 opc_new = INDEX_op_sub_i32; 3157 goto do_addsub2; 3158 case INDEX_op_add2_i64: 3159 opc_new = INDEX_op_add_i64; 3160 goto do_addsub2; 3161 case INDEX_op_sub2_i64: 3162 opc_new = INDEX_op_sub_i64; 3163 do_addsub2: 3164 nb_iargs = 4; 3165 nb_oargs = 2; 3166 /* Test if the high part of the operation is dead, but not 3167 the low part. The result can be optimized to a simple 3168 add or sub. This happens often for x86_64 guest when the 3169 cpu mode is set to 32 bit. */ 3170 if (arg_temp(op->args[1])->state == TS_DEAD) { 3171 if (arg_temp(op->args[0])->state == TS_DEAD) { 3172 goto do_remove; 3173 } 3174 /* Replace the opcode and adjust the args in place, 3175 leaving 3 unused args at the end. */ 3176 op->opc = opc = opc_new; 3177 op->args[1] = op->args[2]; 3178 op->args[2] = op->args[4]; 3179 /* Fall through and mark the single-word operation live. */ 3180 nb_iargs = 2; 3181 nb_oargs = 1; 3182 } 3183 goto do_not_remove; 3184 3185 case INDEX_op_mulu2_i32: 3186 opc_new = INDEX_op_mul_i32; 3187 opc_new2 = INDEX_op_muluh_i32; 3188 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3189 goto do_mul2; 3190 case INDEX_op_muls2_i32: 3191 opc_new = INDEX_op_mul_i32; 3192 opc_new2 = INDEX_op_mulsh_i32; 3193 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3194 goto do_mul2; 3195 case INDEX_op_mulu2_i64: 3196 opc_new = INDEX_op_mul_i64; 3197 opc_new2 = INDEX_op_muluh_i64; 3198 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3199 goto do_mul2; 3200 case INDEX_op_muls2_i64: 3201 opc_new = INDEX_op_mul_i64; 3202 opc_new2 = INDEX_op_mulsh_i64; 3203 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3204 goto do_mul2; 3205 do_mul2: 3206 nb_iargs = 2; 3207 nb_oargs = 2; 3208 if (arg_temp(op->args[1])->state == TS_DEAD) { 3209 if (arg_temp(op->args[0])->state == TS_DEAD) { 3210 /* Both parts of the operation are dead. */ 3211 goto do_remove; 3212 } 3213 /* The high part of the operation is dead; generate the low. */ 3214 op->opc = opc = opc_new; 3215 op->args[1] = op->args[2]; 3216 op->args[2] = op->args[3]; 3217 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3218 /* The low part of the operation is dead; generate the high. */ 3219 op->opc = opc = opc_new2; 3220 op->args[0] = op->args[1]; 3221 op->args[1] = op->args[2]; 3222 op->args[2] = op->args[3]; 3223 } else { 3224 goto do_not_remove; 3225 } 3226 /* Mark the single-word operation live. */ 3227 nb_oargs = 1; 3228 goto do_not_remove; 3229 3230 default: 3231 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3232 nb_iargs = def->nb_iargs; 3233 nb_oargs = def->nb_oargs; 3234 3235 /* Test if the operation can be removed because all 3236 its outputs are dead. 
We assume that nb_oargs == 0 3237 implies side effects */ 3238 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3239 for (i = 0; i < nb_oargs; i++) { 3240 if (arg_temp(op->args[i])->state != TS_DEAD) { 3241 goto do_not_remove; 3242 } 3243 } 3244 goto do_remove; 3245 } 3246 goto do_not_remove; 3247 3248 do_remove: 3249 tcg_op_remove(s, op); 3250 break; 3251 3252 do_not_remove: 3253 for (i = 0; i < nb_oargs; i++) { 3254 ts = arg_temp(op->args[i]); 3255 3256 /* Remember the preference of the uses that followed. */ 3257 if (i < ARRAY_SIZE(op->output_pref)) { 3258 op->output_pref[i] = *la_temp_pref(ts); 3259 } 3260 3261 /* Output args are dead. */ 3262 if (ts->state & TS_DEAD) { 3263 arg_life |= DEAD_ARG << i; 3264 } 3265 if (ts->state & TS_MEM) { 3266 arg_life |= SYNC_ARG << i; 3267 } 3268 ts->state = TS_DEAD; 3269 la_reset_pref(ts); 3270 } 3271 3272 /* If end of basic block, update. */ 3273 if (def->flags & TCG_OPF_BB_EXIT) { 3274 la_func_end(s, nb_globals, nb_temps); 3275 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3276 la_bb_sync(s, nb_globals, nb_temps); 3277 } else if (def->flags & TCG_OPF_BB_END) { 3278 la_bb_end(s, nb_globals, nb_temps); 3279 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3280 la_global_sync(s, nb_globals); 3281 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3282 la_cross_call(s, nb_temps); 3283 } 3284 } 3285 3286 /* Record arguments that die in this opcode. */ 3287 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3288 ts = arg_temp(op->args[i]); 3289 if (ts->state & TS_DEAD) { 3290 arg_life |= DEAD_ARG << i; 3291 } 3292 } 3293 3294 /* Input arguments are live for preceding opcodes. */ 3295 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3296 ts = arg_temp(op->args[i]); 3297 if (ts->state & TS_DEAD) { 3298 /* For operands that were dead, initially allow 3299 all regs for the type. */ 3300 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3301 ts->state &= ~TS_DEAD; 3302 } 3303 } 3304 3305 /* Incorporate constraints for this operand. */ 3306 switch (opc) { 3307 case INDEX_op_mov_i32: 3308 case INDEX_op_mov_i64: 3309 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3310 have proper constraints. That said, special case 3311 moves to propagate preferences backward. */ 3312 if (IS_DEAD_ARG(1)) { 3313 *la_temp_pref(arg_temp(op->args[0])) 3314 = *la_temp_pref(arg_temp(op->args[1])); 3315 } 3316 break; 3317 3318 default: 3319 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3320 const TCGArgConstraint *ct = &def->args_ct[i]; 3321 TCGRegSet set, *pset; 3322 3323 ts = arg_temp(op->args[i]); 3324 pset = la_temp_pref(ts); 3325 set = *pset; 3326 3327 set &= ct->regs; 3328 if (ct->ialias) { 3329 set &= output_pref(op, ct->alias_index); 3330 } 3331 /* If the combination is not possible, restart. */ 3332 if (set == 0) { 3333 set = ct->regs; 3334 } 3335 *pset = set; 3336 } 3337 break; 3338 } 3339 break; 3340 } 3341 op->life = arg_life; 3342 } 3343 } 3344 3345 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3346 static bool __attribute__((noinline)) 3347 liveness_pass_2(TCGContext *s) 3348 { 3349 int nb_globals = s->nb_globals; 3350 int nb_temps, i; 3351 bool changes = false; 3352 TCGOp *op, *op_next; 3353 3354 /* Create a temporary for each indirect global. 
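   (A sketch of the mechanism implemented below: each global marked
   indirect_reg gets a direct TEMP_EBB shadow temp; uses are then
   rewritten to the shadow, with ld/st ops inserted to keep the
   memory copy coherent.)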
*/ 3355 for (i = 0; i < nb_globals; ++i) { 3356 TCGTemp *its = &s->temps[i]; 3357 if (its->indirect_reg) { 3358 TCGTemp *dts = tcg_temp_alloc(s); 3359 dts->type = its->type; 3360 dts->base_type = its->base_type; 3361 dts->temp_subindex = its->temp_subindex; 3362 dts->kind = TEMP_EBB; 3363 its->state_ptr = dts; 3364 } else { 3365 its->state_ptr = NULL; 3366 } 3367 /* All globals begin dead. */ 3368 its->state = TS_DEAD; 3369 } 3370 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3371 TCGTemp *its = &s->temps[i]; 3372 its->state_ptr = NULL; 3373 its->state = TS_DEAD; 3374 } 3375 3376 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3377 TCGOpcode opc = op->opc; 3378 const TCGOpDef *def = &tcg_op_defs[opc]; 3379 TCGLifeData arg_life = op->life; 3380 int nb_iargs, nb_oargs, call_flags; 3381 TCGTemp *arg_ts, *dir_ts; 3382 3383 if (opc == INDEX_op_call) { 3384 nb_oargs = TCGOP_CALLO(op); 3385 nb_iargs = TCGOP_CALLI(op); 3386 call_flags = tcg_call_flags(op); 3387 } else { 3388 nb_iargs = def->nb_iargs; 3389 nb_oargs = def->nb_oargs; 3390 3391 /* Set flags similar to how calls require. */ 3392 if (def->flags & TCG_OPF_COND_BRANCH) { 3393 /* Like reading globals: sync_globals */ 3394 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3395 } else if (def->flags & TCG_OPF_BB_END) { 3396 /* Like writing globals: save_globals */ 3397 call_flags = 0; 3398 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3399 /* Like reading globals: sync_globals */ 3400 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3401 } else { 3402 /* No effect on globals. */ 3403 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3404 TCG_CALL_NO_WRITE_GLOBALS); 3405 } 3406 } 3407 3408 /* Make sure that input arguments are available. */ 3409 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3410 arg_ts = arg_temp(op->args[i]); 3411 dir_ts = arg_ts->state_ptr; 3412 if (dir_ts && arg_ts->state == TS_DEAD) { 3413 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3414 ? INDEX_op_ld_i32 3415 : INDEX_op_ld_i64); 3416 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3417 3418 lop->args[0] = temp_arg(dir_ts); 3419 lop->args[1] = temp_arg(arg_ts->mem_base); 3420 lop->args[2] = arg_ts->mem_offset; 3421 3422 /* Loaded, but synced with memory. */ 3423 arg_ts->state = TS_MEM; 3424 } 3425 } 3426 3427 /* Perform input replacement, and mark inputs that became dead. 3428 No action is required except keeping temp_state up to date 3429 so that we reload when needed. */ 3430 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3431 arg_ts = arg_temp(op->args[i]); 3432 dir_ts = arg_ts->state_ptr; 3433 if (dir_ts) { 3434 op->args[i] = temp_arg(dir_ts); 3435 changes = true; 3436 if (IS_DEAD_ARG(i)) { 3437 arg_ts->state = TS_DEAD; 3438 } 3439 } 3440 } 3441 3442 /* Liveness analysis should ensure that the following are 3443 all correct, for call sites and basic block end points. */ 3444 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3445 /* Nothing to do */ 3446 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3447 for (i = 0; i < nb_globals; ++i) { 3448 /* Liveness should see that globals are synced back, 3449 that is, either TS_DEAD or TS_MEM. */ 3450 arg_ts = &s->temps[i]; 3451 tcg_debug_assert(arg_ts->state_ptr == 0 3452 || arg_ts->state != 0); 3453 } 3454 } else { 3455 for (i = 0; i < nb_globals; ++i) { 3456 /* Liveness should see that globals are saved back, 3457 that is, TS_DEAD, waiting to be reloaded. */ 3458 arg_ts = &s->temps[i]; 3459 tcg_debug_assert(arg_ts->state_ptr == 0 3460 || arg_ts->state == TS_DEAD); 3461 } 3462 } 3463 3464 /* Outputs become available. 
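       For a synced output this means a store is inserted after the
       op; e.g. (names and offset illustrative) a mov into the shadow
       of an indirect global gains a following

           st_i32 d0,env,$0x10

       built from the global's mem_base/mem_offset.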
*/ 3465 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3466 arg_ts = arg_temp(op->args[0]); 3467 dir_ts = arg_ts->state_ptr; 3468 if (dir_ts) { 3469 op->args[0] = temp_arg(dir_ts); 3470 changes = true; 3471 3472 /* The output is now live and modified. */ 3473 arg_ts->state = 0; 3474 3475 if (NEED_SYNC_ARG(0)) { 3476 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3477 ? INDEX_op_st_i32 3478 : INDEX_op_st_i64); 3479 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3480 TCGTemp *out_ts = dir_ts; 3481 3482 if (IS_DEAD_ARG(0)) { 3483 out_ts = arg_temp(op->args[1]); 3484 arg_ts->state = TS_DEAD; 3485 tcg_op_remove(s, op); 3486 } else { 3487 arg_ts->state = TS_MEM; 3488 } 3489 3490 sop->args[0] = temp_arg(out_ts); 3491 sop->args[1] = temp_arg(arg_ts->mem_base); 3492 sop->args[2] = arg_ts->mem_offset; 3493 } else { 3494 tcg_debug_assert(!IS_DEAD_ARG(0)); 3495 } 3496 } 3497 } else { 3498 for (i = 0; i < nb_oargs; i++) { 3499 arg_ts = arg_temp(op->args[i]); 3500 dir_ts = arg_ts->state_ptr; 3501 if (!dir_ts) { 3502 continue; 3503 } 3504 op->args[i] = temp_arg(dir_ts); 3505 changes = true; 3506 3507 /* The output is now live and modified. */ 3508 arg_ts->state = 0; 3509 3510 /* Sync outputs upon their last write. */ 3511 if (NEED_SYNC_ARG(i)) { 3512 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3513 ? INDEX_op_st_i32 3514 : INDEX_op_st_i64); 3515 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3516 3517 sop->args[0] = temp_arg(dir_ts); 3518 sop->args[1] = temp_arg(arg_ts->mem_base); 3519 sop->args[2] = arg_ts->mem_offset; 3520 3521 arg_ts->state = TS_MEM; 3522 } 3523 /* Drop outputs that are dead. */ 3524 if (IS_DEAD_ARG(i)) { 3525 arg_ts->state = TS_DEAD; 3526 } 3527 } 3528 } 3529 } 3530 3531 return changes; 3532 } 3533 3534 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3535 { 3536 intptr_t off; 3537 int size, align; 3538 3539 /* When allocating an object, look at the full type. */ 3540 size = tcg_type_size(ts->base_type); 3541 switch (ts->base_type) { 3542 case TCG_TYPE_I32: 3543 align = 4; 3544 break; 3545 case TCG_TYPE_I64: 3546 case TCG_TYPE_V64: 3547 align = 8; 3548 break; 3549 case TCG_TYPE_I128: 3550 case TCG_TYPE_V128: 3551 case TCG_TYPE_V256: 3552 /* 3553 * Note that we do not require aligned storage for V256, 3554 * and that we provide alignment for I128 to match V128, 3555 * even if that's above what the host ABI requires. 3556 */ 3557 align = 16; 3558 break; 3559 default: 3560 g_assert_not_reached(); 3561 } 3562 3563 /* 3564 * Assume the stack is sufficiently aligned. 3565 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3566 * and do not require 16 byte vector alignment. This seems slightly 3567 * easier than fully parameterizing the above switch statement. 3568 */ 3569 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3570 off = ROUND_UP(s->current_frame_offset, align); 3571 3572 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3573 if (off + size > s->frame_end) { 3574 tcg_raise_tb_overflow(s); 3575 } 3576 s->current_frame_offset = off + size; 3577 #if defined(__sparc__) 3578 off += TCG_TARGET_STACK_BIAS; 3579 #endif 3580 3581 /* If the object was subdivided, assign memory to all the parts. */ 3582 if (ts->base_type != ts->type) { 3583 int part_size = tcg_type_size(ts->type); 3584 int part_count = size / part_size; 3585 3586 /* 3587 * Each part is allocated sequentially in tcg_temp_new_internal. 3588 * Jump back to the first part by subtracting the current index. 
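     *
     * E.g. (illustrative): a TCG_TYPE_I128 base type split into two
     * TCG_TYPE_I64 parts gives part_size 8 and part_count 2, placing
     * part 0 at off and part 1 at off + 8.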
     */
    ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}

/* Assign @reg to @ts, and update reg_to_temp[]. */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_TB:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   If 'free_or_dead' is non-zero, subsequently release the temporary;
   if it is positive, the temp is dead; if it is negative, the temp
   is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.
*/ 3681 if (free_or_dead 3682 && tcg_out_sti(s, ts->type, ts->val, 3683 ts->mem_base->reg, ts->mem_offset)) { 3684 break; 3685 } 3686 temp_load(s, ts, tcg_target_available_regs[ts->type], 3687 allocated_regs, preferred_regs); 3688 /* fallthrough */ 3689 3690 case TEMP_VAL_REG: 3691 tcg_out_st(s, ts->type, ts->reg, 3692 ts->mem_base->reg, ts->mem_offset); 3693 break; 3694 3695 case TEMP_VAL_MEM: 3696 break; 3697 3698 case TEMP_VAL_DEAD: 3699 default: 3700 tcg_abort(); 3701 } 3702 ts->mem_coherent = 1; 3703 } 3704 if (free_or_dead) { 3705 temp_free_or_dead(s, ts, free_or_dead); 3706 } 3707 } 3708 3709 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3710 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3711 { 3712 TCGTemp *ts = s->reg_to_temp[reg]; 3713 if (ts != NULL) { 3714 temp_sync(s, ts, allocated_regs, 0, -1); 3715 } 3716 } 3717 3718 /** 3719 * tcg_reg_alloc: 3720 * @required_regs: Set of registers in which we must allocate. 3721 * @allocated_regs: Set of registers which must be avoided. 3722 * @preferred_regs: Set of registers we should prefer. 3723 * @rev: True if we search the registers in "indirect" order. 3724 * 3725 * The allocated register must be in @required_regs & ~@allocated_regs, 3726 * but if we can put it in @preferred_regs we may save a move later. 3727 */ 3728 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3729 TCGRegSet allocated_regs, 3730 TCGRegSet preferred_regs, bool rev) 3731 { 3732 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3733 TCGRegSet reg_ct[2]; 3734 const int *order; 3735 3736 reg_ct[1] = required_regs & ~allocated_regs; 3737 tcg_debug_assert(reg_ct[1] != 0); 3738 reg_ct[0] = reg_ct[1] & preferred_regs; 3739 3740 /* Skip the preferred_regs option if it cannot be satisfied, 3741 or if the preference made no difference. */ 3742 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3743 3744 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3745 3746 /* Try free registers, preferences first. */ 3747 for (j = f; j < 2; j++) { 3748 TCGRegSet set = reg_ct[j]; 3749 3750 if (tcg_regset_single(set)) { 3751 /* One register in the set. */ 3752 TCGReg reg = tcg_regset_first(set); 3753 if (s->reg_to_temp[reg] == NULL) { 3754 return reg; 3755 } 3756 } else { 3757 for (i = 0; i < n; i++) { 3758 TCGReg reg = order[i]; 3759 if (s->reg_to_temp[reg] == NULL && 3760 tcg_regset_test_reg(set, reg)) { 3761 return reg; 3762 } 3763 } 3764 } 3765 } 3766 3767 /* We must spill something. */ 3768 for (j = f; j < 2; j++) { 3769 TCGRegSet set = reg_ct[j]; 3770 3771 if (tcg_regset_single(set)) { 3772 /* One register in the set. */ 3773 TCGReg reg = tcg_regset_first(set); 3774 tcg_reg_free(s, reg, allocated_regs); 3775 return reg; 3776 } else { 3777 for (i = 0; i < n; i++) { 3778 TCGReg reg = order[i]; 3779 if (tcg_regset_test_reg(set, reg)) { 3780 tcg_reg_free(s, reg, allocated_regs); 3781 return reg; 3782 } 3783 } 3784 } 3785 } 3786 3787 tcg_abort(); 3788 } 3789 3790 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 3791 TCGRegSet allocated_regs, 3792 TCGRegSet preferred_regs, bool rev) 3793 { 3794 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3795 TCGRegSet reg_ct[2]; 3796 const int *order; 3797 3798 /* Ensure that if I is not in allocated_regs, I+1 is not either. 
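       A worked example (illustrative): with allocated_regs = 0x08 (R3
       live), allocated_regs | (allocated_regs >> 1) = 0x0c, so neither
       R2 nor R3 may start a pair -- R2/R3 and R3/R4 would both touch R3.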
     */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    tcg_abort();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* Save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}
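/*
 * Note that, as written, temp_save() above and sync_globals() below emit
 * no code of their own: the liveness passes have already arranged for
 * globals to be coherent in memory, so both reduce to tcg_debug_assert()
 * sanity checks.
 */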
/* Sync globals to their canonical location and assume they can be
   read by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* At the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        switch (ts->kind) {
        case TEMP_TB:
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            break;
        default:
            g_assert_not_reached();
        }
    }
}

/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here. */
    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/*
 * Specialized code generation for INDEX_op_mov_*.
 */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;
    TCGReg oreg, ireg;

    allocated_regs = s->reserved_regs;
    preferred_regs = output_pref(op, 0);
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Note that otype != itype for no-op truncation.
*/ 4020 otype = ots->type; 4021 itype = ts->type; 4022 4023 if (ts->val_type == TEMP_VAL_CONST) { 4024 /* propagate constant or generate sti */ 4025 tcg_target_ulong val = ts->val; 4026 if (IS_DEAD_ARG(1)) { 4027 temp_dead(s, ts); 4028 } 4029 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 4030 return; 4031 } 4032 4033 /* If the source value is in memory we're going to be forced 4034 to have it in a register in order to perform the copy. Copy 4035 the SOURCE value into its own register first, that way we 4036 don't have to reload SOURCE the next time it is used. */ 4037 if (ts->val_type == TEMP_VAL_MEM) { 4038 temp_load(s, ts, tcg_target_available_regs[itype], 4039 allocated_regs, preferred_regs); 4040 } 4041 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 4042 ireg = ts->reg; 4043 4044 if (IS_DEAD_ARG(0)) { 4045 /* mov to a non-saved dead register makes no sense (even with 4046 liveness analysis disabled). */ 4047 tcg_debug_assert(NEED_SYNC_ARG(0)); 4048 if (!ots->mem_allocated) { 4049 temp_allocate_frame(s, ots); 4050 } 4051 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 4052 if (IS_DEAD_ARG(1)) { 4053 temp_dead(s, ts); 4054 } 4055 temp_dead(s, ots); 4056 return; 4057 } 4058 4059 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 4060 /* 4061 * The mov can be suppressed. Kill input first, so that it 4062 * is unlinked from reg_to_temp, then set the output to the 4063 * reg that we saved from the input. 4064 */ 4065 temp_dead(s, ts); 4066 oreg = ireg; 4067 } else { 4068 if (ots->val_type == TEMP_VAL_REG) { 4069 oreg = ots->reg; 4070 } else { 4071 /* Make sure to not spill the input register during allocation. */ 4072 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 4073 allocated_regs | ((TCGRegSet)1 << ireg), 4074 preferred_regs, ots->indirect_base); 4075 } 4076 if (!tcg_out_mov(s, otype, oreg, ireg)) { 4077 /* 4078 * Cross register class move not supported. 4079 * Store the source register into the destination slot 4080 * and leave the destination temp as TEMP_VAL_MEM. 4081 */ 4082 assert(!temp_readonly(ots)); 4083 if (!ts->mem_allocated) { 4084 temp_allocate_frame(s, ots); 4085 } 4086 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 4087 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 4088 ots->mem_coherent = 1; 4089 return; 4090 } 4091 } 4092 set_temp_val_reg(s, ots, oreg); 4093 ots->mem_coherent = 0; 4094 4095 if (NEED_SYNC_ARG(0)) { 4096 temp_sync(s, ots, allocated_regs, 0, 0); 4097 } 4098 } 4099 4100 /* 4101 * Specialized code generation for INDEX_op_dup_vec. 4102 */ 4103 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 4104 { 4105 const TCGLifeData arg_life = op->life; 4106 TCGRegSet dup_out_regs, dup_in_regs; 4107 TCGTemp *its, *ots; 4108 TCGType itype, vtype; 4109 unsigned vece; 4110 int lowpart_ofs; 4111 bool ok; 4112 4113 ots = arg_temp(op->args[0]); 4114 its = arg_temp(op->args[1]); 4115 4116 /* ENV should not be modified. */ 4117 tcg_debug_assert(!temp_readonly(ots)); 4118 4119 itype = its->type; 4120 vece = TCGOP_VECE(op); 4121 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4122 4123 if (its->val_type == TEMP_VAL_CONST) { 4124 /* Propagate constant via movi -> dupi. 
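           A TEMP_VAL_CONST vector temp holds its value already
           replicated to 64 bits (cf. dup_const()), so the plain movi
           path suffices; temp_load() later picks the narrowest VECE
           and emits the actual dupi.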
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory. Attempt an integer-vector
             * register move first. We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed.
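       To recap the fallback ladder above: (1) dup directly from a
       vector register satisfying the dup_in_regs constraint; (2) for
       an unsynced integer source, attempt a cross-file mov into the
       output register; (3) sync and use tcg_out_dupm_vec from the
       canonical memory slot; (4) otherwise plain-load the value into
       the output register and dup it onto itself here.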
*/ 4195 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4196 tcg_debug_assert(ok); 4197 4198 done: 4199 ots->mem_coherent = 0; 4200 if (IS_DEAD_ARG(1)) { 4201 temp_dead(s, its); 4202 } 4203 if (NEED_SYNC_ARG(0)) { 4204 temp_sync(s, ots, s->reserved_regs, 0, 0); 4205 } 4206 if (IS_DEAD_ARG(0)) { 4207 temp_dead(s, ots); 4208 } 4209 } 4210 4211 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4212 { 4213 const TCGLifeData arg_life = op->life; 4214 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4215 TCGRegSet i_allocated_regs; 4216 TCGRegSet o_allocated_regs; 4217 int i, k, nb_iargs, nb_oargs; 4218 TCGReg reg; 4219 TCGArg arg; 4220 const TCGArgConstraint *arg_ct; 4221 TCGTemp *ts; 4222 TCGArg new_args[TCG_MAX_OP_ARGS]; 4223 int const_args[TCG_MAX_OP_ARGS]; 4224 4225 nb_oargs = def->nb_oargs; 4226 nb_iargs = def->nb_iargs; 4227 4228 /* copy constants */ 4229 memcpy(new_args + nb_oargs + nb_iargs, 4230 op->args + nb_oargs + nb_iargs, 4231 sizeof(TCGArg) * def->nb_cargs); 4232 4233 i_allocated_regs = s->reserved_regs; 4234 o_allocated_regs = s->reserved_regs; 4235 4236 /* satisfy input constraints */ 4237 for (k = 0; k < nb_iargs; k++) { 4238 TCGRegSet i_preferred_regs, i_required_regs; 4239 bool allocate_new_reg, copyto_new_reg; 4240 TCGTemp *ts2; 4241 int i1, i2; 4242 4243 i = def->args_ct[nb_oargs + k].sort_index; 4244 arg = op->args[i]; 4245 arg_ct = &def->args_ct[i]; 4246 ts = arg_temp(arg); 4247 4248 if (ts->val_type == TEMP_VAL_CONST 4249 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 4250 /* constant is OK for instruction */ 4251 const_args[i] = 1; 4252 new_args[i] = ts->val; 4253 continue; 4254 } 4255 4256 reg = ts->reg; 4257 i_preferred_regs = 0; 4258 i_required_regs = arg_ct->regs; 4259 allocate_new_reg = false; 4260 copyto_new_reg = false; 4261 4262 switch (arg_ct->pair) { 4263 case 0: /* not paired */ 4264 if (arg_ct->ialias) { 4265 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4266 4267 /* 4268 * If the input is readonly, then it cannot also be an 4269 * output and aliased to itself. If the input is not 4270 * dead after the instruction, we must allocate a new 4271 * register and move it. 4272 */ 4273 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4274 allocate_new_reg = true; 4275 } else if (ts->val_type == TEMP_VAL_REG) { 4276 /* 4277 * Check if the current register has already been 4278 * allocated for another input. 4279 */ 4280 allocate_new_reg = 4281 tcg_regset_test_reg(i_allocated_regs, reg); 4282 } 4283 } 4284 if (!allocate_new_reg) { 4285 temp_load(s, ts, i_required_regs, i_allocated_regs, 4286 i_preferred_regs); 4287 reg = ts->reg; 4288 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 4289 } 4290 if (allocate_new_reg) { 4291 /* 4292 * Allocate a new register matching the constraint 4293 * and move the temporary register into it. 4294 */ 4295 temp_load(s, ts, tcg_target_available_regs[ts->type], 4296 i_allocated_regs, 0); 4297 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 4298 i_preferred_regs, ts->indirect_base); 4299 copyto_new_reg = true; 4300 } 4301 break; 4302 4303 case 1: 4304 /* First of an input pair; if i1 == i2, the second is an output. */ 4305 i1 = i; 4306 i2 = arg_ct->pair_index; 4307 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 4308 4309 /* 4310 * It is easier to default to allocating a new pair 4311 * and to identify a few cases where it's not required. 
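             *
             * Recapping the in-place case below: if both halves already
             * sit in adjacent host registers (reg, reg + 1), both die at
             * this op, and neither register has been claimed by another
             * input, the existing pair is consumed as-is; anything else
             * falls through to tcg_reg_alloc_pair.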
4312 */ 4313 if (arg_ct->ialias) { 4314 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4315 if (IS_DEAD_ARG(i1) && 4316 IS_DEAD_ARG(i2) && 4317 !temp_readonly(ts) && 4318 ts->val_type == TEMP_VAL_REG && 4319 ts->reg < TCG_TARGET_NB_REGS - 1 && 4320 tcg_regset_test_reg(i_required_regs, reg) && 4321 !tcg_regset_test_reg(i_allocated_regs, reg) && 4322 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 4323 (ts2 4324 ? ts2->val_type == TEMP_VAL_REG && 4325 ts2->reg == reg + 1 && 4326 !temp_readonly(ts2) 4327 : s->reg_to_temp[reg + 1] == NULL)) { 4328 break; 4329 } 4330 } else { 4331 /* Without aliasing, the pair must also be an input. */ 4332 tcg_debug_assert(ts2); 4333 if (ts->val_type == TEMP_VAL_REG && 4334 ts2->val_type == TEMP_VAL_REG && 4335 ts2->reg == reg + 1 && 4336 tcg_regset_test_reg(i_required_regs, reg)) { 4337 break; 4338 } 4339 } 4340 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4341 0, ts->indirect_base); 4342 goto do_pair; 4343 4344 case 2: /* pair second */ 4345 reg = new_args[arg_ct->pair_index] + 1; 4346 goto do_pair; 4347 4348 case 3: /* ialias with second output, no first input */ 4349 tcg_debug_assert(arg_ct->ialias); 4350 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4351 4352 if (IS_DEAD_ARG(i) && 4353 !temp_readonly(ts) && 4354 ts->val_type == TEMP_VAL_REG && 4355 reg > 0 && 4356 s->reg_to_temp[reg - 1] == NULL && 4357 tcg_regset_test_reg(i_required_regs, reg) && 4358 !tcg_regset_test_reg(i_allocated_regs, reg) && 4359 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4360 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4361 break; 4362 } 4363 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4364 i_allocated_regs, 0, 4365 ts->indirect_base); 4366 tcg_regset_set_reg(i_allocated_regs, reg); 4367 reg += 1; 4368 goto do_pair; 4369 4370 do_pair: 4371 /* 4372 * If an aliased input is not dead after the instruction, 4373 * we must allocate a new register and move it. 4374 */ 4375 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4376 TCGRegSet t_allocated_regs = i_allocated_regs; 4377 4378 /* 4379 * Because of the alias, and the continued life, make sure 4380 * that the temp is somewhere *other* than the reg pair, 4381 * and we get a copy in reg. 4382 */ 4383 tcg_regset_set_reg(t_allocated_regs, reg); 4384 tcg_regset_set_reg(t_allocated_regs, reg + 1); 4385 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { 4386 /* If ts was already in reg, copy it somewhere else. */ 4387 TCGReg nr; 4388 bool ok; 4389 4390 tcg_debug_assert(ts->kind != TEMP_FIXED); 4391 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 4392 t_allocated_regs, 0, ts->indirect_base); 4393 ok = tcg_out_mov(s, ts->type, nr, reg); 4394 tcg_debug_assert(ok); 4395 4396 set_temp_val_reg(s, ts, nr); 4397 } else { 4398 temp_load(s, ts, tcg_target_available_regs[ts->type], 4399 t_allocated_regs, 0); 4400 copyto_new_reg = true; 4401 } 4402 } else { 4403 /* Preferably allocate to reg, otherwise copy. */ 4404 i_required_regs = (TCGRegSet)1 << reg; 4405 temp_load(s, ts, i_required_regs, i_allocated_regs, 4406 i_preferred_regs); 4407 copyto_new_reg = ts->reg != reg; 4408 } 4409 break; 4410 4411 default: 4412 g_assert_not_reached(); 4413 } 4414 4415 if (copyto_new_reg) { 4416 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4417 /* 4418 * Cross register class move not supported. Sync the 4419 * temp back to its slot and load from there. 
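                 *
                 * This path is typically taken on hosts with no direct
                 * move between register files (e.g. general <-> vector);
                 * the detour through memory always works because
                 * temp_sync allocates a canonical stack slot on demand.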
4420 */ 4421 temp_sync(s, ts, i_allocated_regs, 0, 0); 4422 tcg_out_ld(s, ts->type, reg, 4423 ts->mem_base->reg, ts->mem_offset); 4424 } 4425 } 4426 new_args[i] = reg; 4427 const_args[i] = 0; 4428 tcg_regset_set_reg(i_allocated_regs, reg); 4429 } 4430 4431 /* mark dead temporaries and free the associated registers */ 4432 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4433 if (IS_DEAD_ARG(i)) { 4434 temp_dead(s, arg_temp(op->args[i])); 4435 } 4436 } 4437 4438 if (def->flags & TCG_OPF_COND_BRANCH) { 4439 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4440 } else if (def->flags & TCG_OPF_BB_END) { 4441 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4442 } else { 4443 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4444 /* XXX: permit generic clobber register list ? */ 4445 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4446 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4447 tcg_reg_free(s, i, i_allocated_regs); 4448 } 4449 } 4450 } 4451 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4452 /* sync globals if the op has side effects and might trigger 4453 an exception. */ 4454 sync_globals(s, i_allocated_regs); 4455 } 4456 4457 /* satisfy the output constraints */ 4458 for(k = 0; k < nb_oargs; k++) { 4459 i = def->args_ct[k].sort_index; 4460 arg = op->args[i]; 4461 arg_ct = &def->args_ct[i]; 4462 ts = arg_temp(arg); 4463 4464 /* ENV should not be modified. */ 4465 tcg_debug_assert(!temp_readonly(ts)); 4466 4467 switch (arg_ct->pair) { 4468 case 0: /* not paired */ 4469 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4470 reg = new_args[arg_ct->alias_index]; 4471 } else if (arg_ct->newreg) { 4472 reg = tcg_reg_alloc(s, arg_ct->regs, 4473 i_allocated_regs | o_allocated_regs, 4474 output_pref(op, k), ts->indirect_base); 4475 } else { 4476 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4477 output_pref(op, k), ts->indirect_base); 4478 } 4479 break; 4480 4481 case 1: /* first of pair */ 4482 tcg_debug_assert(!arg_ct->newreg); 4483 if (arg_ct->oalias) { 4484 reg = new_args[arg_ct->alias_index]; 4485 break; 4486 } 4487 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, 4488 output_pref(op, k), ts->indirect_base); 4489 break; 4490 4491 case 2: /* second of pair */ 4492 tcg_debug_assert(!arg_ct->newreg); 4493 if (arg_ct->oalias) { 4494 reg = new_args[arg_ct->alias_index]; 4495 } else { 4496 reg = new_args[arg_ct->pair_index] + 1; 4497 } 4498 break; 4499 4500 case 3: /* first of pair, aliasing with a second input */ 4501 tcg_debug_assert(!arg_ct->newreg); 4502 reg = new_args[arg_ct->pair_index] - 1; 4503 break; 4504 4505 default: 4506 g_assert_not_reached(); 4507 } 4508 tcg_regset_set_reg(o_allocated_regs, reg); 4509 set_temp_val_reg(s, ts, reg); 4510 ts->mem_coherent = 0; 4511 new_args[i] = reg; 4512 } 4513 } 4514 4515 /* emit instruction */ 4516 if (def->flags & TCG_OPF_VECTOR) { 4517 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4518 new_args, const_args); 4519 } else { 4520 tcg_out_op(s, op->opc, new_args, const_args); 4521 } 4522 4523 /* move the outputs in the correct register if needed */ 4524 for(i = 0; i < nb_oargs; i++) { 4525 ts = arg_temp(op->args[i]); 4526 4527 /* ENV should not be modified. 
*/ 4528 tcg_debug_assert(!temp_readonly(ts)); 4529 4530 if (NEED_SYNC_ARG(i)) { 4531 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4532 } else if (IS_DEAD_ARG(i)) { 4533 temp_dead(s, ts); 4534 } 4535 } 4536 } 4537 4538 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4539 { 4540 const TCGLifeData arg_life = op->life; 4541 TCGTemp *ots, *itsl, *itsh; 4542 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4543 4544 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4545 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4546 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4547 4548 ots = arg_temp(op->args[0]); 4549 itsl = arg_temp(op->args[1]); 4550 itsh = arg_temp(op->args[2]); 4551 4552 /* ENV should not be modified. */ 4553 tcg_debug_assert(!temp_readonly(ots)); 4554 4555 /* Allocate the output register now. */ 4556 if (ots->val_type != TEMP_VAL_REG) { 4557 TCGRegSet allocated_regs = s->reserved_regs; 4558 TCGRegSet dup_out_regs = 4559 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4560 TCGReg oreg; 4561 4562 /* Make sure to not spill the input registers. */ 4563 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 4564 tcg_regset_set_reg(allocated_regs, itsl->reg); 4565 } 4566 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 4567 tcg_regset_set_reg(allocated_regs, itsh->reg); 4568 } 4569 4570 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4571 output_pref(op, 0), ots->indirect_base); 4572 set_temp_val_reg(s, ots, oreg); 4573 } 4574 4575 /* Promote dup2 of immediates to dupi_vec. */ 4576 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 4577 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 4578 MemOp vece = MO_64; 4579 4580 if (val == dup_const(MO_8, val)) { 4581 vece = MO_8; 4582 } else if (val == dup_const(MO_16, val)) { 4583 vece = MO_16; 4584 } else if (val == dup_const(MO_32, val)) { 4585 vece = MO_32; 4586 } 4587 4588 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 4589 goto done; 4590 } 4591 4592 /* If the two inputs form one 64-bit value, try dupm_vec. */ 4593 if (itsl->temp_subindex == HOST_BIG_ENDIAN && 4594 itsh->temp_subindex == !HOST_BIG_ENDIAN && 4595 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { 4596 TCGTemp *its = itsl - HOST_BIG_ENDIAN; 4597 4598 temp_sync(s, its + 0, s->reserved_regs, 0, 0); 4599 temp_sync(s, its + 1, s->reserved_regs, 0, 0); 4600 4601 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 4602 its->mem_base->reg, its->mem_offset)) { 4603 goto done; 4604 } 4605 } 4606 4607 /* Fall back to generic expansion. */ 4608 return false; 4609 4610 done: 4611 ots->mem_coherent = 0; 4612 if (IS_DEAD_ARG(1)) { 4613 temp_dead(s, itsl); 4614 } 4615 if (IS_DEAD_ARG(2)) { 4616 temp_dead(s, itsh); 4617 } 4618 if (NEED_SYNC_ARG(0)) { 4619 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 4620 } else if (IS_DEAD_ARG(0)) { 4621 temp_dead(s, ots); 4622 } 4623 return true; 4624 } 4625 4626 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, 4627 TCGRegSet allocated_regs) 4628 { 4629 if (ts->val_type == TEMP_VAL_REG) { 4630 if (ts->reg != reg) { 4631 tcg_reg_free(s, reg, allocated_regs); 4632 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4633 /* 4634 * Cross register class move not supported. Sync the 4635 * temp back to its slot and load from there. 
4636 */ 4637 temp_sync(s, ts, allocated_regs, 0, 0); 4638 tcg_out_ld(s, ts->type, reg, 4639 ts->mem_base->reg, ts->mem_offset); 4640 } 4641 } 4642 } else { 4643 TCGRegSet arg_set = 0; 4644 4645 tcg_reg_free(s, reg, allocated_regs); 4646 tcg_regset_set_reg(arg_set, reg); 4647 temp_load(s, ts, arg_set, allocated_regs, 0); 4648 } 4649 } 4650 4651 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts, 4652 TCGRegSet allocated_regs) 4653 { 4654 /* 4655 * When the destination is on the stack, load up the temp and store. 4656 * If there are many call-saved registers, the temp might live to 4657 * see another use; otherwise it'll be discarded. 4658 */ 4659 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); 4660 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, 4661 TCG_TARGET_CALL_STACK_OFFSET + 4662 stk_slot * sizeof(tcg_target_long)); 4663 } 4664 4665 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, 4666 TCGTemp *ts, TCGRegSet *allocated_regs) 4667 { 4668 if (REG_P(l)) { 4669 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; 4670 load_arg_reg(s, reg, ts, *allocated_regs); 4671 tcg_regset_set_reg(*allocated_regs, reg); 4672 } else { 4673 load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs), 4674 ts, *allocated_regs); 4675 } 4676 } 4677 4678 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base, 4679 intptr_t ref_off, TCGRegSet *allocated_regs) 4680 { 4681 TCGReg reg; 4682 int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); 4683 4684 if (stk_slot < 0) { 4685 reg = tcg_target_call_iarg_regs[arg_slot]; 4686 tcg_reg_free(s, reg, *allocated_regs); 4687 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4688 tcg_regset_set_reg(*allocated_regs, reg); 4689 } else { 4690 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], 4691 *allocated_regs, 0, false); 4692 tcg_out_addi_ptr(s, reg, ref_base, ref_off); 4693 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, 4694 TCG_TARGET_CALL_STACK_OFFSET 4695 + stk_slot * sizeof(tcg_target_long)); 4696 } 4697 } 4698 4699 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 4700 { 4701 const int nb_oargs = TCGOP_CALLO(op); 4702 const int nb_iargs = TCGOP_CALLI(op); 4703 const TCGLifeData arg_life = op->life; 4704 const TCGHelperInfo *info = tcg_call_info(op); 4705 TCGRegSet allocated_regs = s->reserved_regs; 4706 int i; 4707 4708 /* 4709 * Move inputs into place in reverse order, 4710 * so that we place stacked arguments first. 4711 */ 4712 for (i = nb_iargs - 1; i >= 0; --i) { 4713 const TCGCallArgumentLoc *loc = &info->in[i]; 4714 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); 4715 4716 switch (loc->kind) { 4717 case TCG_CALL_ARG_NORMAL: 4718 case TCG_CALL_ARG_EXTEND_U: 4719 case TCG_CALL_ARG_EXTEND_S: 4720 load_arg_normal(s, loc, ts, &allocated_regs); 4721 break; 4722 case TCG_CALL_ARG_BY_REF: 4723 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4724 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, 4725 TCG_TARGET_CALL_STACK_OFFSET 4726 + loc->ref_slot * sizeof(tcg_target_long), 4727 &allocated_regs); 4728 break; 4729 case TCG_CALL_ARG_BY_REF_N: 4730 load_arg_stk(s, loc->ref_slot, ts, allocated_regs); 4731 break; 4732 default: 4733 g_assert_not_reached(); 4734 } 4735 } 4736 4737 /* Mark dead temporaries and free the associated registers. */ 4738 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4739 if (IS_DEAD_ARG(i)) { 4740 temp_dead(s, arg_temp(op->args[i])); 4741 } 4742 } 4743 4744 /* Clobber call registers. 
*/ 4745 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4746 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4747 tcg_reg_free(s, i, allocated_regs); 4748 } 4749 } 4750 4751 /* 4752 * Save globals if they might be written by the helper, 4753 * sync them if they might be read. 4754 */ 4755 if (info->flags & TCG_CALL_NO_READ_GLOBALS) { 4756 /* Nothing to do */ 4757 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { 4758 sync_globals(s, allocated_regs); 4759 } else { 4760 save_globals(s, allocated_regs); 4761 } 4762 4763 /* 4764 * If the ABI passes a pointer to the returned struct as the first 4765 * argument, load that now. Pass a pointer to the output home slot. 4766 */ 4767 if (info->out_kind == TCG_CALL_RET_BY_REF) { 4768 TCGTemp *ts = arg_temp(op->args[0]); 4769 4770 if (!ts->mem_allocated) { 4771 temp_allocate_frame(s, ts); 4772 } 4773 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); 4774 } 4775 4776 tcg_out_call(s, tcg_call_func(op), info); 4777 4778 /* Assign output registers and emit moves if needed. */ 4779 switch (info->out_kind) { 4780 case TCG_CALL_RET_NORMAL: 4781 for (i = 0; i < nb_oargs; i++) { 4782 TCGTemp *ts = arg_temp(op->args[i]); 4783 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); 4784 4785 /* ENV should not be modified. */ 4786 tcg_debug_assert(!temp_readonly(ts)); 4787 4788 set_temp_val_reg(s, ts, reg); 4789 ts->mem_coherent = 0; 4790 } 4791 break; 4792 4793 case TCG_CALL_RET_BY_VEC: 4794 { 4795 TCGTemp *ts = arg_temp(op->args[0]); 4796 4797 tcg_debug_assert(ts->base_type == TCG_TYPE_I128); 4798 tcg_debug_assert(ts->temp_subindex == 0); 4799 if (!ts->mem_allocated) { 4800 temp_allocate_frame(s, ts); 4801 } 4802 tcg_out_st(s, TCG_TYPE_V128, 4803 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), 4804 ts->mem_base->reg, ts->mem_offset); 4805 } 4806 /* fall through to mark all parts in memory */ 4807 4808 case TCG_CALL_RET_BY_REF: 4809 /* The callee has performed a write through the reference. */ 4810 for (i = 0; i < nb_oargs; i++) { 4811 TCGTemp *ts = arg_temp(op->args[i]); 4812 ts->val_type = TEMP_VAL_MEM; 4813 } 4814 break; 4815 4816 default: 4817 g_assert_not_reached(); 4818 } 4819 4820 /* Flush or discard output registers as needed. 
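       NEED_SYNC_ARG forces the value out to its canonical slot (e.g. a
       global that must be valid in memory after the call), also
       dropping the register copy when the arg is dead; IS_DEAD_ARG
       alone just discards the register mapping without a store.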
*/ 4821 for (i = 0; i < nb_oargs; i++) { 4822 TCGTemp *ts = arg_temp(op->args[i]); 4823 if (NEED_SYNC_ARG(i)) { 4824 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); 4825 } else if (IS_DEAD_ARG(i)) { 4826 temp_dead(s, ts); 4827 } 4828 } 4829 } 4830 4831 #ifdef CONFIG_PROFILER 4832 4833 /* avoid copy/paste errors */ 4834 #define PROF_ADD(to, from, field) \ 4835 do { \ 4836 (to)->field += qatomic_read(&((from)->field)); \ 4837 } while (0) 4838 4839 #define PROF_MAX(to, from, field) \ 4840 do { \ 4841 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 4842 if (val__ > (to)->field) { \ 4843 (to)->field = val__; \ 4844 } \ 4845 } while (0) 4846 4847 /* Pass in a zero'ed @prof */ 4848 static inline 4849 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 4850 { 4851 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4852 unsigned int i; 4853 4854 for (i = 0; i < n_ctxs; i++) { 4855 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4856 const TCGProfile *orig = &s->prof; 4857 4858 if (counters) { 4859 PROF_ADD(prof, orig, cpu_exec_time); 4860 PROF_ADD(prof, orig, tb_count1); 4861 PROF_ADD(prof, orig, tb_count); 4862 PROF_ADD(prof, orig, op_count); 4863 PROF_MAX(prof, orig, op_count_max); 4864 PROF_ADD(prof, orig, temp_count); 4865 PROF_MAX(prof, orig, temp_count_max); 4866 PROF_ADD(prof, orig, del_op_count); 4867 PROF_ADD(prof, orig, code_in_len); 4868 PROF_ADD(prof, orig, code_out_len); 4869 PROF_ADD(prof, orig, search_out_len); 4870 PROF_ADD(prof, orig, interm_time); 4871 PROF_ADD(prof, orig, code_time); 4872 PROF_ADD(prof, orig, la_time); 4873 PROF_ADD(prof, orig, opt_time); 4874 PROF_ADD(prof, orig, restore_count); 4875 PROF_ADD(prof, orig, restore_time); 4876 } 4877 if (table) { 4878 int i; 4879 4880 for (i = 0; i < NB_OPS; i++) { 4881 PROF_ADD(prof, orig, table_op_count[i]); 4882 } 4883 } 4884 } 4885 } 4886 4887 #undef PROF_ADD 4888 #undef PROF_MAX 4889 4890 static void tcg_profile_snapshot_counters(TCGProfile *prof) 4891 { 4892 tcg_profile_snapshot(prof, true, false); 4893 } 4894 4895 static void tcg_profile_snapshot_table(TCGProfile *prof) 4896 { 4897 tcg_profile_snapshot(prof, false, true); 4898 } 4899 4900 void tcg_dump_op_count(GString *buf) 4901 { 4902 TCGProfile prof = {}; 4903 int i; 4904 4905 tcg_profile_snapshot_table(&prof); 4906 for (i = 0; i < NB_OPS; i++) { 4907 g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, 4908 prof.table_op_count[i]); 4909 } 4910 } 4911 4912 int64_t tcg_cpu_exec_time(void) 4913 { 4914 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4915 unsigned int i; 4916 int64_t ret = 0; 4917 4918 for (i = 0; i < n_ctxs; i++) { 4919 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4920 const TCGProfile *prof = &s->prof; 4921 4922 ret += qatomic_read(&prof->cpu_exec_time); 4923 } 4924 return ret; 4925 } 4926 #else 4927 void tcg_dump_op_count(GString *buf) 4928 { 4929 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 4930 } 4931 4932 int64_t tcg_cpu_exec_time(void) 4933 { 4934 error_report("%s: TCG profiler not compiled", __func__); 4935 exit(EXIT_FAILURE); 4936 } 4937 #endif 4938 4939 4940 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) 4941 { 4942 #ifdef CONFIG_PROFILER 4943 TCGProfile *prof = &s->prof; 4944 #endif 4945 int i, num_insns; 4946 TCGOp *op; 4947 4948 #ifdef CONFIG_PROFILER 4949 { 4950 int n = 0; 4951 4952 QTAILQ_FOREACH(op, &s->ops, link) { 4953 n++; 4954 } 4955 qatomic_set(&prof->op_count, prof->op_count + n); 4956 if (n > prof->op_count_max) { 4957 
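            /*
             * A plain read-modify-write is sufficient here: each vCPU
             * thread only updates the counters of its own TCGContext,
             * and qatomic_set() pairs with the qatomic_read() done by
             * other threads in tcg_profile_snapshot().
             */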
qatomic_set(&prof->op_count_max, n); 4958 } 4959 4960 n = s->nb_temps; 4961 qatomic_set(&prof->temp_count, prof->temp_count + n); 4962 if (n > prof->temp_count_max) { 4963 qatomic_set(&prof->temp_count_max, n); 4964 } 4965 } 4966 #endif 4967 4968 #ifdef DEBUG_DISAS 4969 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4970 && qemu_log_in_addr_range(pc_start))) { 4971 FILE *logfile = qemu_log_trylock(); 4972 if (logfile) { 4973 fprintf(logfile, "OP:\n"); 4974 tcg_dump_ops(s, logfile, false); 4975 fprintf(logfile, "\n"); 4976 qemu_log_unlock(logfile); 4977 } 4978 } 4979 #endif 4980 4981 #ifdef CONFIG_DEBUG_TCG 4982 /* Ensure all labels referenced have been emitted. */ 4983 { 4984 TCGLabel *l; 4985 bool error = false; 4986 4987 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4988 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) { 4989 qemu_log_mask(CPU_LOG_TB_OP, 4990 "$L%d referenced but not present.\n", l->id); 4991 error = true; 4992 } 4993 } 4994 assert(!error); 4995 } 4996 #endif 4997 4998 #ifdef CONFIG_PROFILER 4999 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 5000 #endif 5001 5002 #ifdef USE_TCG_OPTIMIZATIONS 5003 tcg_optimize(s); 5004 #endif 5005 5006 #ifdef CONFIG_PROFILER 5007 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 5008 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 5009 #endif 5010 5011 reachable_code_pass(s); 5012 liveness_pass_0(s); 5013 liveness_pass_1(s); 5014 5015 if (s->nb_indirects > 0) { 5016 #ifdef DEBUG_DISAS 5017 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 5018 && qemu_log_in_addr_range(pc_start))) { 5019 FILE *logfile = qemu_log_trylock(); 5020 if (logfile) { 5021 fprintf(logfile, "OP before indirect lowering:\n"); 5022 tcg_dump_ops(s, logfile, false); 5023 fprintf(logfile, "\n"); 5024 qemu_log_unlock(logfile); 5025 } 5026 } 5027 #endif 5028 /* Replace indirect temps with direct temps. */ 5029 if (liveness_pass_2(s)) { 5030 /* If changes were made, re-run liveness. */ 5031 liveness_pass_1(s); 5032 } 5033 } 5034 5035 #ifdef CONFIG_PROFILER 5036 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 5037 #endif 5038 5039 #ifdef DEBUG_DISAS 5040 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 5041 && qemu_log_in_addr_range(pc_start))) { 5042 FILE *logfile = qemu_log_trylock(); 5043 if (logfile) { 5044 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 5045 tcg_dump_ops(s, logfile, true); 5046 fprintf(logfile, "\n"); 5047 qemu_log_unlock(logfile); 5048 } 5049 } 5050 #endif 5051 5052 /* Initialize goto_tb jump offsets. */ 5053 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; 5054 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; 5055 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; 5056 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; 5057 5058 tcg_reg_alloc_start(s); 5059 5060 /* 5061 * Reset the buffer pointers when restarting after overflow. 5062 * TODO: Move this into translate-all.c with the rest of the 5063 * buffer management. Having only this done here is confusing. 
5064 */ 5065 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 5066 s->code_ptr = s->code_buf; 5067 5068 #ifdef TCG_TARGET_NEED_LDST_LABELS 5069 QSIMPLEQ_INIT(&s->ldst_labels); 5070 #endif 5071 #ifdef TCG_TARGET_NEED_POOL_LABELS 5072 s->pool_labels = NULL; 5073 #endif 5074 5075 num_insns = -1; 5076 QTAILQ_FOREACH(op, &s->ops, link) { 5077 TCGOpcode opc = op->opc; 5078 5079 #ifdef CONFIG_PROFILER 5080 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 5081 #endif 5082 5083 switch (opc) { 5084 case INDEX_op_mov_i32: 5085 case INDEX_op_mov_i64: 5086 case INDEX_op_mov_vec: 5087 tcg_reg_alloc_mov(s, op); 5088 break; 5089 case INDEX_op_dup_vec: 5090 tcg_reg_alloc_dup(s, op); 5091 break; 5092 case INDEX_op_insn_start: 5093 if (num_insns >= 0) { 5094 size_t off = tcg_current_code_size(s); 5095 s->gen_insn_end_off[num_insns] = off; 5096 /* Assert that we do not overflow our stored offset. */ 5097 assert(s->gen_insn_end_off[num_insns] == off); 5098 } 5099 num_insns++; 5100 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 5101 target_ulong a; 5102 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 5103 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 5104 #else 5105 a = op->args[i]; 5106 #endif 5107 s->gen_insn_data[num_insns][i] = a; 5108 } 5109 break; 5110 case INDEX_op_discard: 5111 temp_dead(s, arg_temp(op->args[0])); 5112 break; 5113 case INDEX_op_set_label: 5114 tcg_reg_alloc_bb_end(s, s->reserved_regs); 5115 tcg_out_label(s, arg_label(op->args[0])); 5116 break; 5117 case INDEX_op_call: 5118 tcg_reg_alloc_call(s, op); 5119 break; 5120 case INDEX_op_exit_tb: 5121 tcg_out_exit_tb(s, op->args[0]); 5122 break; 5123 case INDEX_op_goto_tb: 5124 tcg_out_goto_tb(s, op->args[0]); 5125 break; 5126 case INDEX_op_dup2_vec: 5127 if (tcg_reg_alloc_dup2(s, op)) { 5128 break; 5129 } 5130 /* fall through */ 5131 default: 5132 /* Sanity check that we've not introduced any unhandled opcodes. */ 5133 tcg_debug_assert(tcg_op_supported(opc)); 5134 /* Note: in order to speed up the code, it would be much 5135 faster to have specialized register allocator functions for 5136 some common argument patterns */ 5137 tcg_reg_alloc_op(s, op); 5138 break; 5139 } 5140 /* Test for (pending) buffer overflow. The assumption is that any 5141 one operation beginning below the high water mark cannot overrun 5142 the buffer completely. Thus we can test for overflow after 5143 generating code without having to check during generation. */ 5144 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 5145 return -1; 5146 } 5147 /* Test for TB overflow, as seen by gen_insn_end_off. 
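           The per-insn end offsets are recorded in the uint16_t array
           gen_insn_end_off[], so a TB whose generated code grows past
           UINT16_MAX bytes cannot be represented; returning -2 makes
           the caller restart translation with fewer guest insns.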
*/ 5148 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 5149 return -2; 5150 } 5151 } 5152 tcg_debug_assert(num_insns >= 0); 5153 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 5154 5155 /* Generate TB finalization at the end of block */ 5156 #ifdef TCG_TARGET_NEED_LDST_LABELS 5157 i = tcg_out_ldst_finalize(s); 5158 if (i < 0) { 5159 return i; 5160 } 5161 #endif 5162 #ifdef TCG_TARGET_NEED_POOL_LABELS 5163 i = tcg_out_pool_finalize(s); 5164 if (i < 0) { 5165 return i; 5166 } 5167 #endif 5168 if (!tcg_resolve_relocs(s)) { 5169 return -2; 5170 } 5171 5172 #ifndef CONFIG_TCG_INTERPRETER 5173 /* flush instruction cache */ 5174 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 5175 (uintptr_t)s->code_buf, 5176 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 5177 #endif 5178 5179 return tcg_current_code_size(s); 5180 } 5181 5182 #ifdef CONFIG_PROFILER 5183 void tcg_dump_info(GString *buf) 5184 { 5185 TCGProfile prof = {}; 5186 const TCGProfile *s; 5187 int64_t tb_count; 5188 int64_t tb_div_count; 5189 int64_t tot; 5190 5191 tcg_profile_snapshot_counters(&prof); 5192 s = &prof; 5193 tb_count = s->tb_count; 5194 tb_div_count = tb_count ? tb_count : 1; 5195 tot = s->interm_time + s->code_time; 5196 5197 g_string_append_printf(buf, "JIT cycles %" PRId64 5198 " (%0.3f s at 2.4 GHz)\n", 5199 tot, tot / 2.4e9); 5200 g_string_append_printf(buf, "translated TBs %" PRId64 5201 " (aborted=%" PRId64 " %0.1f%%)\n", 5202 tb_count, s->tb_count1 - tb_count, 5203 (double)(s->tb_count1 - s->tb_count) 5204 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 5205 g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", 5206 (double)s->op_count / tb_div_count, s->op_count_max); 5207 g_string_append_printf(buf, "deleted ops/TB %0.2f\n", 5208 (double)s->del_op_count / tb_div_count); 5209 g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", 5210 (double)s->temp_count / tb_div_count, 5211 s->temp_count_max); 5212 g_string_append_printf(buf, "avg host code/TB %0.1f\n", 5213 (double)s->code_out_len / tb_div_count); 5214 g_string_append_printf(buf, "avg search data/TB %0.1f\n", 5215 (double)s->search_out_len / tb_div_count); 5216 5217 g_string_append_printf(buf, "cycles/op %0.1f\n", 5218 s->op_count ? (double)tot / s->op_count : 0); 5219 g_string_append_printf(buf, "cycles/in byte %0.1f\n", 5220 s->code_in_len ? (double)tot / s->code_in_len : 0); 5221 g_string_append_printf(buf, "cycles/out byte %0.1f\n", 5222 s->code_out_len ? (double)tot / s->code_out_len : 0); 5223 g_string_append_printf(buf, "cycles/search byte %0.1f\n", 5224 s->search_out_len ? 5225 (double)tot / s->search_out_len : 0); 5226 if (tot == 0) { 5227 tot = 1; 5228 } 5229 g_string_append_printf(buf, " gen_interm time %0.1f%%\n", 5230 (double)s->interm_time / tot * 100.0); 5231 g_string_append_printf(buf, " gen_code time %0.1f%%\n", 5232 (double)s->code_time / tot * 100.0); 5233 g_string_append_printf(buf, "optim./code time %0.1f%%\n", 5234 (double)s->opt_time / (s->code_time ? 5235 s->code_time : 1) 5236 * 100.0); 5237 g_string_append_printf(buf, "liveness/code time %0.1f%%\n", 5238 (double)s->la_time / (s->code_time ? 5239 s->code_time : 1) * 100.0); 5240 g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", 5241 s->restore_count); 5242 g_string_append_printf(buf, " avg cycles %0.1f\n", 5243 s->restore_count ? 
5244 (double)s->restore_time / s->restore_count : 0); 5245 } 5246 #else 5247 void tcg_dump_info(GString *buf) 5248 { 5249 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 5250 } 5251 #endif 5252 5253 #ifdef ELF_HOST_MACHINE 5254 /* In order to use this feature, the backend needs to do three things: 5255 5256 (1) Define ELF_HOST_MACHINE to indicate both what value to 5257 put into the ELF image and to indicate support for the feature. 5258 5259 (2) Define tcg_register_jit. This should create a buffer containing 5260 the contents of a .debug_frame section that describes the post- 5261 prologue unwind info for the tcg machine. 5262 5263 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 5264 */ 5265 5266 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 5267 typedef enum { 5268 JIT_NOACTION = 0, 5269 JIT_REGISTER_FN, 5270 JIT_UNREGISTER_FN 5271 } jit_actions_t; 5272 5273 struct jit_code_entry { 5274 struct jit_code_entry *next_entry; 5275 struct jit_code_entry *prev_entry; 5276 const void *symfile_addr; 5277 uint64_t symfile_size; 5278 }; 5279 5280 struct jit_descriptor { 5281 uint32_t version; 5282 uint32_t action_flag; 5283 struct jit_code_entry *relevant_entry; 5284 struct jit_code_entry *first_entry; 5285 }; 5286 5287 void __jit_debug_register_code(void) __attribute__((noinline)); 5288 void __jit_debug_register_code(void) 5289 { 5290 asm(""); 5291 } 5292 5293 /* Must statically initialize the version, because GDB may check 5294 the version before we can set it. */ 5295 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 5296 5297 /* End GDB interface. */ 5298 5299 static int find_string(const char *strtab, const char *str) 5300 { 5301 const char *p = strtab + 1; 5302 5303 while (1) { 5304 if (strcmp(p, str) == 0) { 5305 return p - strtab; 5306 } 5307 p += strlen(p) + 1; 5308 } 5309 } 5310 5311 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 5312 const void *debug_frame, 5313 size_t debug_frame_size) 5314 { 5315 struct __attribute__((packed)) DebugInfo { 5316 uint32_t len; 5317 uint16_t version; 5318 uint32_t abbrev; 5319 uint8_t ptr_size; 5320 uint8_t cu_die; 5321 uint16_t cu_lang; 5322 uintptr_t cu_low_pc; 5323 uintptr_t cu_high_pc; 5324 uint8_t fn_die; 5325 char fn_name[16]; 5326 uintptr_t fn_low_pc; 5327 uintptr_t fn_high_pc; 5328 uint8_t cu_eoc; 5329 }; 5330 5331 struct ElfImage { 5332 ElfW(Ehdr) ehdr; 5333 ElfW(Phdr) phdr; 5334 ElfW(Shdr) shdr[7]; 5335 ElfW(Sym) sym[2]; 5336 struct DebugInfo di; 5337 uint8_t da[24]; 5338 char str[80]; 5339 }; 5340 5341 struct ElfImage *img; 5342 5343 static const struct ElfImage img_template = { 5344 .ehdr = { 5345 .e_ident[EI_MAG0] = ELFMAG0, 5346 .e_ident[EI_MAG1] = ELFMAG1, 5347 .e_ident[EI_MAG2] = ELFMAG2, 5348 .e_ident[EI_MAG3] = ELFMAG3, 5349 .e_ident[EI_CLASS] = ELF_CLASS, 5350 .e_ident[EI_DATA] = ELF_DATA, 5351 .e_ident[EI_VERSION] = EV_CURRENT, 5352 .e_type = ET_EXEC, 5353 .e_machine = ELF_HOST_MACHINE, 5354 .e_version = EV_CURRENT, 5355 .e_phoff = offsetof(struct ElfImage, phdr), 5356 .e_shoff = offsetof(struct ElfImage, shdr), 5357 .e_ehsize = sizeof(ElfW(Shdr)), 5358 .e_phentsize = sizeof(ElfW(Phdr)), 5359 .e_phnum = 1, 5360 .e_shentsize = sizeof(ElfW(Shdr)), 5361 .e_shnum = ARRAY_SIZE(img->shdr), 5362 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 5363 #ifdef ELF_HOST_FLAGS 5364 .e_flags = ELF_HOST_FLAGS, 5365 #endif 5366 #ifdef ELF_OSABI 5367 .e_ident[EI_OSABI] = ELF_OSABI, 5368 #endif 5369 }, 5370 .phdr = { 5371 .p_type = PT_LOAD, 5372 .p_flags = PF_X, 5373 }, 5374 
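        /*
         * The single PT_LOAD segment covers the JIT buffer; its
         * p_vaddr/p_paddr/p_memsz fields are filled in at run time
         * below, once the buffer address and size are known.
         */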
.shdr = { 5375 [0] = { .sh_type = SHT_NULL }, 5376 /* Trick: The contents of code_gen_buffer are not present in 5377 this fake ELF file; that got allocated elsewhere. Therefore 5378 we mark .text as SHT_NOBITS (similar to .bss) so that readers 5379 will not look for contents. We can record any address. */ 5380 [1] = { /* .text */ 5381 .sh_type = SHT_NOBITS, 5382 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 5383 }, 5384 [2] = { /* .debug_info */ 5385 .sh_type = SHT_PROGBITS, 5386 .sh_offset = offsetof(struct ElfImage, di), 5387 .sh_size = sizeof(struct DebugInfo), 5388 }, 5389 [3] = { /* .debug_abbrev */ 5390 .sh_type = SHT_PROGBITS, 5391 .sh_offset = offsetof(struct ElfImage, da), 5392 .sh_size = sizeof(img->da), 5393 }, 5394 [4] = { /* .debug_frame */ 5395 .sh_type = SHT_PROGBITS, 5396 .sh_offset = sizeof(struct ElfImage), 5397 }, 5398 [5] = { /* .symtab */ 5399 .sh_type = SHT_SYMTAB, 5400 .sh_offset = offsetof(struct ElfImage, sym), 5401 .sh_size = sizeof(img->sym), 5402 .sh_info = 1, 5403 .sh_link = ARRAY_SIZE(img->shdr) - 1, 5404 .sh_entsize = sizeof(ElfW(Sym)), 5405 }, 5406 [6] = { /* .strtab */ 5407 .sh_type = SHT_STRTAB, 5408 .sh_offset = offsetof(struct ElfImage, str), 5409 .sh_size = sizeof(img->str), 5410 } 5411 }, 5412 .sym = { 5413 [1] = { /* code_gen_buffer */ 5414 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5415 .st_shndx = 1, 5416 } 5417 }, 5418 .di = { 5419 .len = sizeof(struct DebugInfo) - 4, 5420 .version = 2, 5421 .ptr_size = sizeof(void *), 5422 .cu_die = 1, 5423 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5424 .fn_die = 2, 5425 .fn_name = "code_gen_buffer" 5426 }, 5427 .da = { 5428 1, /* abbrev number (the cu) */ 5429 0x11, 1, /* DW_TAG_compile_unit, has children */ 5430 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5431 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5432 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5433 0, 0, /* end of abbrev */ 5434 2, /* abbrev number (the fn) */ 5435 0x2e, 0, /* DW_TAG_subprogram, no children */ 5436 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5437 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5438 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5439 0, 0, /* end of abbrev */ 5440 0 /* no more abbrev */ 5441 }, 5442 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5443 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5444 }; 5445 5446 /* We only need a single jit entry; statically allocate it. 
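       Per the GDB JIT interface, the debugger plants a breakpoint in
       __jit_debug_register_code and, when it fires, walks
       __jit_debug_descriptor to find this entry and reads the
       in-memory ELF image (symfile_addr/symfile_size) built below.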
*/ 5447 static struct jit_code_entry one_entry; 5448 5449 uintptr_t buf = (uintptr_t)buf_ptr; 5450 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 5451 DebugFrameHeader *dfh; 5452 5453 img = g_malloc(img_size); 5454 *img = img_template; 5455 5456 img->phdr.p_vaddr = buf; 5457 img->phdr.p_paddr = buf; 5458 img->phdr.p_memsz = buf_size; 5459 5460 img->shdr[1].sh_name = find_string(img->str, ".text"); 5461 img->shdr[1].sh_addr = buf; 5462 img->shdr[1].sh_size = buf_size; 5463 5464 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 5465 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 5466 5467 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 5468 img->shdr[4].sh_size = debug_frame_size; 5469 5470 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 5471 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 5472 5473 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 5474 img->sym[1].st_value = buf; 5475 img->sym[1].st_size = buf_size; 5476 5477 img->di.cu_low_pc = buf; 5478 img->di.cu_high_pc = buf + buf_size; 5479 img->di.fn_low_pc = buf; 5480 img->di.fn_high_pc = buf + buf_size; 5481 5482 dfh = (DebugFrameHeader *)(img + 1); 5483 memcpy(dfh, debug_frame, debug_frame_size); 5484 dfh->fde.func_start = buf; 5485 dfh->fde.func_len = buf_size; 5486 5487 #ifdef DEBUG_JIT 5488 /* Enable this block to be able to debug the ELF image file creation. 5489 One can use readelf, objdump, or other inspection utilities. */ 5490 { 5491 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir()); 5492 FILE *f = fopen(jit, "w+b"); 5493 if (f) { 5494 if (fwrite(img, img_size, 1, f) != img_size) { 5495 /* Avoid stupid unused return value warning for fwrite. */ 5496 } 5497 fclose(f); 5498 } 5499 } 5500 #endif 5501 5502 one_entry.symfile_addr = img; 5503 one_entry.symfile_size = img_size; 5504 5505 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 5506 __jit_debug_descriptor.relevant_entry = &one_entry; 5507 __jit_debug_descriptor.first_entry = &one_entry; 5508 __jit_debug_register_code(); 5509 } 5510 #else 5511 /* No support for the feature. Provide the entry point expected by exec.c, 5512 and implement the internal function we declared earlier. */ 5513 5514 static void tcg_register_jit_int(const void *buf, size_t size, 5515 const void *debug_frame, 5516 size_t debug_frame_size) 5517 { 5518 } 5519 5520 void tcg_register_jit(const void *buf, size_t buf_size) 5521 { 5522 } 5523 #endif /* ELF_HOST_MACHINE */ 5524 5525 #if !TCG_TARGET_MAYBE_vec 5526 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 5527 { 5528 g_assert_not_reached(); 5529 } 5530 #endif 5531
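/*
 * For reference, step (2) of the ELF_HOST_MACHINE recipe above is
 * typically a thin wrapper in the backend.  A minimal sketch, with the
 * unwind data elided and not copied from any particular backend:
 *
 *     static const DebugFrame debug_frame = {
 *         ... CIE and FDE fields describing the prologue unwind info ...
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */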