/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to jump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS ELFCLASS32
#else
# define ELF_CLASS ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA ELFDATA2MSB
#else
# define ELF_DATA ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc.
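   (The per-host implementations of these hooks live in tcg-target.c.inc,
   which is #included further down, once the constraint-set machinery has
   been defined.)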
*/ 105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, 106 intptr_t arg2); 107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); 108 static void tcg_out_movi(TCGContext *s, TCGType type, 109 TCGReg ret, tcg_target_long arg); 110 static void tcg_out_op(TCGContext *s, TCGOpcode opc, 111 const TCGArg args[TCG_MAX_OP_ARGS], 112 const int const_args[TCG_MAX_OP_ARGS]); 113 #if TCG_TARGET_MAYBE_vec 114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 115 TCGReg dst, TCGReg src); 116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 117 TCGReg dst, TCGReg base, intptr_t offset); 118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 119 TCGReg dst, int64_t arg); 120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 121 unsigned vecl, unsigned vece, 122 const TCGArg args[TCG_MAX_OP_ARGS], 123 const int const_args[TCG_MAX_OP_ARGS]); 124 #else 125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, 126 TCGReg dst, TCGReg src) 127 { 128 g_assert_not_reached(); 129 } 130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, 131 TCGReg dst, TCGReg base, intptr_t offset) 132 { 133 g_assert_not_reached(); 134 } 135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, 136 TCGReg dst, int64_t arg) 137 { 138 g_assert_not_reached(); 139 } 140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, 141 unsigned vecl, unsigned vece, 142 const TCGArg args[TCG_MAX_OP_ARGS], 143 const int const_args[TCG_MAX_OP_ARGS]) 144 { 145 g_assert_not_reached(); 146 } 147 #endif 148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, 149 intptr_t arg2); 150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, 151 TCGReg base, intptr_t ofs); 152 #ifdef CONFIG_TCG_INTERPRETER 153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, 154 ffi_cif *cif); 155 #else 156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); 157 #endif 158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct); 159 #ifdef TCG_TARGET_NEED_LDST_LABELS 160 static int tcg_out_ldst_finalize(TCGContext *s); 161 #endif 162 163 TCGContext tcg_init_ctx; 164 __thread TCGContext *tcg_ctx; 165 166 TCGContext **tcg_ctxs; 167 unsigned int tcg_cur_ctxs; 168 unsigned int tcg_max_ctxs; 169 TCGv_env cpu_env = 0; 170 const void *tcg_code_gen_epilogue; 171 uintptr_t tcg_splitwx_diff; 172 173 #ifndef CONFIG_TCG_INTERPRETER 174 tcg_prologue_fn *tcg_qemu_tb_exec; 175 #endif 176 177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT]; 178 static TCGRegSet tcg_target_call_clobber_regs; 179 180 #if TCG_TARGET_INSN_UNIT_SIZE == 1 181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) 182 { 183 *s->code_ptr++ = v; 184 } 185 186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, 187 uint8_t v) 188 { 189 *p = v; 190 } 191 #endif 192 193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2 194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) 195 { 196 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 197 *s->code_ptr++ = v; 198 } else { 199 tcg_insn_unit *p = s->code_ptr; 200 memcpy(p, &v, sizeof(v)); 201 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); 202 } 203 } 204 205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p, 206 uint16_t v) 207 { 208 if (TCG_TARGET_INSN_UNIT_SIZE == 2) { 209 *p = v; 210 } 
else { 211 memcpy(p, &v, sizeof(v)); 212 } 213 } 214 #endif 215 216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4 217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) 218 { 219 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 220 *s->code_ptr++ = v; 221 } else { 222 tcg_insn_unit *p = s->code_ptr; 223 memcpy(p, &v, sizeof(v)); 224 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); 225 } 226 } 227 228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, 229 uint32_t v) 230 { 231 if (TCG_TARGET_INSN_UNIT_SIZE == 4) { 232 *p = v; 233 } else { 234 memcpy(p, &v, sizeof(v)); 235 } 236 } 237 #endif 238 239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8 240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) 241 { 242 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 243 *s->code_ptr++ = v; 244 } else { 245 tcg_insn_unit *p = s->code_ptr; 246 memcpy(p, &v, sizeof(v)); 247 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); 248 } 249 } 250 251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, 252 uint64_t v) 253 { 254 if (TCG_TARGET_INSN_UNIT_SIZE == 8) { 255 *p = v; 256 } else { 257 memcpy(p, &v, sizeof(v)); 258 } 259 } 260 #endif 261 262 /* label relocation processing */ 263 264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, 265 TCGLabel *l, intptr_t addend) 266 { 267 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); 268 269 r->type = type; 270 r->ptr = code_ptr; 271 r->addend = addend; 272 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); 273 } 274 275 static void tcg_out_label(TCGContext *s, TCGLabel *l) 276 { 277 tcg_debug_assert(!l->has_value); 278 l->has_value = 1; 279 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); 280 } 281 282 TCGLabel *gen_new_label(void) 283 { 284 TCGContext *s = tcg_ctx; 285 TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); 286 287 memset(l, 0, sizeof(TCGLabel)); 288 l->id = s->nb_labels++; 289 QSIMPLEQ_INIT(&l->relocs); 290 291 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); 292 293 return l; 294 } 295 296 static bool tcg_resolve_relocs(TCGContext *s) 297 { 298 TCGLabel *l; 299 300 QSIMPLEQ_FOREACH(l, &s->labels, next) { 301 TCGRelocation *r; 302 uintptr_t value = l->u.value; 303 304 QSIMPLEQ_FOREACH(r, &l->relocs, next) { 305 if (!patch_reloc(r->ptr, r->type, value, r->addend)) { 306 return false; 307 } 308 } 309 } 310 return true; 311 } 312 313 static void set_jmp_reset_offset(TCGContext *s, int which) 314 { 315 /* 316 * We will check for overflow at the end of the opcode loop in 317 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. 318 */ 319 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); 320 } 321 322 /* Signal overflow, starting over with fewer guest insns. */ 323 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s) 324 { 325 siglongjmp(s->jmp_trans, -2); 326 } 327 328 #define C_PFX1(P, A) P##A 329 #define C_PFX2(P, A, B) P##A##_##B 330 #define C_PFX3(P, A, B, C) P##A##_##B##_##C 331 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D 332 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E 333 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F 334 335 /* Define an enumeration for the various combinations. 
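   Each C_On_Im(...) line in tcg-target-con-set.h is pasted into a unique
   enumerator name via the C_PFXn macros above.  As an illustrative (not
   target-specific) example, a set written as C_O1_I2(r, r, ri) expands
   here to the enumerator c_o1_i2_r_r_ri.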
*/ 336 337 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1), 338 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2), 339 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3), 340 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4), 341 342 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1), 343 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2), 344 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3), 345 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), 346 347 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), 348 349 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), 350 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), 351 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), 352 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), 353 354 typedef enum { 355 #include "tcg-target-con-set.h" 356 } TCGConstraintSetIndex; 357 358 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); 359 360 #undef C_O0_I1 361 #undef C_O0_I2 362 #undef C_O0_I3 363 #undef C_O0_I4 364 #undef C_O1_I1 365 #undef C_O1_I2 366 #undef C_O1_I3 367 #undef C_O1_I4 368 #undef C_N1_I2 369 #undef C_O2_I1 370 #undef C_O2_I2 371 #undef C_O2_I3 372 #undef C_O2_I4 373 374 /* Put all of the constraint sets into an array, indexed by the enum. */ 375 376 #define C_O0_I1(I1) { .args_ct_str = { #I1 } }, 377 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } }, 378 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } }, 379 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } }, 380 381 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } }, 382 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } }, 383 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } }, 384 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } }, 385 386 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } }, 387 388 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } }, 389 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, 390 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } }, 391 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } }, 392 393 static const TCGTargetOpDef constraint_sets[] = { 394 #include "tcg-target-con-set.h" 395 }; 396 397 398 #undef C_O0_I1 399 #undef C_O0_I2 400 #undef C_O0_I3 401 #undef C_O0_I4 402 #undef C_O1_I1 403 #undef C_O1_I2 404 #undef C_O1_I3 405 #undef C_O1_I4 406 #undef C_N1_I2 407 #undef C_O2_I1 408 #undef C_O2_I2 409 #undef C_O2_I3 410 #undef C_O2_I4 411 412 /* Expand the enumerator to be returned from tcg_target_op_def(). 
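   The same macros are defined a third time here: the first definition
   produced the TCGConstraintSetIndex enumerators, the second the matching
   constraint_sets[] entries, and this one drops the trailing comma so that
   tcg_target_op_def() in the backend (tcg-target.c.inc, included just
   below) can write, e.g., "return C_O1_I2(r, r, ri);" (illustrative set)
   and yield the corresponding enumerator.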
*/ 413 414 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1) 415 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2) 416 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3) 417 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4) 418 419 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1) 420 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2) 421 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3) 422 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) 423 424 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) 425 426 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1) 427 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) 428 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) 429 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) 430 431 #include "tcg-target.c.inc" 432 433 static void alloc_tcg_plugin_context(TCGContext *s) 434 { 435 #ifdef CONFIG_PLUGIN 436 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1); 437 s->plugin_tb->insns = 438 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn); 439 #endif 440 } 441 442 /* 443 * All TCG threads except the parent (i.e. the one that called tcg_context_init 444 * and registered the target's TCG globals) must register with this function 445 * before initiating translation. 446 * 447 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 448 * of tcg_region_init() for the reasoning behind this. 449 * 450 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 451 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 452 * is not used anymore for translation once this function is called. 453 * 454 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 455 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 456 */ 457 #ifdef CONFIG_USER_ONLY 458 void tcg_register_thread(void) 459 { 460 tcg_ctx = &tcg_init_ctx; 461 } 462 #else 463 void tcg_register_thread(void) 464 { 465 TCGContext *s = g_malloc(sizeof(*s)); 466 unsigned int i, n; 467 468 *s = tcg_init_ctx; 469 470 /* Relink mem_base. 
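       The structure copy above duplicated all temps, but each mem_base
       still points into tcg_init_ctx.temps[]; recompute the index and
       redirect it to the corresponding entry in this context's temps[].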
*/ 471 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 472 if (tcg_init_ctx.temps[i].mem_base) { 473 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 474 tcg_debug_assert(b >= 0 && b < n); 475 s->temps[i].mem_base = &s->temps[b]; 476 } 477 } 478 479 /* Claim an entry in tcg_ctxs */ 480 n = qatomic_fetch_inc(&tcg_cur_ctxs); 481 g_assert(n < tcg_max_ctxs); 482 qatomic_set(&tcg_ctxs[n], s); 483 484 if (n > 0) { 485 alloc_tcg_plugin_context(s); 486 tcg_region_initial_alloc(s); 487 } 488 489 tcg_ctx = s; 490 } 491 #endif /* !CONFIG_USER_ONLY */ 492 493 /* pool based memory allocation */ 494 void *tcg_malloc_internal(TCGContext *s, int size) 495 { 496 TCGPool *p; 497 int pool_size; 498 499 if (size > TCG_POOL_CHUNK_SIZE) { 500 /* big malloc: insert a new pool (XXX: could optimize) */ 501 p = g_malloc(sizeof(TCGPool) + size); 502 p->size = size; 503 p->next = s->pool_first_large; 504 s->pool_first_large = p; 505 return p->data; 506 } else { 507 p = s->pool_current; 508 if (!p) { 509 p = s->pool_first; 510 if (!p) 511 goto new_pool; 512 } else { 513 if (!p->next) { 514 new_pool: 515 pool_size = TCG_POOL_CHUNK_SIZE; 516 p = g_malloc(sizeof(TCGPool) + pool_size); 517 p->size = pool_size; 518 p->next = NULL; 519 if (s->pool_current) 520 s->pool_current->next = p; 521 else 522 s->pool_first = p; 523 } else { 524 p = p->next; 525 } 526 } 527 } 528 s->pool_current = p; 529 s->pool_cur = p->data + size; 530 s->pool_end = p->data + p->size; 531 return p->data; 532 } 533 534 void tcg_pool_reset(TCGContext *s) 535 { 536 TCGPool *p, *t; 537 for (p = s->pool_first_large; p; p = t) { 538 t = p->next; 539 g_free(p); 540 } 541 s->pool_first_large = NULL; 542 s->pool_cur = s->pool_end = NULL; 543 s->pool_current = NULL; 544 } 545 546 #include "exec/helper-proto.h" 547 548 static const TCGHelperInfo all_helpers[] = { 549 #include "exec/helper-tcg.h" 550 }; 551 static GHashTable *helper_table; 552 553 #ifdef CONFIG_TCG_INTERPRETER 554 static GHashTable *ffi_table; 555 556 static ffi_type * const typecode_to_ffi[8] = { 557 [dh_typecode_void] = &ffi_type_void, 558 [dh_typecode_i32] = &ffi_type_uint32, 559 [dh_typecode_s32] = &ffi_type_sint32, 560 [dh_typecode_i64] = &ffi_type_uint64, 561 [dh_typecode_s64] = &ffi_type_sint64, 562 [dh_typecode_ptr] = &ffi_type_pointer, 563 }; 564 #endif 565 566 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; 567 static void process_op_defs(TCGContext *s); 568 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 569 TCGReg reg, const char *name); 570 571 static void tcg_context_init(unsigned max_cpus) 572 { 573 TCGContext *s = &tcg_init_ctx; 574 int op, total_args, n, i; 575 TCGOpDef *def; 576 TCGArgConstraint *args_ct; 577 TCGTemp *ts; 578 579 memset(s, 0, sizeof(*s)); 580 s->nb_globals = 0; 581 582 /* Count total number of arguments and allocate the corresponding 583 space */ 584 total_args = 0; 585 for(op = 0; op < NB_OPS; op++) { 586 def = &tcg_op_defs[op]; 587 n = def->nb_iargs + def->nb_oargs; 588 total_args += n; 589 } 590 591 args_ct = g_new0(TCGArgConstraint, total_args); 592 593 for(op = 0; op < NB_OPS; op++) { 594 def = &tcg_op_defs[op]; 595 def->args_ct = args_ct; 596 n = def->nb_iargs + def->nb_oargs; 597 args_ct += n; 598 } 599 600 /* Register helpers. */ 601 /* Use g_direct_hash/equal for direct pointer comparisons on func. 
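       Passing NULL for both callbacks makes GLib fall back to g_direct_hash
       and direct pointer equality, i.e. the helper's function pointer itself
       is the key; tcg_gen_callN() later does
       g_hash_table_lookup(helper_table, (gpointer)func) to recover the
       TCGHelperInfo for a call target.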
*/ 602 helper_table = g_hash_table_new(NULL, NULL); 603 604 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 605 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 606 (gpointer)&all_helpers[i]); 607 } 608 609 #ifdef CONFIG_TCG_INTERPRETER 610 /* g_direct_hash/equal for direct comparisons on uint32_t. */ 611 ffi_table = g_hash_table_new(NULL, NULL); 612 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 613 struct { 614 ffi_cif cif; 615 ffi_type *args[]; 616 } *ca; 617 uint32_t typemask = all_helpers[i].typemask; 618 gpointer hash = (gpointer)(uintptr_t)typemask; 619 ffi_status status; 620 int nargs; 621 622 if (g_hash_table_lookup(ffi_table, hash)) { 623 continue; 624 } 625 626 /* Ignoring the return type, find the last non-zero field. */ 627 nargs = 32 - clz32(typemask >> 3); 628 nargs = DIV_ROUND_UP(nargs, 3); 629 630 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); 631 ca->cif.rtype = typecode_to_ffi[typemask & 7]; 632 ca->cif.nargs = nargs; 633 634 if (nargs != 0) { 635 ca->cif.arg_types = ca->args; 636 for (i = 0; i < nargs; ++i) { 637 int typecode = extract32(typemask, (i + 1) * 3, 3); 638 ca->args[i] = typecode_to_ffi[typecode]; 639 } 640 } 641 642 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, 643 ca->cif.rtype, ca->cif.arg_types); 644 assert(status == FFI_OK); 645 646 g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); 647 } 648 #endif 649 650 tcg_target_init(s); 651 process_op_defs(s); 652 653 /* Reverse the order of the saved registers, assuming they're all at 654 the start of tcg_target_reg_alloc_order. */ 655 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 656 int r = tcg_target_reg_alloc_order[n]; 657 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 658 break; 659 } 660 } 661 for (i = 0; i < n; ++i) { 662 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 663 } 664 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 665 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 666 } 667 668 alloc_tcg_plugin_context(s); 669 670 tcg_ctx = s; 671 /* 672 * In user-mode we simply share the init context among threads, since we 673 * use a single region. See the documentation tcg_region_init() for the 674 * reasoning behind this. 675 * In softmmu we will have at most max_cpus TCG threads. 676 */ 677 #ifdef CONFIG_USER_ONLY 678 tcg_ctxs = &tcg_ctx; 679 tcg_cur_ctxs = 1; 680 tcg_max_ctxs = 1; 681 #else 682 tcg_max_ctxs = max_cpus; 683 tcg_ctxs = g_new0(TCGContext *, max_cpus); 684 #endif 685 686 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 687 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 688 cpu_env = temp_tcgv_ptr(ts); 689 } 690 691 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus) 692 { 693 tcg_context_init(max_cpus); 694 tcg_region_init(tb_size, splitwx, max_cpus); 695 } 696 697 /* 698 * Allocate TBs right before their corresponding translated code, making 699 * sure that TBs and code are on different cache lines. 
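 *
 * Both the TranslationBlock and the code pointer that follows it are
 * rounded up to qemu_icache_linesize, so e.g. with a 64-byte line the TB
 * descriptor never shares a cache line with the generated code the CPU
 * will execute.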
700 */ 701 TranslationBlock *tcg_tb_alloc(TCGContext *s) 702 { 703 uintptr_t align = qemu_icache_linesize; 704 TranslationBlock *tb; 705 void *next; 706 707 retry: 708 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 709 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 710 711 if (unlikely(next > s->code_gen_highwater)) { 712 if (tcg_region_alloc(s)) { 713 return NULL; 714 } 715 goto retry; 716 } 717 qatomic_set(&s->code_gen_ptr, next); 718 s->data_gen_ptr = NULL; 719 return tb; 720 } 721 722 void tcg_prologue_init(TCGContext *s) 723 { 724 size_t prologue_size; 725 726 s->code_ptr = s->code_gen_ptr; 727 s->code_buf = s->code_gen_ptr; 728 s->data_gen_ptr = NULL; 729 730 #ifndef CONFIG_TCG_INTERPRETER 731 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); 732 #endif 733 734 #ifdef TCG_TARGET_NEED_POOL_LABELS 735 s->pool_labels = NULL; 736 #endif 737 738 qemu_thread_jit_write(); 739 /* Generate the prologue. */ 740 tcg_target_qemu_prologue(s); 741 742 #ifdef TCG_TARGET_NEED_POOL_LABELS 743 /* Allow the prologue to put e.g. guest_base into a pool entry. */ 744 { 745 int result = tcg_out_pool_finalize(s); 746 tcg_debug_assert(result == 0); 747 } 748 #endif 749 750 prologue_size = tcg_current_code_size(s); 751 752 #ifndef CONFIG_TCG_INTERPRETER 753 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 754 (uintptr_t)s->code_buf, prologue_size); 755 #endif 756 757 #ifdef DEBUG_DISAS 758 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 759 FILE *logfile = qemu_log_trylock(); 760 if (logfile) { 761 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size); 762 if (s->data_gen_ptr) { 763 size_t code_size = s->data_gen_ptr - s->code_gen_ptr; 764 size_t data_size = prologue_size - code_size; 765 size_t i; 766 767 disas(logfile, s->code_gen_ptr, code_size); 768 769 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 770 if (sizeof(tcg_target_ulong) == 8) { 771 fprintf(logfile, 772 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 773 (uintptr_t)s->data_gen_ptr + i, 774 *(uint64_t *)(s->data_gen_ptr + i)); 775 } else { 776 fprintf(logfile, 777 "0x%08" PRIxPTR ": .long 0x%08x\n", 778 (uintptr_t)s->data_gen_ptr + i, 779 *(uint32_t *)(s->data_gen_ptr + i)); 780 } 781 } 782 } else { 783 disas(logfile, s->code_gen_ptr, prologue_size); 784 } 785 fprintf(logfile, "\n"); 786 qemu_log_unlock(logfile); 787 } 788 } 789 #endif 790 791 #ifndef CONFIG_TCG_INTERPRETER 792 /* 793 * Assert that goto_ptr is implemented completely, setting an epilogue. 794 * For tci, we use NULL as the signal to return from the interpreter, 795 * so skip this check. 796 */ 797 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 798 #endif 799 800 tcg_region_prologue_set(s); 801 } 802 803 void tcg_func_start(TCGContext *s) 804 { 805 tcg_pool_reset(s); 806 s->nb_temps = s->nb_globals; 807 808 /* No temps have been previously allocated for size or locality. */ 809 memset(s->free_temps, 0, sizeof(s->free_temps)); 810 811 /* No constant temps have been previously allocated. 
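       The per-type const_table hash tables persist across translations and
       are merely emptied here; tcg_constant_internal() repopulates them
       lazily as constants are requested.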
*/ 812 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 813 if (s->const_table[i]) { 814 g_hash_table_remove_all(s->const_table[i]); 815 } 816 } 817 818 s->nb_ops = 0; 819 s->nb_labels = 0; 820 s->current_frame_offset = s->frame_start; 821 822 #ifdef CONFIG_DEBUG_TCG 823 s->goto_tb_issue_mask = 0; 824 #endif 825 826 QTAILQ_INIT(&s->ops); 827 QTAILQ_INIT(&s->free_ops); 828 QSIMPLEQ_INIT(&s->labels); 829 } 830 831 static TCGTemp *tcg_temp_alloc(TCGContext *s) 832 { 833 int n = s->nb_temps++; 834 835 if (n >= TCG_MAX_TEMPS) { 836 tcg_raise_tb_overflow(s); 837 } 838 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 839 } 840 841 static TCGTemp *tcg_global_alloc(TCGContext *s) 842 { 843 TCGTemp *ts; 844 845 tcg_debug_assert(s->nb_globals == s->nb_temps); 846 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 847 s->nb_globals++; 848 ts = tcg_temp_alloc(s); 849 ts->kind = TEMP_GLOBAL; 850 851 return ts; 852 } 853 854 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 855 TCGReg reg, const char *name) 856 { 857 TCGTemp *ts; 858 859 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 860 tcg_abort(); 861 } 862 863 ts = tcg_global_alloc(s); 864 ts->base_type = type; 865 ts->type = type; 866 ts->kind = TEMP_FIXED; 867 ts->reg = reg; 868 ts->name = name; 869 tcg_regset_set_reg(s->reserved_regs, reg); 870 871 return ts; 872 } 873 874 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 875 { 876 s->frame_start = start; 877 s->frame_end = start + size; 878 s->frame_temp 879 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 880 } 881 882 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 883 intptr_t offset, const char *name) 884 { 885 TCGContext *s = tcg_ctx; 886 TCGTemp *base_ts = tcgv_ptr_temp(base); 887 TCGTemp *ts = tcg_global_alloc(s); 888 int indirect_reg = 0, bigendian = 0; 889 #if HOST_BIG_ENDIAN 890 bigendian = 1; 891 #endif 892 893 switch (base_ts->kind) { 894 case TEMP_FIXED: 895 break; 896 case TEMP_GLOBAL: 897 /* We do not support double-indirect registers. */ 898 tcg_debug_assert(!base_ts->indirect_reg); 899 base_ts->indirect_base = 1; 900 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 901 ? 2 : 1); 902 indirect_reg = 1; 903 break; 904 default: 905 g_assert_not_reached(); 906 } 907 908 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 909 TCGTemp *ts2 = tcg_global_alloc(s); 910 char buf[64]; 911 912 ts->base_type = TCG_TYPE_I64; 913 ts->type = TCG_TYPE_I32; 914 ts->indirect_reg = indirect_reg; 915 ts->mem_allocated = 1; 916 ts->mem_base = base_ts; 917 ts->mem_offset = offset + bigendian * 4; 918 pstrcpy(buf, sizeof(buf), name); 919 pstrcat(buf, sizeof(buf), "_0"); 920 ts->name = strdup(buf); 921 922 tcg_debug_assert(ts2 == ts + 1); 923 ts2->base_type = TCG_TYPE_I64; 924 ts2->type = TCG_TYPE_I32; 925 ts2->indirect_reg = indirect_reg; 926 ts2->mem_allocated = 1; 927 ts2->mem_base = base_ts; 928 ts2->mem_offset = offset + (1 - bigendian) * 4; 929 pstrcpy(buf, sizeof(buf), name); 930 pstrcat(buf, sizeof(buf), "_1"); 931 ts2->name = strdup(buf); 932 } else { 933 ts->base_type = type; 934 ts->type = type; 935 ts->indirect_reg = indirect_reg; 936 ts->mem_allocated = 1; 937 ts->mem_base = base_ts; 938 ts->mem_offset = offset; 939 ts->name = name; 940 } 941 return ts; 942 } 943 944 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) 945 { 946 TCGContext *s = tcg_ctx; 947 TCGTempKind kind = temp_local ? 
TEMP_LOCAL : TEMP_NORMAL; 948 TCGTemp *ts; 949 int idx, k; 950 951 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 952 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 953 if (idx < TCG_MAX_TEMPS) { 954 /* There is already an available temp with the right type. */ 955 clear_bit(idx, s->free_temps[k].l); 956 957 ts = &s->temps[idx]; 958 ts->temp_allocated = 1; 959 tcg_debug_assert(ts->base_type == type); 960 tcg_debug_assert(ts->kind == kind); 961 } else { 962 ts = tcg_temp_alloc(s); 963 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 964 TCGTemp *ts2 = tcg_temp_alloc(s); 965 966 ts->base_type = type; 967 ts->type = TCG_TYPE_I32; 968 ts->temp_allocated = 1; 969 ts->kind = kind; 970 971 tcg_debug_assert(ts2 == ts + 1); 972 ts2->base_type = TCG_TYPE_I64; 973 ts2->type = TCG_TYPE_I32; 974 ts2->temp_allocated = 1; 975 ts2->kind = kind; 976 } else { 977 ts->base_type = type; 978 ts->type = type; 979 ts->temp_allocated = 1; 980 ts->kind = kind; 981 } 982 } 983 984 #if defined(CONFIG_DEBUG_TCG) 985 s->temps_in_use++; 986 #endif 987 return ts; 988 } 989 990 TCGv_vec tcg_temp_new_vec(TCGType type) 991 { 992 TCGTemp *t; 993 994 #ifdef CONFIG_DEBUG_TCG 995 switch (type) { 996 case TCG_TYPE_V64: 997 assert(TCG_TARGET_HAS_v64); 998 break; 999 case TCG_TYPE_V128: 1000 assert(TCG_TARGET_HAS_v128); 1001 break; 1002 case TCG_TYPE_V256: 1003 assert(TCG_TARGET_HAS_v256); 1004 break; 1005 default: 1006 g_assert_not_reached(); 1007 } 1008 #endif 1009 1010 t = tcg_temp_new_internal(type, 0); 1011 return temp_tcgv_vec(t); 1012 } 1013 1014 /* Create a new temp of the same type as an existing temp. */ 1015 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 1016 { 1017 TCGTemp *t = tcgv_vec_temp(match); 1018 1019 tcg_debug_assert(t->temp_allocated != 0); 1020 1021 t = tcg_temp_new_internal(t->base_type, 0); 1022 return temp_tcgv_vec(t); 1023 } 1024 1025 void tcg_temp_free_internal(TCGTemp *ts) 1026 { 1027 TCGContext *s = tcg_ctx; 1028 int k, idx; 1029 1030 switch (ts->kind) { 1031 case TEMP_CONST: 1032 /* 1033 * In order to simplify users of tcg_constant_*, 1034 * silently ignore free. 1035 */ 1036 return; 1037 case TEMP_NORMAL: 1038 case TEMP_LOCAL: 1039 break; 1040 default: 1041 g_assert_not_reached(); 1042 } 1043 1044 #if defined(CONFIG_DEBUG_TCG) 1045 s->temps_in_use--; 1046 if (s->temps_in_use < 0) { 1047 fprintf(stderr, "More temporaries freed than allocated!\n"); 1048 } 1049 #endif 1050 1051 tcg_debug_assert(ts->temp_allocated != 0); 1052 ts->temp_allocated = 0; 1053 1054 idx = temp_idx(ts); 1055 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT); 1056 set_bit(idx, s->free_temps[k].l); 1057 } 1058 1059 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 1060 { 1061 TCGContext *s = tcg_ctx; 1062 GHashTable *h = s->const_table[type]; 1063 TCGTemp *ts; 1064 1065 if (h == NULL) { 1066 h = g_hash_table_new(g_int64_hash, g_int64_equal); 1067 s->const_table[type] = h; 1068 } 1069 1070 ts = g_hash_table_lookup(h, &val); 1071 if (ts == NULL) { 1072 ts = tcg_temp_alloc(s); 1073 1074 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1075 TCGTemp *ts2 = tcg_temp_alloc(s); 1076 1077 ts->base_type = TCG_TYPE_I64; 1078 ts->type = TCG_TYPE_I32; 1079 ts->kind = TEMP_CONST; 1080 ts->temp_allocated = 1; 1081 /* 1082 * Retain the full value of the 64-bit constant in the low 1083 * part, so that the hash table works. Actual uses will 1084 * truncate the value to the low part. 
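             * For example, with val == 0x123456789 the low temp keeps the
             * full 0x123456789 as its hash key, generated code uses only
             * its low 32 bits (0x23456789), and the high temp below holds
             * val >> 32 == 0x1.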
1085 */ 1086 ts->val = val; 1087 1088 tcg_debug_assert(ts2 == ts + 1); 1089 ts2->base_type = TCG_TYPE_I64; 1090 ts2->type = TCG_TYPE_I32; 1091 ts2->kind = TEMP_CONST; 1092 ts2->temp_allocated = 1; 1093 ts2->val = val >> 32; 1094 } else { 1095 ts->base_type = type; 1096 ts->type = type; 1097 ts->kind = TEMP_CONST; 1098 ts->temp_allocated = 1; 1099 ts->val = val; 1100 } 1101 g_hash_table_insert(h, &ts->val, ts); 1102 } 1103 1104 return ts; 1105 } 1106 1107 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 1108 { 1109 val = dup_const(vece, val); 1110 return temp_tcgv_vec(tcg_constant_internal(type, val)); 1111 } 1112 1113 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 1114 { 1115 TCGTemp *t = tcgv_vec_temp(match); 1116 1117 tcg_debug_assert(t->temp_allocated != 0); 1118 return tcg_constant_vec(t->base_type, vece, val); 1119 } 1120 1121 TCGv_i32 tcg_const_i32(int32_t val) 1122 { 1123 TCGv_i32 t0; 1124 t0 = tcg_temp_new_i32(); 1125 tcg_gen_movi_i32(t0, val); 1126 return t0; 1127 } 1128 1129 TCGv_i64 tcg_const_i64(int64_t val) 1130 { 1131 TCGv_i64 t0; 1132 t0 = tcg_temp_new_i64(); 1133 tcg_gen_movi_i64(t0, val); 1134 return t0; 1135 } 1136 1137 TCGv_i32 tcg_const_local_i32(int32_t val) 1138 { 1139 TCGv_i32 t0; 1140 t0 = tcg_temp_local_new_i32(); 1141 tcg_gen_movi_i32(t0, val); 1142 return t0; 1143 } 1144 1145 TCGv_i64 tcg_const_local_i64(int64_t val) 1146 { 1147 TCGv_i64 t0; 1148 t0 = tcg_temp_local_new_i64(); 1149 tcg_gen_movi_i64(t0, val); 1150 return t0; 1151 } 1152 1153 #if defined(CONFIG_DEBUG_TCG) 1154 void tcg_clear_temp_count(void) 1155 { 1156 TCGContext *s = tcg_ctx; 1157 s->temps_in_use = 0; 1158 } 1159 1160 int tcg_check_temp_count(void) 1161 { 1162 TCGContext *s = tcg_ctx; 1163 if (s->temps_in_use) { 1164 /* Clear the count so that we don't give another 1165 * warning immediately next time around. 1166 */ 1167 s->temps_in_use = 0; 1168 return 1; 1169 } 1170 return 0; 1171 } 1172 #endif 1173 1174 /* Return true if OP may appear in the opcode stream. 1175 Test the runtime variable that controls each opcode. 
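   For example, INDEX_op_div_i32 is only reported as supported when
   TCG_TARGET_HAS_div_i32 is non-zero; the generic expanders in tcg-op.c
   are expected to fall back to div2 or a helper call when it is not.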
*/ 1176 bool tcg_op_supported(TCGOpcode op) 1177 { 1178 const bool have_vec 1179 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; 1180 1181 switch (op) { 1182 case INDEX_op_discard: 1183 case INDEX_op_set_label: 1184 case INDEX_op_call: 1185 case INDEX_op_br: 1186 case INDEX_op_mb: 1187 case INDEX_op_insn_start: 1188 case INDEX_op_exit_tb: 1189 case INDEX_op_goto_tb: 1190 case INDEX_op_goto_ptr: 1191 case INDEX_op_qemu_ld_i32: 1192 case INDEX_op_qemu_st_i32: 1193 case INDEX_op_qemu_ld_i64: 1194 case INDEX_op_qemu_st_i64: 1195 return true; 1196 1197 case INDEX_op_qemu_st8_i32: 1198 return TCG_TARGET_HAS_qemu_st8_i32; 1199 1200 case INDEX_op_mov_i32: 1201 case INDEX_op_setcond_i32: 1202 case INDEX_op_brcond_i32: 1203 case INDEX_op_ld8u_i32: 1204 case INDEX_op_ld8s_i32: 1205 case INDEX_op_ld16u_i32: 1206 case INDEX_op_ld16s_i32: 1207 case INDEX_op_ld_i32: 1208 case INDEX_op_st8_i32: 1209 case INDEX_op_st16_i32: 1210 case INDEX_op_st_i32: 1211 case INDEX_op_add_i32: 1212 case INDEX_op_sub_i32: 1213 case INDEX_op_mul_i32: 1214 case INDEX_op_and_i32: 1215 case INDEX_op_or_i32: 1216 case INDEX_op_xor_i32: 1217 case INDEX_op_shl_i32: 1218 case INDEX_op_shr_i32: 1219 case INDEX_op_sar_i32: 1220 return true; 1221 1222 case INDEX_op_movcond_i32: 1223 return TCG_TARGET_HAS_movcond_i32; 1224 case INDEX_op_div_i32: 1225 case INDEX_op_divu_i32: 1226 return TCG_TARGET_HAS_div_i32; 1227 case INDEX_op_rem_i32: 1228 case INDEX_op_remu_i32: 1229 return TCG_TARGET_HAS_rem_i32; 1230 case INDEX_op_div2_i32: 1231 case INDEX_op_divu2_i32: 1232 return TCG_TARGET_HAS_div2_i32; 1233 case INDEX_op_rotl_i32: 1234 case INDEX_op_rotr_i32: 1235 return TCG_TARGET_HAS_rot_i32; 1236 case INDEX_op_deposit_i32: 1237 return TCG_TARGET_HAS_deposit_i32; 1238 case INDEX_op_extract_i32: 1239 return TCG_TARGET_HAS_extract_i32; 1240 case INDEX_op_sextract_i32: 1241 return TCG_TARGET_HAS_sextract_i32; 1242 case INDEX_op_extract2_i32: 1243 return TCG_TARGET_HAS_extract2_i32; 1244 case INDEX_op_add2_i32: 1245 return TCG_TARGET_HAS_add2_i32; 1246 case INDEX_op_sub2_i32: 1247 return TCG_TARGET_HAS_sub2_i32; 1248 case INDEX_op_mulu2_i32: 1249 return TCG_TARGET_HAS_mulu2_i32; 1250 case INDEX_op_muls2_i32: 1251 return TCG_TARGET_HAS_muls2_i32; 1252 case INDEX_op_muluh_i32: 1253 return TCG_TARGET_HAS_muluh_i32; 1254 case INDEX_op_mulsh_i32: 1255 return TCG_TARGET_HAS_mulsh_i32; 1256 case INDEX_op_ext8s_i32: 1257 return TCG_TARGET_HAS_ext8s_i32; 1258 case INDEX_op_ext16s_i32: 1259 return TCG_TARGET_HAS_ext16s_i32; 1260 case INDEX_op_ext8u_i32: 1261 return TCG_TARGET_HAS_ext8u_i32; 1262 case INDEX_op_ext16u_i32: 1263 return TCG_TARGET_HAS_ext16u_i32; 1264 case INDEX_op_bswap16_i32: 1265 return TCG_TARGET_HAS_bswap16_i32; 1266 case INDEX_op_bswap32_i32: 1267 return TCG_TARGET_HAS_bswap32_i32; 1268 case INDEX_op_not_i32: 1269 return TCG_TARGET_HAS_not_i32; 1270 case INDEX_op_neg_i32: 1271 return TCG_TARGET_HAS_neg_i32; 1272 case INDEX_op_andc_i32: 1273 return TCG_TARGET_HAS_andc_i32; 1274 case INDEX_op_orc_i32: 1275 return TCG_TARGET_HAS_orc_i32; 1276 case INDEX_op_eqv_i32: 1277 return TCG_TARGET_HAS_eqv_i32; 1278 case INDEX_op_nand_i32: 1279 return TCG_TARGET_HAS_nand_i32; 1280 case INDEX_op_nor_i32: 1281 return TCG_TARGET_HAS_nor_i32; 1282 case INDEX_op_clz_i32: 1283 return TCG_TARGET_HAS_clz_i32; 1284 case INDEX_op_ctz_i32: 1285 return TCG_TARGET_HAS_ctz_i32; 1286 case INDEX_op_ctpop_i32: 1287 return TCG_TARGET_HAS_ctpop_i32; 1288 1289 case INDEX_op_brcond2_i32: 1290 case INDEX_op_setcond2_i32: 1291 return TCG_TARGET_REG_BITS 
== 32; 1292 1293 case INDEX_op_mov_i64: 1294 case INDEX_op_setcond_i64: 1295 case INDEX_op_brcond_i64: 1296 case INDEX_op_ld8u_i64: 1297 case INDEX_op_ld8s_i64: 1298 case INDEX_op_ld16u_i64: 1299 case INDEX_op_ld16s_i64: 1300 case INDEX_op_ld32u_i64: 1301 case INDEX_op_ld32s_i64: 1302 case INDEX_op_ld_i64: 1303 case INDEX_op_st8_i64: 1304 case INDEX_op_st16_i64: 1305 case INDEX_op_st32_i64: 1306 case INDEX_op_st_i64: 1307 case INDEX_op_add_i64: 1308 case INDEX_op_sub_i64: 1309 case INDEX_op_mul_i64: 1310 case INDEX_op_and_i64: 1311 case INDEX_op_or_i64: 1312 case INDEX_op_xor_i64: 1313 case INDEX_op_shl_i64: 1314 case INDEX_op_shr_i64: 1315 case INDEX_op_sar_i64: 1316 case INDEX_op_ext_i32_i64: 1317 case INDEX_op_extu_i32_i64: 1318 return TCG_TARGET_REG_BITS == 64; 1319 1320 case INDEX_op_movcond_i64: 1321 return TCG_TARGET_HAS_movcond_i64; 1322 case INDEX_op_div_i64: 1323 case INDEX_op_divu_i64: 1324 return TCG_TARGET_HAS_div_i64; 1325 case INDEX_op_rem_i64: 1326 case INDEX_op_remu_i64: 1327 return TCG_TARGET_HAS_rem_i64; 1328 case INDEX_op_div2_i64: 1329 case INDEX_op_divu2_i64: 1330 return TCG_TARGET_HAS_div2_i64; 1331 case INDEX_op_rotl_i64: 1332 case INDEX_op_rotr_i64: 1333 return TCG_TARGET_HAS_rot_i64; 1334 case INDEX_op_deposit_i64: 1335 return TCG_TARGET_HAS_deposit_i64; 1336 case INDEX_op_extract_i64: 1337 return TCG_TARGET_HAS_extract_i64; 1338 case INDEX_op_sextract_i64: 1339 return TCG_TARGET_HAS_sextract_i64; 1340 case INDEX_op_extract2_i64: 1341 return TCG_TARGET_HAS_extract2_i64; 1342 case INDEX_op_extrl_i64_i32: 1343 return TCG_TARGET_HAS_extrl_i64_i32; 1344 case INDEX_op_extrh_i64_i32: 1345 return TCG_TARGET_HAS_extrh_i64_i32; 1346 case INDEX_op_ext8s_i64: 1347 return TCG_TARGET_HAS_ext8s_i64; 1348 case INDEX_op_ext16s_i64: 1349 return TCG_TARGET_HAS_ext16s_i64; 1350 case INDEX_op_ext32s_i64: 1351 return TCG_TARGET_HAS_ext32s_i64; 1352 case INDEX_op_ext8u_i64: 1353 return TCG_TARGET_HAS_ext8u_i64; 1354 case INDEX_op_ext16u_i64: 1355 return TCG_TARGET_HAS_ext16u_i64; 1356 case INDEX_op_ext32u_i64: 1357 return TCG_TARGET_HAS_ext32u_i64; 1358 case INDEX_op_bswap16_i64: 1359 return TCG_TARGET_HAS_bswap16_i64; 1360 case INDEX_op_bswap32_i64: 1361 return TCG_TARGET_HAS_bswap32_i64; 1362 case INDEX_op_bswap64_i64: 1363 return TCG_TARGET_HAS_bswap64_i64; 1364 case INDEX_op_not_i64: 1365 return TCG_TARGET_HAS_not_i64; 1366 case INDEX_op_neg_i64: 1367 return TCG_TARGET_HAS_neg_i64; 1368 case INDEX_op_andc_i64: 1369 return TCG_TARGET_HAS_andc_i64; 1370 case INDEX_op_orc_i64: 1371 return TCG_TARGET_HAS_orc_i64; 1372 case INDEX_op_eqv_i64: 1373 return TCG_TARGET_HAS_eqv_i64; 1374 case INDEX_op_nand_i64: 1375 return TCG_TARGET_HAS_nand_i64; 1376 case INDEX_op_nor_i64: 1377 return TCG_TARGET_HAS_nor_i64; 1378 case INDEX_op_clz_i64: 1379 return TCG_TARGET_HAS_clz_i64; 1380 case INDEX_op_ctz_i64: 1381 return TCG_TARGET_HAS_ctz_i64; 1382 case INDEX_op_ctpop_i64: 1383 return TCG_TARGET_HAS_ctpop_i64; 1384 case INDEX_op_add2_i64: 1385 return TCG_TARGET_HAS_add2_i64; 1386 case INDEX_op_sub2_i64: 1387 return TCG_TARGET_HAS_sub2_i64; 1388 case INDEX_op_mulu2_i64: 1389 return TCG_TARGET_HAS_mulu2_i64; 1390 case INDEX_op_muls2_i64: 1391 return TCG_TARGET_HAS_muls2_i64; 1392 case INDEX_op_muluh_i64: 1393 return TCG_TARGET_HAS_muluh_i64; 1394 case INDEX_op_mulsh_i64: 1395 return TCG_TARGET_HAS_mulsh_i64; 1396 1397 case INDEX_op_mov_vec: 1398 case INDEX_op_dup_vec: 1399 case INDEX_op_dupm_vec: 1400 case INDEX_op_ld_vec: 1401 case INDEX_op_st_vec: 1402 case INDEX_op_add_vec: 1403 case 
INDEX_op_sub_vec: 1404 case INDEX_op_and_vec: 1405 case INDEX_op_or_vec: 1406 case INDEX_op_xor_vec: 1407 case INDEX_op_cmp_vec: 1408 return have_vec; 1409 case INDEX_op_dup2_vec: 1410 return have_vec && TCG_TARGET_REG_BITS == 32; 1411 case INDEX_op_not_vec: 1412 return have_vec && TCG_TARGET_HAS_not_vec; 1413 case INDEX_op_neg_vec: 1414 return have_vec && TCG_TARGET_HAS_neg_vec; 1415 case INDEX_op_abs_vec: 1416 return have_vec && TCG_TARGET_HAS_abs_vec; 1417 case INDEX_op_andc_vec: 1418 return have_vec && TCG_TARGET_HAS_andc_vec; 1419 case INDEX_op_orc_vec: 1420 return have_vec && TCG_TARGET_HAS_orc_vec; 1421 case INDEX_op_nand_vec: 1422 return have_vec && TCG_TARGET_HAS_nand_vec; 1423 case INDEX_op_nor_vec: 1424 return have_vec && TCG_TARGET_HAS_nor_vec; 1425 case INDEX_op_eqv_vec: 1426 return have_vec && TCG_TARGET_HAS_eqv_vec; 1427 case INDEX_op_mul_vec: 1428 return have_vec && TCG_TARGET_HAS_mul_vec; 1429 case INDEX_op_shli_vec: 1430 case INDEX_op_shri_vec: 1431 case INDEX_op_sari_vec: 1432 return have_vec && TCG_TARGET_HAS_shi_vec; 1433 case INDEX_op_shls_vec: 1434 case INDEX_op_shrs_vec: 1435 case INDEX_op_sars_vec: 1436 return have_vec && TCG_TARGET_HAS_shs_vec; 1437 case INDEX_op_shlv_vec: 1438 case INDEX_op_shrv_vec: 1439 case INDEX_op_sarv_vec: 1440 return have_vec && TCG_TARGET_HAS_shv_vec; 1441 case INDEX_op_rotli_vec: 1442 return have_vec && TCG_TARGET_HAS_roti_vec; 1443 case INDEX_op_rotls_vec: 1444 return have_vec && TCG_TARGET_HAS_rots_vec; 1445 case INDEX_op_rotlv_vec: 1446 case INDEX_op_rotrv_vec: 1447 return have_vec && TCG_TARGET_HAS_rotv_vec; 1448 case INDEX_op_ssadd_vec: 1449 case INDEX_op_usadd_vec: 1450 case INDEX_op_sssub_vec: 1451 case INDEX_op_ussub_vec: 1452 return have_vec && TCG_TARGET_HAS_sat_vec; 1453 case INDEX_op_smin_vec: 1454 case INDEX_op_umin_vec: 1455 case INDEX_op_smax_vec: 1456 case INDEX_op_umax_vec: 1457 return have_vec && TCG_TARGET_HAS_minmax_vec; 1458 case INDEX_op_bitsel_vec: 1459 return have_vec && TCG_TARGET_HAS_bitsel_vec; 1460 case INDEX_op_cmpsel_vec: 1461 return have_vec && TCG_TARGET_HAS_cmpsel_vec; 1462 1463 default: 1464 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 1465 return true; 1466 } 1467 } 1468 1469 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1470 and endian swap. Maybe it would be better to do the alignment 1471 and endian swap in tcg_reg_alloc_call(). */ 1472 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1473 { 1474 int i, real_args, nb_rets, pi; 1475 unsigned typemask; 1476 const TCGHelperInfo *info; 1477 TCGOp *op; 1478 1479 info = g_hash_table_lookup(helper_table, (gpointer)func); 1480 typemask = info->typemask; 1481 1482 #ifdef CONFIG_PLUGIN 1483 /* detect non-plugin helpers */ 1484 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) { 1485 tcg_ctx->plugin_insn->calls_helpers = true; 1486 } 1487 #endif 1488 1489 #if defined(__sparc__) && !defined(__arch64__) \ 1490 && !defined(CONFIG_TCG_INTERPRETER) 1491 /* We have 64-bit values in one register, but need to pass as two 1492 separate parameters. Split them. 
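       E.g. a helper argument declared as i64 is rewritten below into two
       i32 temporaries (high word first) so that the 32-bit SPARC calling
       convention sees two separate argument slots; the typemask is rebuilt
       to match.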
*/ 1493 int orig_typemask = typemask; 1494 int orig_nargs = nargs; 1495 TCGv_i64 retl, reth; 1496 TCGTemp *split_args[MAX_OPC_PARAM]; 1497 1498 retl = NULL; 1499 reth = NULL; 1500 typemask = 0; 1501 for (i = real_args = 0; i < nargs; ++i) { 1502 int argtype = extract32(orig_typemask, (i + 1) * 3, 3); 1503 bool is_64bit = (argtype & ~1) == dh_typecode_i64; 1504 1505 if (is_64bit) { 1506 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1507 TCGv_i32 h = tcg_temp_new_i32(); 1508 TCGv_i32 l = tcg_temp_new_i32(); 1509 tcg_gen_extr_i64_i32(l, h, orig); 1510 split_args[real_args++] = tcgv_i32_temp(h); 1511 typemask |= dh_typecode_i32 << (real_args * 3); 1512 split_args[real_args++] = tcgv_i32_temp(l); 1513 typemask |= dh_typecode_i32 << (real_args * 3); 1514 } else { 1515 split_args[real_args++] = args[i]; 1516 typemask |= argtype << (real_args * 3); 1517 } 1518 } 1519 nargs = real_args; 1520 args = split_args; 1521 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1522 for (i = 0; i < nargs; ++i) { 1523 int argtype = extract32(typemask, (i + 1) * 3, 3); 1524 bool is_32bit = (argtype & ~1) == dh_typecode_i32; 1525 bool is_signed = argtype & 1; 1526 1527 if (is_32bit) { 1528 TCGv_i64 temp = tcg_temp_new_i64(); 1529 TCGv_i32 orig = temp_tcgv_i32(args[i]); 1530 if (is_signed) { 1531 tcg_gen_ext_i32_i64(temp, orig); 1532 } else { 1533 tcg_gen_extu_i32_i64(temp, orig); 1534 } 1535 args[i] = tcgv_i64_temp(temp); 1536 } 1537 } 1538 #endif /* TCG_TARGET_EXTEND_ARGS */ 1539 1540 op = tcg_emit_op(INDEX_op_call); 1541 1542 pi = 0; 1543 if (ret != NULL) { 1544 #if defined(__sparc__) && !defined(__arch64__) \ 1545 && !defined(CONFIG_TCG_INTERPRETER) 1546 if ((typemask & 6) == dh_typecode_i64) { 1547 /* The 32-bit ABI is going to return the 64-bit value in 1548 the %o0/%o1 register pair. Prepare for this by using 1549 two return temporaries, and reassemble below. */ 1550 retl = tcg_temp_new_i64(); 1551 reth = tcg_temp_new_i64(); 1552 op->args[pi++] = tcgv_i64_arg(reth); 1553 op->args[pi++] = tcgv_i64_arg(retl); 1554 nb_rets = 2; 1555 } else { 1556 op->args[pi++] = temp_arg(ret); 1557 nb_rets = 1; 1558 } 1559 #else 1560 if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { 1561 #if HOST_BIG_ENDIAN 1562 op->args[pi++] = temp_arg(ret + 1); 1563 op->args[pi++] = temp_arg(ret); 1564 #else 1565 op->args[pi++] = temp_arg(ret); 1566 op->args[pi++] = temp_arg(ret + 1); 1567 #endif 1568 nb_rets = 2; 1569 } else { 1570 op->args[pi++] = temp_arg(ret); 1571 nb_rets = 1; 1572 } 1573 #endif 1574 } else { 1575 nb_rets = 0; 1576 } 1577 TCGOP_CALLO(op) = nb_rets; 1578 1579 real_args = 0; 1580 for (i = 0; i < nargs; i++) { 1581 int argtype = extract32(typemask, (i + 1) * 3, 3); 1582 bool is_64bit = (argtype & ~1) == dh_typecode_i64; 1583 bool want_align = false; 1584 1585 #if defined(CONFIG_TCG_INTERPRETER) 1586 /* 1587 * Align all arguments, so that they land in predictable places 1588 * for passing off to ffi_call. 1589 */ 1590 want_align = true; 1591 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS) 1592 /* Some targets want aligned 64 bit args */ 1593 want_align = is_64bit; 1594 #endif 1595 1596 if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) { 1597 op->args[pi++] = TCG_CALL_DUMMY_ARG; 1598 real_args++; 1599 } 1600 1601 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 1602 /* 1603 * If stack grows up, then we will be placing successive 1604 * arguments at lower addresses, which means we need to 1605 * reverse the order compared to how we would normally 1606 * treat either big or little-endian. 
For those arguments 1607 * that will wind up in registers, this still works for 1608 * HPPA (the only current STACK_GROWSUP target) since the 1609 * argument registers are *also* allocated in decreasing 1610 * order. If another such target is added, this logic may 1611 * have to get more complicated to differentiate between 1612 * stack arguments and register arguments. 1613 */ 1614 #if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP) 1615 op->args[pi++] = temp_arg(args[i] + 1); 1616 op->args[pi++] = temp_arg(args[i]); 1617 #else 1618 op->args[pi++] = temp_arg(args[i]); 1619 op->args[pi++] = temp_arg(args[i] + 1); 1620 #endif 1621 real_args += 2; 1622 continue; 1623 } 1624 1625 op->args[pi++] = temp_arg(args[i]); 1626 real_args++; 1627 } 1628 op->args[pi++] = (uintptr_t)func; 1629 op->args[pi++] = (uintptr_t)info; 1630 TCGOP_CALLI(op) = real_args; 1631 1632 /* Make sure the fields didn't overflow. */ 1633 tcg_debug_assert(TCGOP_CALLI(op) == real_args); 1634 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 1635 1636 #if defined(__sparc__) && !defined(__arch64__) \ 1637 && !defined(CONFIG_TCG_INTERPRETER) 1638 /* Free all of the parts we allocated above. */ 1639 for (i = real_args = 0; i < orig_nargs; ++i) { 1640 int argtype = extract32(orig_typemask, (i + 1) * 3, 3); 1641 bool is_64bit = (argtype & ~1) == dh_typecode_i64; 1642 1643 if (is_64bit) { 1644 tcg_temp_free_internal(args[real_args++]); 1645 tcg_temp_free_internal(args[real_args++]); 1646 } else { 1647 real_args++; 1648 } 1649 } 1650 if ((orig_typemask & 6) == dh_typecode_i64) { 1651 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 1652 Note that describing these as TCGv_i64 eliminates an unnecessary 1653 zero-extension that tcg_gen_concat_i32_i64 would create. */ 1654 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 1655 tcg_temp_free_i64(retl); 1656 tcg_temp_free_i64(reth); 1657 } 1658 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 1659 for (i = 0; i < nargs; ++i) { 1660 int argtype = extract32(typemask, (i + 1) * 3, 3); 1661 bool is_32bit = (argtype & ~1) == dh_typecode_i32; 1662 1663 if (is_32bit) { 1664 tcg_temp_free_internal(args[i]); 1665 } 1666 } 1667 #endif /* TCG_TARGET_EXTEND_ARGS */ 1668 } 1669 1670 static void tcg_reg_alloc_start(TCGContext *s) 1671 { 1672 int i, n; 1673 1674 for (i = 0, n = s->nb_temps; i < n; i++) { 1675 TCGTemp *ts = &s->temps[i]; 1676 TCGTempVal val = TEMP_VAL_MEM; 1677 1678 switch (ts->kind) { 1679 case TEMP_CONST: 1680 val = TEMP_VAL_CONST; 1681 break; 1682 case TEMP_FIXED: 1683 val = TEMP_VAL_REG; 1684 break; 1685 case TEMP_GLOBAL: 1686 break; 1687 case TEMP_NORMAL: 1688 case TEMP_EBB: 1689 val = TEMP_VAL_DEAD; 1690 /* fall through */ 1691 case TEMP_LOCAL: 1692 ts->mem_allocated = 0; 1693 break; 1694 default: 1695 g_assert_not_reached(); 1696 } 1697 ts->val_type = val; 1698 } 1699 1700 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 1701 } 1702 1703 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 1704 TCGTemp *ts) 1705 { 1706 int idx = temp_idx(ts); 1707 1708 switch (ts->kind) { 1709 case TEMP_FIXED: 1710 case TEMP_GLOBAL: 1711 pstrcpy(buf, buf_size, ts->name); 1712 break; 1713 case TEMP_LOCAL: 1714 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 1715 break; 1716 case TEMP_EBB: 1717 snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals); 1718 break; 1719 case TEMP_NORMAL: 1720 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 1721 break; 1722 case TEMP_CONST: 1723 switch (ts->type) { 1724 case TCG_TYPE_I32: 1725 
snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 1726 break; 1727 #if TCG_TARGET_REG_BITS > 32 1728 case TCG_TYPE_I64: 1729 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 1730 break; 1731 #endif 1732 case TCG_TYPE_V64: 1733 case TCG_TYPE_V128: 1734 case TCG_TYPE_V256: 1735 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 1736 64 << (ts->type - TCG_TYPE_V64), ts->val); 1737 break; 1738 default: 1739 g_assert_not_reached(); 1740 } 1741 break; 1742 } 1743 return buf; 1744 } 1745 1746 static char *tcg_get_arg_str(TCGContext *s, char *buf, 1747 int buf_size, TCGArg arg) 1748 { 1749 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 1750 } 1751 1752 static const char * const cond_name[] = 1753 { 1754 [TCG_COND_NEVER] = "never", 1755 [TCG_COND_ALWAYS] = "always", 1756 [TCG_COND_EQ] = "eq", 1757 [TCG_COND_NE] = "ne", 1758 [TCG_COND_LT] = "lt", 1759 [TCG_COND_GE] = "ge", 1760 [TCG_COND_LE] = "le", 1761 [TCG_COND_GT] = "gt", 1762 [TCG_COND_LTU] = "ltu", 1763 [TCG_COND_GEU] = "geu", 1764 [TCG_COND_LEU] = "leu", 1765 [TCG_COND_GTU] = "gtu" 1766 }; 1767 1768 static const char * const ldst_name[] = 1769 { 1770 [MO_UB] = "ub", 1771 [MO_SB] = "sb", 1772 [MO_LEUW] = "leuw", 1773 [MO_LESW] = "lesw", 1774 [MO_LEUL] = "leul", 1775 [MO_LESL] = "lesl", 1776 [MO_LEUQ] = "leq", 1777 [MO_BEUW] = "beuw", 1778 [MO_BESW] = "besw", 1779 [MO_BEUL] = "beul", 1780 [MO_BESL] = "besl", 1781 [MO_BEUQ] = "beq", 1782 }; 1783 1784 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 1785 #ifdef TARGET_ALIGNED_ONLY 1786 [MO_UNALN >> MO_ASHIFT] = "un+", 1787 [MO_ALIGN >> MO_ASHIFT] = "", 1788 #else 1789 [MO_UNALN >> MO_ASHIFT] = "", 1790 [MO_ALIGN >> MO_ASHIFT] = "al+", 1791 #endif 1792 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 1793 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 1794 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 1795 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 1796 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 1797 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 1798 }; 1799 1800 static const char bswap_flag_name[][6] = { 1801 [TCG_BSWAP_IZ] = "iz", 1802 [TCG_BSWAP_OZ] = "oz", 1803 [TCG_BSWAP_OS] = "os", 1804 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz", 1805 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os", 1806 }; 1807 1808 static inline bool tcg_regset_single(TCGRegSet d) 1809 { 1810 return (d & (d - 1)) == 0; 1811 } 1812 1813 static inline TCGReg tcg_regset_first(TCGRegSet d) 1814 { 1815 if (TCG_TARGET_NB_REGS <= 32) { 1816 return ctz32(d); 1817 } else { 1818 return ctz64(d); 1819 } 1820 } 1821 1822 /* Return only the number of characters output -- no error return. */ 1823 #define ne_fprintf(...) \ 1824 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? 
ret_ : 0; }) 1825 1826 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) 1827 { 1828 char buf[128]; 1829 TCGOp *op; 1830 1831 QTAILQ_FOREACH(op, &s->ops, link) { 1832 int i, k, nb_oargs, nb_iargs, nb_cargs; 1833 const TCGOpDef *def; 1834 TCGOpcode c; 1835 int col = 0; 1836 1837 c = op->opc; 1838 def = &tcg_op_defs[c]; 1839 1840 if (c == INDEX_op_insn_start) { 1841 nb_oargs = 0; 1842 col += ne_fprintf(f, "\n ----"); 1843 1844 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 1845 target_ulong a; 1846 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 1847 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 1848 #else 1849 a = op->args[i]; 1850 #endif 1851 col += ne_fprintf(f, " " TARGET_FMT_lx, a); 1852 } 1853 } else if (c == INDEX_op_call) { 1854 const TCGHelperInfo *info = tcg_call_info(op); 1855 void *func = tcg_call_func(op); 1856 1857 /* variable number of arguments */ 1858 nb_oargs = TCGOP_CALLO(op); 1859 nb_iargs = TCGOP_CALLI(op); 1860 nb_cargs = def->nb_cargs; 1861 1862 col += ne_fprintf(f, " %s ", def->name); 1863 1864 /* 1865 * Print the function name from TCGHelperInfo, if available. 1866 * Note that plugins have a template function for the info, 1867 * but the actual function pointer comes from the plugin. 1868 */ 1869 if (func == info->func) { 1870 col += ne_fprintf(f, "%s", info->name); 1871 } else { 1872 col += ne_fprintf(f, "plugin(%p)", func); 1873 } 1874 1875 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs); 1876 for (i = 0; i < nb_oargs; i++) { 1877 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf), 1878 op->args[i])); 1879 } 1880 for (i = 0; i < nb_iargs; i++) { 1881 TCGArg arg = op->args[nb_oargs + i]; 1882 const char *t = "<dummy>"; 1883 if (arg != TCG_CALL_DUMMY_ARG) { 1884 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 1885 } 1886 col += ne_fprintf(f, ",%s", t); 1887 } 1888 } else { 1889 col += ne_fprintf(f, " %s ", def->name); 1890 1891 nb_oargs = def->nb_oargs; 1892 nb_iargs = def->nb_iargs; 1893 nb_cargs = def->nb_cargs; 1894 1895 if (def->flags & TCG_OPF_VECTOR) { 1896 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), 1897 8 << TCGOP_VECE(op)); 1898 } 1899 1900 k = 0; 1901 for (i = 0; i < nb_oargs; i++) { 1902 const char *sep = k ? "," : ""; 1903 col += ne_fprintf(f, "%s%s", sep, 1904 tcg_get_arg_str(s, buf, sizeof(buf), 1905 op->args[k++])); 1906 } 1907 for (i = 0; i < nb_iargs; i++) { 1908 const char *sep = k ? 
"," : ""; 1909 col += ne_fprintf(f, "%s%s", sep, 1910 tcg_get_arg_str(s, buf, sizeof(buf), 1911 op->args[k++])); 1912 } 1913 switch (c) { 1914 case INDEX_op_brcond_i32: 1915 case INDEX_op_setcond_i32: 1916 case INDEX_op_movcond_i32: 1917 case INDEX_op_brcond2_i32: 1918 case INDEX_op_setcond2_i32: 1919 case INDEX_op_brcond_i64: 1920 case INDEX_op_setcond_i64: 1921 case INDEX_op_movcond_i64: 1922 case INDEX_op_cmp_vec: 1923 case INDEX_op_cmpsel_vec: 1924 if (op->args[k] < ARRAY_SIZE(cond_name) 1925 && cond_name[op->args[k]]) { 1926 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 1927 } else { 1928 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 1929 } 1930 i = 1; 1931 break; 1932 case INDEX_op_qemu_ld_i32: 1933 case INDEX_op_qemu_st_i32: 1934 case INDEX_op_qemu_st8_i32: 1935 case INDEX_op_qemu_ld_i64: 1936 case INDEX_op_qemu_st_i64: 1937 { 1938 MemOpIdx oi = op->args[k++]; 1939 MemOp op = get_memop(oi); 1940 unsigned ix = get_mmuidx(oi); 1941 1942 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1943 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 1944 } else { 1945 const char *s_al, *s_op; 1946 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1947 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1948 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 1949 } 1950 i = 1; 1951 } 1952 break; 1953 case INDEX_op_bswap16_i32: 1954 case INDEX_op_bswap16_i64: 1955 case INDEX_op_bswap32_i32: 1956 case INDEX_op_bswap32_i64: 1957 case INDEX_op_bswap64_i64: 1958 { 1959 TCGArg flags = op->args[k]; 1960 const char *name = NULL; 1961 1962 if (flags < ARRAY_SIZE(bswap_flag_name)) { 1963 name = bswap_flag_name[flags]; 1964 } 1965 if (name) { 1966 col += ne_fprintf(f, ",%s", name); 1967 } else { 1968 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 1969 } 1970 i = k = 1; 1971 } 1972 break; 1973 default: 1974 i = 0; 1975 break; 1976 } 1977 switch (c) { 1978 case INDEX_op_set_label: 1979 case INDEX_op_br: 1980 case INDEX_op_brcond_i32: 1981 case INDEX_op_brcond_i64: 1982 case INDEX_op_brcond2_i32: 1983 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 1984 arg_label(op->args[k])->id); 1985 i++, k++; 1986 break; 1987 default: 1988 break; 1989 } 1990 for (; i < nb_cargs; i++, k++) { 1991 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 1992 op->args[k]); 1993 } 1994 } 1995 1996 if (have_prefs || op->life) { 1997 for (; col < 40; ++col) { 1998 putc(' ', f); 1999 } 2000 } 2001 2002 if (op->life) { 2003 unsigned life = op->life; 2004 2005 if (life & (SYNC_ARG * 3)) { 2006 ne_fprintf(f, " sync:"); 2007 for (i = 0; i < 2; ++i) { 2008 if (life & (SYNC_ARG << i)) { 2009 ne_fprintf(f, " %d", i); 2010 } 2011 } 2012 } 2013 life /= DEAD_ARG; 2014 if (life) { 2015 ne_fprintf(f, " dead:"); 2016 for (i = 0; life; ++i, life >>= 1) { 2017 if (life & 1) { 2018 ne_fprintf(f, " %d", i); 2019 } 2020 } 2021 } 2022 } 2023 2024 if (have_prefs) { 2025 for (i = 0; i < nb_oargs; ++i) { 2026 TCGRegSet set = op->output_pref[i]; 2027 2028 if (i == 0) { 2029 ne_fprintf(f, " pref="); 2030 } else { 2031 ne_fprintf(f, ","); 2032 } 2033 if (set == 0) { 2034 ne_fprintf(f, "none"); 2035 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2036 ne_fprintf(f, "all"); 2037 #ifdef CONFIG_DEBUG_TCG 2038 } else if (tcg_regset_single(set)) { 2039 TCGReg reg = tcg_regset_first(set); 2040 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2041 #endif 2042 } else if (TCG_TARGET_NB_REGS <= 32) { 2043 ne_fprintf(f, "0x%x", (uint32_t)set); 2044 } else { 2045 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2046 } 2047 } 2048 } 2049 2050 putc('\n', f); 2051 } 2052 } 2053 2054 /* we give more priority to constraints with less registers */ 2055 static int get_constraint_priority(const TCGOpDef *def, int k) 2056 { 2057 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2058 int n; 2059 2060 if (arg_ct->oalias) { 2061 /* an alias is equivalent to a single register */ 2062 n = 1; 2063 } else { 2064 n = ctpop64(arg_ct->regs); 2065 } 2066 return TCG_TARGET_NB_REGS - n + 1; 2067 } 2068 2069 /* sort from highest priority to lowest */ 2070 static void sort_constraints(TCGOpDef *def, int start, int n) 2071 { 2072 int i, j; 2073 TCGArgConstraint *a = def->args_ct; 2074 2075 for (i = 0; i < n; i++) { 2076 a[start + i].sort_index = start + i; 2077 } 2078 if (n <= 1) { 2079 return; 2080 } 2081 for (i = 0; i < n - 1; i++) { 2082 for (j = i + 1; j < n; j++) { 2083 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2084 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2085 if (p1 < p2) { 2086 int tmp = a[start + i].sort_index; 2087 a[start + i].sort_index = a[start + j].sort_index; 2088 a[start + j].sort_index = tmp; 2089 } 2090 } 2091 } 2092 } 2093 2094 static void process_op_defs(TCGContext *s) 2095 { 2096 TCGOpcode op; 2097 2098 for (op = 0; op < NB_OPS; op++) { 2099 TCGOpDef *def = &tcg_op_defs[op]; 2100 const TCGTargetOpDef *tdefs; 2101 int i, nb_args; 2102 2103 if (def->flags & TCG_OPF_NOT_PRESENT) { 2104 continue; 2105 } 2106 2107 nb_args = def->nb_iargs + def->nb_oargs; 2108 if (nb_args == 0) { 2109 continue; 2110 } 2111 2112 /* 2113 * Macro magic should make it impossible, but double-check that 2114 * the array index is in range. Since the signness of an enum 2115 * is implementation defined, force the result to unsigned. 2116 */ 2117 unsigned con_set = tcg_target_op_def(op); 2118 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2119 tdefs = &constraint_sets[con_set]; 2120 2121 for (i = 0; i < nb_args; i++) { 2122 const char *ct_str = tdefs->args_ct_str[i]; 2123 /* Incomplete TCGTargetOpDef entry. */ 2124 tcg_debug_assert(ct_str != NULL); 2125 2126 while (*ct_str != '\0') { 2127 switch(*ct_str) { 2128 case '0' ... 
'9': 2129 { 2130 int oarg = *ct_str - '0'; 2131 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2132 tcg_debug_assert(oarg < def->nb_oargs); 2133 tcg_debug_assert(def->args_ct[oarg].regs != 0); 2134 def->args_ct[i] = def->args_ct[oarg]; 2135 /* The output sets oalias. */ 2136 def->args_ct[oarg].oalias = true; 2137 def->args_ct[oarg].alias_index = i; 2138 /* The input sets ialias. */ 2139 def->args_ct[i].ialias = true; 2140 def->args_ct[i].alias_index = oarg; 2141 } 2142 ct_str++; 2143 break; 2144 case '&': 2145 def->args_ct[i].newreg = true; 2146 ct_str++; 2147 break; 2148 case 'i': 2149 def->args_ct[i].ct |= TCG_CT_CONST; 2150 ct_str++; 2151 break; 2152 2153 /* Include all of the target-specific constraints. */ 2154 2155 #undef CONST 2156 #define CONST(CASE, MASK) \ 2157 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break; 2158 #define REGS(CASE, MASK) \ 2159 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break; 2160 2161 #include "tcg-target-con-str.h" 2162 2163 #undef REGS 2164 #undef CONST 2165 default: 2166 /* Typo in TCGTargetOpDef constraint. */ 2167 g_assert_not_reached(); 2168 } 2169 } 2170 } 2171 2172 /* TCGTargetOpDef entry with too much information? */ 2173 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2174 2175 /* sort the constraints (XXX: this is just an heuristic) */ 2176 sort_constraints(def, 0, def->nb_oargs); 2177 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2178 } 2179 } 2180 2181 void tcg_op_remove(TCGContext *s, TCGOp *op) 2182 { 2183 TCGLabel *label; 2184 2185 switch (op->opc) { 2186 case INDEX_op_br: 2187 label = arg_label(op->args[0]); 2188 label->refs--; 2189 break; 2190 case INDEX_op_brcond_i32: 2191 case INDEX_op_brcond_i64: 2192 label = arg_label(op->args[3]); 2193 label->refs--; 2194 break; 2195 case INDEX_op_brcond2_i32: 2196 label = arg_label(op->args[5]); 2197 label->refs--; 2198 break; 2199 default: 2200 break; 2201 } 2202 2203 QTAILQ_REMOVE(&s->ops, op, link); 2204 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2205 s->nb_ops--; 2206 2207 #ifdef CONFIG_PROFILER 2208 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2209 #endif 2210 } 2211 2212 void tcg_remove_ops_after(TCGOp *op) 2213 { 2214 TCGContext *s = tcg_ctx; 2215 2216 while (true) { 2217 TCGOp *last = tcg_last_op(); 2218 if (last == op) { 2219 return; 2220 } 2221 tcg_op_remove(s, last); 2222 } 2223 } 2224 2225 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2226 { 2227 TCGContext *s = tcg_ctx; 2228 TCGOp *op; 2229 2230 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2231 op = tcg_malloc(sizeof(TCGOp)); 2232 } else { 2233 op = QTAILQ_FIRST(&s->free_ops); 2234 QTAILQ_REMOVE(&s->free_ops, op, link); 2235 } 2236 memset(op, 0, offsetof(TCGOp, link)); 2237 op->opc = opc; 2238 s->nb_ops++; 2239 2240 return op; 2241 } 2242 2243 TCGOp *tcg_emit_op(TCGOpcode opc) 2244 { 2245 TCGOp *op = tcg_op_alloc(opc); 2246 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2247 return op; 2248 } 2249 2250 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2251 { 2252 TCGOp *new_op = tcg_op_alloc(opc); 2253 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2254 return new_op; 2255 } 2256 2257 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2258 { 2259 TCGOp *new_op = tcg_op_alloc(opc); 2260 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2261 return new_op; 2262 } 2263 2264 /* Reachable analysis : remove unreachable code. 
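Code is dead after an unconditional transfer of control (br, exit_tb, goto_ptr) or after a helper call marked TCG_CALL_NO_RETURN, and becomes live again at the next label that still has references. As a small illustrative sketch of the single-reference case handled below, once the optimizer has folded a conditional branch into a plain br,

       br $L1
       mov_i32 tmp2,tmp3      <- unreachable, removed
       set_label $L1          <- its only reference was the br; both removed

nothing of the original branch remains.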
*/ 2265 static void reachable_code_pass(TCGContext *s) 2266 { 2267 TCGOp *op, *op_next; 2268 bool dead = false; 2269 2270 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2271 bool remove = dead; 2272 TCGLabel *label; 2273 2274 switch (op->opc) { 2275 case INDEX_op_set_label: 2276 label = arg_label(op->args[0]); 2277 if (label->refs == 0) { 2278 /* 2279 * While there is an occasional backward branch, virtually 2280 * all branches generated by the translators are forward. 2281 * Which means that generally we will have already removed 2282 * all references to the label that will be, and there is 2283 * little to be gained by iterating. 2284 */ 2285 remove = true; 2286 } else { 2287 /* Once we see a label, insns become live again. */ 2288 dead = false; 2289 remove = false; 2290 2291 /* 2292 * Optimization can fold conditional branches to unconditional. 2293 * If we find a label with one reference which is preceded by 2294 * an unconditional branch to it, remove both. This needed to 2295 * wait until the dead code in between them was removed. 2296 */ 2297 if (label->refs == 1) { 2298 TCGOp *op_prev = QTAILQ_PREV(op, link); 2299 if (op_prev->opc == INDEX_op_br && 2300 label == arg_label(op_prev->args[0])) { 2301 tcg_op_remove(s, op_prev); 2302 remove = true; 2303 } 2304 } 2305 } 2306 break; 2307 2308 case INDEX_op_br: 2309 case INDEX_op_exit_tb: 2310 case INDEX_op_goto_ptr: 2311 /* Unconditional branches; everything following is dead. */ 2312 dead = true; 2313 break; 2314 2315 case INDEX_op_call: 2316 /* Notice noreturn helper calls, raising exceptions. */ 2317 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2318 dead = true; 2319 } 2320 break; 2321 2322 case INDEX_op_insn_start: 2323 /* Never remove -- we need to keep these for unwind. */ 2324 remove = false; 2325 break; 2326 2327 default: 2328 break; 2329 } 2330 2331 if (remove) { 2332 tcg_op_remove(s, op); 2333 } 2334 } 2335 } 2336 2337 #define TS_DEAD 1 2338 #define TS_MEM 2 2339 2340 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2341 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2342 2343 /* For liveness_pass_1, the register preferences for a given temp. */ 2344 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2345 { 2346 return ts->state_ptr; 2347 } 2348 2349 /* For liveness_pass_1, reset the preferences for a given temp to the 2350 * maximal regset for its type. 2351 */ 2352 static inline void la_reset_pref(TCGTemp *ts) 2353 { 2354 *la_temp_pref(ts) 2355 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2356 } 2357 2358 /* liveness analysis: end of function: all temps are dead, and globals 2359 should be in memory. */ 2360 static void la_func_end(TCGContext *s, int ng, int nt) 2361 { 2362 int i; 2363 2364 for (i = 0; i < ng; ++i) { 2365 s->temps[i].state = TS_DEAD | TS_MEM; 2366 la_reset_pref(&s->temps[i]); 2367 } 2368 for (i = ng; i < nt; ++i) { 2369 s->temps[i].state = TS_DEAD; 2370 la_reset_pref(&s->temps[i]); 2371 } 2372 } 2373 2374 /* liveness analysis: end of basic block: all temps are dead, globals 2375 and local temps should be in memory. 
*/ 2376 static void la_bb_end(TCGContext *s, int ng, int nt) 2377 { 2378 int i; 2379 2380 for (i = 0; i < nt; ++i) { 2381 TCGTemp *ts = &s->temps[i]; 2382 int state; 2383 2384 switch (ts->kind) { 2385 case TEMP_FIXED: 2386 case TEMP_GLOBAL: 2387 case TEMP_LOCAL: 2388 state = TS_DEAD | TS_MEM; 2389 break; 2390 case TEMP_NORMAL: 2391 case TEMP_EBB: 2392 case TEMP_CONST: 2393 state = TS_DEAD; 2394 break; 2395 default: 2396 g_assert_not_reached(); 2397 } 2398 ts->state = state; 2399 la_reset_pref(ts); 2400 } 2401 } 2402 2403 /* liveness analysis: sync globals back to memory. */ 2404 static void la_global_sync(TCGContext *s, int ng) 2405 { 2406 int i; 2407 2408 for (i = 0; i < ng; ++i) { 2409 int state = s->temps[i].state; 2410 s->temps[i].state = state | TS_MEM; 2411 if (state == TS_DEAD) { 2412 /* If the global was previously dead, reset prefs. */ 2413 la_reset_pref(&s->temps[i]); 2414 } 2415 } 2416 } 2417 2418 /* 2419 * liveness analysis: conditional branch: all temps are dead unless 2420 * explicitly live-across-conditional-branch, globals and local temps 2421 * should be synced. 2422 */ 2423 static void la_bb_sync(TCGContext *s, int ng, int nt) 2424 { 2425 la_global_sync(s, ng); 2426 2427 for (int i = ng; i < nt; ++i) { 2428 TCGTemp *ts = &s->temps[i]; 2429 int state; 2430 2431 switch (ts->kind) { 2432 case TEMP_LOCAL: 2433 state = ts->state; 2434 ts->state = state | TS_MEM; 2435 if (state != TS_DEAD) { 2436 continue; 2437 } 2438 break; 2439 case TEMP_NORMAL: 2440 s->temps[i].state = TS_DEAD; 2441 break; 2442 case TEMP_EBB: 2443 case TEMP_CONST: 2444 continue; 2445 default: 2446 g_assert_not_reached(); 2447 } 2448 la_reset_pref(&s->temps[i]); 2449 } 2450 } 2451 2452 /* liveness analysis: sync globals back to memory and kill. */ 2453 static void la_global_kill(TCGContext *s, int ng) 2454 { 2455 int i; 2456 2457 for (i = 0; i < ng; i++) { 2458 s->temps[i].state = TS_DEAD | TS_MEM; 2459 la_reset_pref(&s->temps[i]); 2460 } 2461 } 2462 2463 /* liveness analysis: note live globals crossing calls. */ 2464 static void la_cross_call(TCGContext *s, int nt) 2465 { 2466 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2467 int i; 2468 2469 for (i = 0; i < nt; i++) { 2470 TCGTemp *ts = &s->temps[i]; 2471 if (!(ts->state & TS_DEAD)) { 2472 TCGRegSet *pset = la_temp_pref(ts); 2473 TCGRegSet set = *pset; 2474 2475 set &= mask; 2476 /* If the combination is not possible, restart. */ 2477 if (set == 0) { 2478 set = tcg_target_available_regs[ts->type] & mask; 2479 } 2480 *pset = set; 2481 } 2482 } 2483 } 2484 2485 /* Liveness analysis : update the opc_arg_life array to tell if a 2486 given input arguments is dead. Instructions updating dead 2487 temporaries are removed. */ 2488 static void liveness_pass_1(TCGContext *s) 2489 { 2490 int nb_globals = s->nb_globals; 2491 int nb_temps = s->nb_temps; 2492 TCGOp *op, *op_prev; 2493 TCGRegSet *prefs; 2494 int i; 2495 2496 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2497 for (i = 0; i < nb_temps; ++i) { 2498 s->temps[i].state_ptr = prefs + i; 2499 } 2500 2501 /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ 2502 la_func_end(s, nb_globals, nb_temps); 2503 2504 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2505 int nb_iargs, nb_oargs; 2506 TCGOpcode opc_new, opc_new2; 2507 bool have_opc_new2; 2508 TCGLifeData arg_life = 0; 2509 TCGTemp *ts; 2510 TCGOpcode opc = op->opc; 2511 const TCGOpDef *def = &tcg_op_defs[opc]; 2512 2513 switch (opc) { 2514 case INDEX_op_call: 2515 { 2516 int call_flags; 2517 int nb_call_regs; 2518 2519 nb_oargs = TCGOP_CALLO(op); 2520 nb_iargs = TCGOP_CALLI(op); 2521 call_flags = tcg_call_flags(op); 2522 2523 /* pure functions can be removed if their result is unused */ 2524 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2525 for (i = 0; i < nb_oargs; i++) { 2526 ts = arg_temp(op->args[i]); 2527 if (ts->state != TS_DEAD) { 2528 goto do_not_remove_call; 2529 } 2530 } 2531 goto do_remove; 2532 } 2533 do_not_remove_call: 2534 2535 /* Output args are dead. */ 2536 for (i = 0; i < nb_oargs; i++) { 2537 ts = arg_temp(op->args[i]); 2538 if (ts->state & TS_DEAD) { 2539 arg_life |= DEAD_ARG << i; 2540 } 2541 if (ts->state & TS_MEM) { 2542 arg_life |= SYNC_ARG << i; 2543 } 2544 ts->state = TS_DEAD; 2545 la_reset_pref(ts); 2546 2547 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2548 op->output_pref[i] = 0; 2549 } 2550 2551 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2552 TCG_CALL_NO_READ_GLOBALS))) { 2553 la_global_kill(s, nb_globals); 2554 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2555 la_global_sync(s, nb_globals); 2556 } 2557 2558 /* Record arguments that die in this helper. */ 2559 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2560 ts = arg_temp(op->args[i]); 2561 if (ts && ts->state & TS_DEAD) { 2562 arg_life |= DEAD_ARG << i; 2563 } 2564 } 2565 2566 /* For all live registers, remove call-clobbered prefs. */ 2567 la_cross_call(s, nb_temps); 2568 2569 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2570 2571 /* Input arguments are live for preceding opcodes. */ 2572 for (i = 0; i < nb_iargs; i++) { 2573 ts = arg_temp(op->args[i + nb_oargs]); 2574 if (ts && ts->state & TS_DEAD) { 2575 /* For those arguments that die, and will be allocated 2576 * in registers, clear the register set for that arg, 2577 * to be filled in below. For args that will be on 2578 * the stack, reset to any available reg. 2579 */ 2580 *la_temp_pref(ts) 2581 = (i < nb_call_regs ? 0 : 2582 tcg_target_available_regs[ts->type]); 2583 ts->state &= ~TS_DEAD; 2584 } 2585 } 2586 2587 /* For each input argument, add its input register to prefs. 2588 If a temp is used once, this produces a single set bit. */ 2589 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2590 ts = arg_temp(op->args[i + nb_oargs]); 2591 if (ts) { 2592 tcg_regset_set_reg(*la_temp_pref(ts), 2593 tcg_target_call_iarg_regs[i]); 2594 } 2595 } 2596 } 2597 break; 2598 case INDEX_op_insn_start: 2599 break; 2600 case INDEX_op_discard: 2601 /* mark the temporary as dead */ 2602 ts = arg_temp(op->args[0]); 2603 ts->state = TS_DEAD; 2604 la_reset_pref(ts); 2605 break; 2606 2607 case INDEX_op_add2_i32: 2608 opc_new = INDEX_op_add_i32; 2609 goto do_addsub2; 2610 case INDEX_op_sub2_i32: 2611 opc_new = INDEX_op_sub_i32; 2612 goto do_addsub2; 2613 case INDEX_op_add2_i64: 2614 opc_new = INDEX_op_add_i64; 2615 goto do_addsub2; 2616 case INDEX_op_sub2_i64: 2617 opc_new = INDEX_op_sub_i64; 2618 do_addsub2: 2619 nb_iargs = 4; 2620 nb_oargs = 2; 2621 /* Test if the high part of the operation is dead, but not 2622 the low part. The result can be optimized to a simple 2623 add or sub. 
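For example, with the argument order (rl, rh, al, ah, bl, bh), an add2_i32 whose rh output is dead is rewritten in place into add_i32 rl,al,bl.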
This happens often for x86_64 guest when the 2624 cpu mode is set to 32 bit. */ 2625 if (arg_temp(op->args[1])->state == TS_DEAD) { 2626 if (arg_temp(op->args[0])->state == TS_DEAD) { 2627 goto do_remove; 2628 } 2629 /* Replace the opcode and adjust the args in place, 2630 leaving 3 unused args at the end. */ 2631 op->opc = opc = opc_new; 2632 op->args[1] = op->args[2]; 2633 op->args[2] = op->args[4]; 2634 /* Fall through and mark the single-word operation live. */ 2635 nb_iargs = 2; 2636 nb_oargs = 1; 2637 } 2638 goto do_not_remove; 2639 2640 case INDEX_op_mulu2_i32: 2641 opc_new = INDEX_op_mul_i32; 2642 opc_new2 = INDEX_op_muluh_i32; 2643 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2644 goto do_mul2; 2645 case INDEX_op_muls2_i32: 2646 opc_new = INDEX_op_mul_i32; 2647 opc_new2 = INDEX_op_mulsh_i32; 2648 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2649 goto do_mul2; 2650 case INDEX_op_mulu2_i64: 2651 opc_new = INDEX_op_mul_i64; 2652 opc_new2 = INDEX_op_muluh_i64; 2653 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2654 goto do_mul2; 2655 case INDEX_op_muls2_i64: 2656 opc_new = INDEX_op_mul_i64; 2657 opc_new2 = INDEX_op_mulsh_i64; 2658 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2659 goto do_mul2; 2660 do_mul2: 2661 nb_iargs = 2; 2662 nb_oargs = 2; 2663 if (arg_temp(op->args[1])->state == TS_DEAD) { 2664 if (arg_temp(op->args[0])->state == TS_DEAD) { 2665 /* Both parts of the operation are dead. */ 2666 goto do_remove; 2667 } 2668 /* The high part of the operation is dead; generate the low. */ 2669 op->opc = opc = opc_new; 2670 op->args[1] = op->args[2]; 2671 op->args[2] = op->args[3]; 2672 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2673 /* The low part of the operation is dead; generate the high. */ 2674 op->opc = opc = opc_new2; 2675 op->args[0] = op->args[1]; 2676 op->args[1] = op->args[2]; 2677 op->args[2] = op->args[3]; 2678 } else { 2679 goto do_not_remove; 2680 } 2681 /* Mark the single-word operation live. */ 2682 nb_oargs = 1; 2683 goto do_not_remove; 2684 2685 default: 2686 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2687 nb_iargs = def->nb_iargs; 2688 nb_oargs = def->nb_oargs; 2689 2690 /* Test if the operation can be removed because all 2691 its outputs are dead. We assume that nb_oargs == 0 2692 implies side effects */ 2693 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2694 for (i = 0; i < nb_oargs; i++) { 2695 if (arg_temp(op->args[i])->state != TS_DEAD) { 2696 goto do_not_remove; 2697 } 2698 } 2699 goto do_remove; 2700 } 2701 goto do_not_remove; 2702 2703 do_remove: 2704 tcg_op_remove(s, op); 2705 break; 2706 2707 do_not_remove: 2708 for (i = 0; i < nb_oargs; i++) { 2709 ts = arg_temp(op->args[i]); 2710 2711 /* Remember the preference of the uses that followed. */ 2712 op->output_pref[i] = *la_temp_pref(ts); 2713 2714 /* Output args are dead. */ 2715 if (ts->state & TS_DEAD) { 2716 arg_life |= DEAD_ARG << i; 2717 } 2718 if (ts->state & TS_MEM) { 2719 arg_life |= SYNC_ARG << i; 2720 } 2721 ts->state = TS_DEAD; 2722 la_reset_pref(ts); 2723 } 2724 2725 /* If end of basic block, update. 
*/ 2726 if (def->flags & TCG_OPF_BB_EXIT) { 2727 la_func_end(s, nb_globals, nb_temps); 2728 } else if (def->flags & TCG_OPF_COND_BRANCH) { 2729 la_bb_sync(s, nb_globals, nb_temps); 2730 } else if (def->flags & TCG_OPF_BB_END) { 2731 la_bb_end(s, nb_globals, nb_temps); 2732 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2733 la_global_sync(s, nb_globals); 2734 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2735 la_cross_call(s, nb_temps); 2736 } 2737 } 2738 2739 /* Record arguments that die in this opcode. */ 2740 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2741 ts = arg_temp(op->args[i]); 2742 if (ts->state & TS_DEAD) { 2743 arg_life |= DEAD_ARG << i; 2744 } 2745 } 2746 2747 /* Input arguments are live for preceding opcodes. */ 2748 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2749 ts = arg_temp(op->args[i]); 2750 if (ts->state & TS_DEAD) { 2751 /* For operands that were dead, initially allow 2752 all regs for the type. */ 2753 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2754 ts->state &= ~TS_DEAD; 2755 } 2756 } 2757 2758 /* Incorporate constraints for this operand. */ 2759 switch (opc) { 2760 case INDEX_op_mov_i32: 2761 case INDEX_op_mov_i64: 2762 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2763 have proper constraints. That said, special case 2764 moves to propagate preferences backward. */ 2765 if (IS_DEAD_ARG(1)) { 2766 *la_temp_pref(arg_temp(op->args[0])) 2767 = *la_temp_pref(arg_temp(op->args[1])); 2768 } 2769 break; 2770 2771 default: 2772 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2773 const TCGArgConstraint *ct = &def->args_ct[i]; 2774 TCGRegSet set, *pset; 2775 2776 ts = arg_temp(op->args[i]); 2777 pset = la_temp_pref(ts); 2778 set = *pset; 2779 2780 set &= ct->regs; 2781 if (ct->ialias) { 2782 set &= op->output_pref[ct->alias_index]; 2783 } 2784 /* If the combination is not possible, restart. */ 2785 if (set == 0) { 2786 set = ct->regs; 2787 } 2788 *pset = set; 2789 } 2790 break; 2791 } 2792 break; 2793 } 2794 op->life = arg_life; 2795 } 2796 } 2797 2798 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2799 static bool liveness_pass_2(TCGContext *s) 2800 { 2801 int nb_globals = s->nb_globals; 2802 int nb_temps, i; 2803 bool changes = false; 2804 TCGOp *op, *op_next; 2805 2806 /* Create a temporary for each indirect global. */ 2807 for (i = 0; i < nb_globals; ++i) { 2808 TCGTemp *its = &s->temps[i]; 2809 if (its->indirect_reg) { 2810 TCGTemp *dts = tcg_temp_alloc(s); 2811 dts->type = its->type; 2812 dts->base_type = its->base_type; 2813 dts->kind = TEMP_EBB; 2814 its->state_ptr = dts; 2815 } else { 2816 its->state_ptr = NULL; 2817 } 2818 /* All globals begin dead. */ 2819 its->state = TS_DEAD; 2820 } 2821 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2822 TCGTemp *its = &s->temps[i]; 2823 its->state_ptr = NULL; 2824 its->state = TS_DEAD; 2825 } 2826 2827 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2828 TCGOpcode opc = op->opc; 2829 const TCGOpDef *def = &tcg_op_defs[opc]; 2830 TCGLifeData arg_life = op->life; 2831 int nb_iargs, nb_oargs, call_flags; 2832 TCGTemp *arg_ts, *dir_ts; 2833 2834 if (opc == INDEX_op_call) { 2835 nb_oargs = TCGOP_CALLO(op); 2836 nb_iargs = TCGOP_CALLI(op); 2837 call_flags = tcg_call_flags(op); 2838 } else { 2839 nb_iargs = def->nb_iargs; 2840 nb_oargs = def->nb_oargs; 2841 2842 /* Set flags similar to how calls require. 
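A conditional branch or an op with side effects behaves like a call that may read but not write globals (they only need syncing), a block-ending op like a call that may also write them (they must be saved), and any other op like a call that touches no globals at all.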
*/ 2843 if (def->flags & TCG_OPF_COND_BRANCH) { 2844 /* Like reading globals: sync_globals */ 2845 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2846 } else if (def->flags & TCG_OPF_BB_END) { 2847 /* Like writing globals: save_globals */ 2848 call_flags = 0; 2849 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2850 /* Like reading globals: sync_globals */ 2851 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2852 } else { 2853 /* No effect on globals. */ 2854 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2855 TCG_CALL_NO_WRITE_GLOBALS); 2856 } 2857 } 2858 2859 /* Make sure that input arguments are available. */ 2860 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2861 arg_ts = arg_temp(op->args[i]); 2862 if (arg_ts) { 2863 dir_ts = arg_ts->state_ptr; 2864 if (dir_ts && arg_ts->state == TS_DEAD) { 2865 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2866 ? INDEX_op_ld_i32 2867 : INDEX_op_ld_i64); 2868 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 2869 2870 lop->args[0] = temp_arg(dir_ts); 2871 lop->args[1] = temp_arg(arg_ts->mem_base); 2872 lop->args[2] = arg_ts->mem_offset; 2873 2874 /* Loaded, but synced with memory. */ 2875 arg_ts->state = TS_MEM; 2876 } 2877 } 2878 } 2879 2880 /* Perform input replacement, and mark inputs that became dead. 2881 No action is required except keeping temp_state up to date 2882 so that we reload when needed. */ 2883 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2884 arg_ts = arg_temp(op->args[i]); 2885 if (arg_ts) { 2886 dir_ts = arg_ts->state_ptr; 2887 if (dir_ts) { 2888 op->args[i] = temp_arg(dir_ts); 2889 changes = true; 2890 if (IS_DEAD_ARG(i)) { 2891 arg_ts->state = TS_DEAD; 2892 } 2893 } 2894 } 2895 } 2896 2897 /* Liveness analysis should ensure that the following are 2898 all correct, for call sites and basic block end points. */ 2899 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2900 /* Nothing to do */ 2901 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2902 for (i = 0; i < nb_globals; ++i) { 2903 /* Liveness should see that globals are synced back, 2904 that is, either TS_DEAD or TS_MEM. */ 2905 arg_ts = &s->temps[i]; 2906 tcg_debug_assert(arg_ts->state_ptr == 0 2907 || arg_ts->state != 0); 2908 } 2909 } else { 2910 for (i = 0; i < nb_globals; ++i) { 2911 /* Liveness should see that globals are saved back, 2912 that is, TS_DEAD, waiting to be reloaded. */ 2913 arg_ts = &s->temps[i]; 2914 tcg_debug_assert(arg_ts->state_ptr == 0 2915 || arg_ts->state == TS_DEAD); 2916 } 2917 } 2918 2919 /* Outputs become available. */ 2920 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 2921 arg_ts = arg_temp(op->args[0]); 2922 dir_ts = arg_ts->state_ptr; 2923 if (dir_ts) { 2924 op->args[0] = temp_arg(dir_ts); 2925 changes = true; 2926 2927 /* The output is now live and modified. */ 2928 arg_ts->state = 0; 2929 2930 if (NEED_SYNC_ARG(0)) { 2931 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2932 ? 
INDEX_op_st_i32 2933 : INDEX_op_st_i64); 2934 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2935 TCGTemp *out_ts = dir_ts; 2936 2937 if (IS_DEAD_ARG(0)) { 2938 out_ts = arg_temp(op->args[1]); 2939 arg_ts->state = TS_DEAD; 2940 tcg_op_remove(s, op); 2941 } else { 2942 arg_ts->state = TS_MEM; 2943 } 2944 2945 sop->args[0] = temp_arg(out_ts); 2946 sop->args[1] = temp_arg(arg_ts->mem_base); 2947 sop->args[2] = arg_ts->mem_offset; 2948 } else { 2949 tcg_debug_assert(!IS_DEAD_ARG(0)); 2950 } 2951 } 2952 } else { 2953 for (i = 0; i < nb_oargs; i++) { 2954 arg_ts = arg_temp(op->args[i]); 2955 dir_ts = arg_ts->state_ptr; 2956 if (!dir_ts) { 2957 continue; 2958 } 2959 op->args[i] = temp_arg(dir_ts); 2960 changes = true; 2961 2962 /* The output is now live and modified. */ 2963 arg_ts->state = 0; 2964 2965 /* Sync outputs upon their last write. */ 2966 if (NEED_SYNC_ARG(i)) { 2967 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2968 ? INDEX_op_st_i32 2969 : INDEX_op_st_i64); 2970 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2971 2972 sop->args[0] = temp_arg(dir_ts); 2973 sop->args[1] = temp_arg(arg_ts->mem_base); 2974 sop->args[2] = arg_ts->mem_offset; 2975 2976 arg_ts->state = TS_MEM; 2977 } 2978 /* Drop outputs that are dead. */ 2979 if (IS_DEAD_ARG(i)) { 2980 arg_ts->state = TS_DEAD; 2981 } 2982 } 2983 } 2984 } 2985 2986 return changes; 2987 } 2988 2989 #ifdef CONFIG_DEBUG_TCG 2990 static void dump_regs(TCGContext *s) 2991 { 2992 TCGTemp *ts; 2993 int i; 2994 char buf[64]; 2995 2996 for(i = 0; i < s->nb_temps; i++) { 2997 ts = &s->temps[i]; 2998 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2999 switch(ts->val_type) { 3000 case TEMP_VAL_REG: 3001 printf("%s", tcg_target_reg_names[ts->reg]); 3002 break; 3003 case TEMP_VAL_MEM: 3004 printf("%d(%s)", (int)ts->mem_offset, 3005 tcg_target_reg_names[ts->mem_base->reg]); 3006 break; 3007 case TEMP_VAL_CONST: 3008 printf("$0x%" PRIx64, ts->val); 3009 break; 3010 case TEMP_VAL_DEAD: 3011 printf("D"); 3012 break; 3013 default: 3014 printf("???"); 3015 break; 3016 } 3017 printf("\n"); 3018 } 3019 3020 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 3021 if (s->reg_to_temp[i] != NULL) { 3022 printf("%s: %s\n", 3023 tcg_target_reg_names[i], 3024 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 3025 } 3026 } 3027 } 3028 3029 static void check_regs(TCGContext *s) 3030 { 3031 int reg; 3032 int k; 3033 TCGTemp *ts; 3034 char buf[64]; 3035 3036 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 3037 ts = s->reg_to_temp[reg]; 3038 if (ts != NULL) { 3039 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 3040 printf("Inconsistency for register %s:\n", 3041 tcg_target_reg_names[reg]); 3042 goto fail; 3043 } 3044 } 3045 } 3046 for (k = 0; k < s->nb_temps; k++) { 3047 ts = &s->temps[k]; 3048 if (ts->val_type == TEMP_VAL_REG 3049 && ts->kind != TEMP_FIXED 3050 && s->reg_to_temp[ts->reg] != ts) { 3051 printf("Inconsistency for temp %s:\n", 3052 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3053 fail: 3054 printf("reg state:\n"); 3055 dump_regs(s); 3056 tcg_abort(); 3057 } 3058 } 3059 } 3060 #endif 3061 3062 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3063 { 3064 intptr_t off, size, align; 3065 3066 switch (ts->type) { 3067 case TCG_TYPE_I32: 3068 size = align = 4; 3069 break; 3070 case TCG_TYPE_I64: 3071 case TCG_TYPE_V64: 3072 size = align = 8; 3073 break; 3074 case TCG_TYPE_V128: 3075 size = align = 16; 3076 break; 3077 case TCG_TYPE_V256: 3078 /* Note that we do not require aligned storage for V256. 
*/ 3079 size = 32, align = 16; 3080 break; 3081 default: 3082 g_assert_not_reached(); 3083 } 3084 3085 /* 3086 * Assume the stack is sufficiently aligned. 3087 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3088 * and do not require 16 byte vector alignment. This seems slightly 3089 * easier than fully parameterizing the above switch statement. 3090 */ 3091 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3092 off = ROUND_UP(s->current_frame_offset, align); 3093 3094 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3095 if (off + size > s->frame_end) { 3096 tcg_raise_tb_overflow(s); 3097 } 3098 s->current_frame_offset = off + size; 3099 3100 ts->mem_offset = off; 3101 #if defined(__sparc__) 3102 ts->mem_offset += TCG_TARGET_STACK_BIAS; 3103 #endif 3104 ts->mem_base = s->frame_temp; 3105 ts->mem_allocated = 1; 3106 } 3107 3108 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3109 3110 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3111 mark it free; otherwise mark it dead. */ 3112 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3113 { 3114 TCGTempVal new_type; 3115 3116 switch (ts->kind) { 3117 case TEMP_FIXED: 3118 return; 3119 case TEMP_GLOBAL: 3120 case TEMP_LOCAL: 3121 new_type = TEMP_VAL_MEM; 3122 break; 3123 case TEMP_NORMAL: 3124 case TEMP_EBB: 3125 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3126 break; 3127 case TEMP_CONST: 3128 new_type = TEMP_VAL_CONST; 3129 break; 3130 default: 3131 g_assert_not_reached(); 3132 } 3133 if (ts->val_type == TEMP_VAL_REG) { 3134 s->reg_to_temp[ts->reg] = NULL; 3135 } 3136 ts->val_type = new_type; 3137 } 3138 3139 /* Mark a temporary as dead. */ 3140 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3141 { 3142 temp_free_or_dead(s, ts, 1); 3143 } 3144 3145 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3146 registers needs to be allocated to store a constant. If 'free_or_dead' 3147 is non-zero, subsequently release the temporary; if it is positive, the 3148 temp is dead; if it is negative, the temp is free. */ 3149 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3150 TCGRegSet preferred_regs, int free_or_dead) 3151 { 3152 if (!temp_readonly(ts) && !ts->mem_coherent) { 3153 if (!ts->mem_allocated) { 3154 temp_allocate_frame(s, ts); 3155 } 3156 switch (ts->val_type) { 3157 case TEMP_VAL_CONST: 3158 /* If we're going to free the temp immediately, then we won't 3159 require it later in a register, so attempt to store the 3160 constant to memory directly. 
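tcg_out_sti is allowed to reject the value, e.g. when it cannot be encoded as a store-immediate on this host; in that case we fall through, load the constant into a register, and store from there.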
*/ 3161 if (free_or_dead 3162 && tcg_out_sti(s, ts->type, ts->val, 3163 ts->mem_base->reg, ts->mem_offset)) { 3164 break; 3165 } 3166 temp_load(s, ts, tcg_target_available_regs[ts->type], 3167 allocated_regs, preferred_regs); 3168 /* fallthrough */ 3169 3170 case TEMP_VAL_REG: 3171 tcg_out_st(s, ts->type, ts->reg, 3172 ts->mem_base->reg, ts->mem_offset); 3173 break; 3174 3175 case TEMP_VAL_MEM: 3176 break; 3177 3178 case TEMP_VAL_DEAD: 3179 default: 3180 tcg_abort(); 3181 } 3182 ts->mem_coherent = 1; 3183 } 3184 if (free_or_dead) { 3185 temp_free_or_dead(s, ts, free_or_dead); 3186 } 3187 } 3188 3189 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3190 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3191 { 3192 TCGTemp *ts = s->reg_to_temp[reg]; 3193 if (ts != NULL) { 3194 temp_sync(s, ts, allocated_regs, 0, -1); 3195 } 3196 } 3197 3198 /** 3199 * tcg_reg_alloc: 3200 * @required_regs: Set of registers in which we must allocate. 3201 * @allocated_regs: Set of registers which must be avoided. 3202 * @preferred_regs: Set of registers we should prefer. 3203 * @rev: True if we search the registers in "indirect" order. 3204 * 3205 * The allocated register must be in @required_regs & ~@allocated_regs, 3206 * but if we can put it in @preferred_regs we may save a move later. 3207 */ 3208 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3209 TCGRegSet allocated_regs, 3210 TCGRegSet preferred_regs, bool rev) 3211 { 3212 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3213 TCGRegSet reg_ct[2]; 3214 const int *order; 3215 3216 reg_ct[1] = required_regs & ~allocated_regs; 3217 tcg_debug_assert(reg_ct[1] != 0); 3218 reg_ct[0] = reg_ct[1] & preferred_regs; 3219 3220 /* Skip the preferred_regs option if it cannot be satisfied, 3221 or if the preference made no difference. */ 3222 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3223 3224 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3225 3226 /* Try free registers, preferences first. */ 3227 for (j = f; j < 2; j++) { 3228 TCGRegSet set = reg_ct[j]; 3229 3230 if (tcg_regset_single(set)) { 3231 /* One register in the set. */ 3232 TCGReg reg = tcg_regset_first(set); 3233 if (s->reg_to_temp[reg] == NULL) { 3234 return reg; 3235 } 3236 } else { 3237 for (i = 0; i < n; i++) { 3238 TCGReg reg = order[i]; 3239 if (s->reg_to_temp[reg] == NULL && 3240 tcg_regset_test_reg(set, reg)) { 3241 return reg; 3242 } 3243 } 3244 } 3245 } 3246 3247 /* We must spill something. */ 3248 for (j = f; j < 2; j++) { 3249 TCGRegSet set = reg_ct[j]; 3250 3251 if (tcg_regset_single(set)) { 3252 /* One register in the set. */ 3253 TCGReg reg = tcg_regset_first(set); 3254 tcg_reg_free(s, reg, allocated_regs); 3255 return reg; 3256 } else { 3257 for (i = 0; i < n; i++) { 3258 TCGReg reg = order[i]; 3259 if (tcg_regset_test_reg(set, reg)) { 3260 tcg_reg_free(s, reg, allocated_regs); 3261 return reg; 3262 } 3263 } 3264 } 3265 } 3266 3267 tcg_abort(); 3268 } 3269 3270 /* Make sure the temporary is in a register. If needed, allocate the register 3271 from DESIRED while avoiding ALLOCATED. 
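PREFERRED only biases the choice; tcg_reg_alloc falls back to any register in DESIRED when the preference cannot be satisfied.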
*/ 3272 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3273 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3274 { 3275 TCGReg reg; 3276 3277 switch (ts->val_type) { 3278 case TEMP_VAL_REG: 3279 return; 3280 case TEMP_VAL_CONST: 3281 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3282 preferred_regs, ts->indirect_base); 3283 if (ts->type <= TCG_TYPE_I64) { 3284 tcg_out_movi(s, ts->type, reg, ts->val); 3285 } else { 3286 uint64_t val = ts->val; 3287 MemOp vece = MO_64; 3288 3289 /* 3290 * Find the minimal vector element that matches the constant. 3291 * The targets will, in general, have to do this search anyway, 3292 * so do it generically here. 3293 */ 3294 if (val == dup_const(MO_8, val)) { 3295 vece = MO_8; 3296 } else if (val == dup_const(MO_16, val)) { 3297 vece = MO_16; 3298 } else if (val == dup_const(MO_32, val)) { 3299 vece = MO_32; 3300 } 3301 3302 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 3303 } 3304 ts->mem_coherent = 0; 3305 break; 3306 case TEMP_VAL_MEM: 3307 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3308 preferred_regs, ts->indirect_base); 3309 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3310 ts->mem_coherent = 1; 3311 break; 3312 case TEMP_VAL_DEAD: 3313 default: 3314 tcg_abort(); 3315 } 3316 ts->reg = reg; 3317 ts->val_type = TEMP_VAL_REG; 3318 s->reg_to_temp[reg] = ts; 3319 } 3320
3321 /* Save a temporary to memory. 'allocated_regs' is used in case a 3322 temporary register needs to be allocated to store a constant. */ 3323 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3324 { 3325 /* The liveness analysis already ensures that globals are back 3326 in memory. Keep a tcg_debug_assert for safety. */ 3327 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 3328 } 3329
3330 /* save globals to their canonical location and assume they can be 3331 modified by the following code. 'allocated_regs' is used in case a 3332 temporary register needs to be allocated to store a constant. */ 3333 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3334 { 3335 int i, n; 3336 3337 for (i = 0, n = s->nb_globals; i < n; i++) { 3338 temp_save(s, &s->temps[i], allocated_regs); 3339 } 3340 } 3341
3342 /* sync globals to their canonical location and assume they can be 3343 read by the following code. 'allocated_regs' is used in case a 3344 temporary register needs to be allocated to store a constant. */ 3345 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3346 { 3347 int i, n; 3348 3349 for (i = 0, n = s->nb_globals; i < n; i++) { 3350 TCGTemp *ts = &s->temps[i]; 3351 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3352 || ts->kind == TEMP_FIXED 3353 || ts->mem_coherent); 3354 } 3355 } 3356
3357 /* at the end of a basic block, we assume all temporaries are dead and 3358 all globals are stored at their canonical location. */ 3359 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3360 { 3361 int i; 3362 3363 for (i = s->nb_globals; i < s->nb_temps; i++) { 3364 TCGTemp *ts = &s->temps[i]; 3365 3366 switch (ts->kind) { 3367 case TEMP_LOCAL: 3368 temp_save(s, ts, allocated_regs); 3369 break; 3370 case TEMP_NORMAL: 3371 case TEMP_EBB: 3372 /* The liveness analysis already ensures that temps are dead. 3373 Keep a tcg_debug_assert for safety. */ 3374 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3375 break; 3376 case TEMP_CONST: 3377 /* Similarly, we should have freed any allocated register.
*/ 3378 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3379 break; 3380 default: 3381 g_assert_not_reached(); 3382 } 3383 } 3384 3385 save_globals(s, allocated_regs); 3386 } 3387 3388 /* 3389 * At a conditional branch, we assume all temporaries are dead unless 3390 * explicitly live-across-conditional-branch; all globals and local 3391 * temps are synced to their location. 3392 */ 3393 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3394 { 3395 sync_globals(s, allocated_regs); 3396 3397 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3398 TCGTemp *ts = &s->temps[i]; 3399 /* 3400 * The liveness analysis already ensures that temps are dead. 3401 * Keep tcg_debug_asserts for safety. 3402 */ 3403 switch (ts->kind) { 3404 case TEMP_LOCAL: 3405 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3406 break; 3407 case TEMP_NORMAL: 3408 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3409 break; 3410 case TEMP_EBB: 3411 case TEMP_CONST: 3412 break; 3413 default: 3414 g_assert_not_reached(); 3415 } 3416 } 3417 } 3418 3419 /* 3420 * Specialized code generation for INDEX_op_mov_* with a constant. 3421 */ 3422 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3423 tcg_target_ulong val, TCGLifeData arg_life, 3424 TCGRegSet preferred_regs) 3425 { 3426 /* ENV should not be modified. */ 3427 tcg_debug_assert(!temp_readonly(ots)); 3428 3429 /* The movi is not explicitly generated here. */ 3430 if (ots->val_type == TEMP_VAL_REG) { 3431 s->reg_to_temp[ots->reg] = NULL; 3432 } 3433 ots->val_type = TEMP_VAL_CONST; 3434 ots->val = val; 3435 ots->mem_coherent = 0; 3436 if (NEED_SYNC_ARG(0)) { 3437 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3438 } else if (IS_DEAD_ARG(0)) { 3439 temp_dead(s, ots); 3440 } 3441 } 3442 3443 /* 3444 * Specialized code generation for INDEX_op_mov_*. 3445 */ 3446 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3447 { 3448 const TCGLifeData arg_life = op->life; 3449 TCGRegSet allocated_regs, preferred_regs; 3450 TCGTemp *ts, *ots; 3451 TCGType otype, itype; 3452 3453 allocated_regs = s->reserved_regs; 3454 preferred_regs = op->output_pref[0]; 3455 ots = arg_temp(op->args[0]); 3456 ts = arg_temp(op->args[1]); 3457 3458 /* ENV should not be modified. */ 3459 tcg_debug_assert(!temp_readonly(ots)); 3460 3461 /* Note that otype != itype for no-op truncation. */ 3462 otype = ots->type; 3463 itype = ts->type; 3464 3465 if (ts->val_type == TEMP_VAL_CONST) { 3466 /* propagate constant or generate sti */ 3467 tcg_target_ulong val = ts->val; 3468 if (IS_DEAD_ARG(1)) { 3469 temp_dead(s, ts); 3470 } 3471 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3472 return; 3473 } 3474 3475 /* If the source value is in memory we're going to be forced 3476 to have it in a register in order to perform the copy. Copy 3477 the SOURCE value into its own register first, that way we 3478 don't have to reload SOURCE the next time it is used. */ 3479 if (ts->val_type == TEMP_VAL_MEM) { 3480 temp_load(s, ts, tcg_target_available_regs[itype], 3481 allocated_regs, preferred_regs); 3482 } 3483 3484 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3485 if (IS_DEAD_ARG(0)) { 3486 /* mov to a non-saved dead register makes no sense (even with 3487 liveness analysis disabled). 
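The only way such a mov reaches this point is when the destination must be synced to memory, so store the source value straight into the destination's slot instead of emitting a register-to-register move.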
*/ 3488 tcg_debug_assert(NEED_SYNC_ARG(0)); 3489 if (!ots->mem_allocated) { 3490 temp_allocate_frame(s, ots); 3491 } 3492 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3493 if (IS_DEAD_ARG(1)) { 3494 temp_dead(s, ts); 3495 } 3496 temp_dead(s, ots); 3497 } else { 3498 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 3499 /* the mov can be suppressed */ 3500 if (ots->val_type == TEMP_VAL_REG) { 3501 s->reg_to_temp[ots->reg] = NULL; 3502 } 3503 ots->reg = ts->reg; 3504 temp_dead(s, ts); 3505 } else { 3506 if (ots->val_type != TEMP_VAL_REG) { 3507 /* When allocating a new register, make sure to not spill the 3508 input one. */ 3509 tcg_regset_set_reg(allocated_regs, ts->reg); 3510 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3511 allocated_regs, preferred_regs, 3512 ots->indirect_base); 3513 } 3514 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3515 /* 3516 * Cross register class move not supported. 3517 * Store the source register into the destination slot 3518 * and leave the destination temp as TEMP_VAL_MEM. 3519 */ 3520 assert(!temp_readonly(ots)); 3521 if (!ots->mem_allocated) { 3522 temp_allocate_frame(s, ots); 3523 } 3524 tcg_out_st(s, ts->type, ts->reg, 3525 ots->mem_base->reg, ots->mem_offset); 3526 ots->mem_coherent = 1; 3527 temp_free_or_dead(s, ots, -1); 3528 return; 3529 } 3530 } 3531 ots->val_type = TEMP_VAL_REG; 3532 ots->mem_coherent = 0; 3533 s->reg_to_temp[ots->reg] = ots; 3534 if (NEED_SYNC_ARG(0)) { 3535 temp_sync(s, ots, allocated_regs, 0, 0); 3536 } 3537 } 3538 } 3539
3540 /* 3541 * Specialized code generation for INDEX_op_dup_vec. 3542 */ 3543 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3544 { 3545 const TCGLifeData arg_life = op->life; 3546 TCGRegSet dup_out_regs, dup_in_regs; 3547 TCGTemp *its, *ots; 3548 TCGType itype, vtype; 3549 intptr_t endian_fixup; 3550 unsigned vece; 3551 bool ok; 3552 3553 ots = arg_temp(op->args[0]); 3554 its = arg_temp(op->args[1]); 3555 3556 /* ENV should not be modified. */ 3557 tcg_debug_assert(!temp_readonly(ots)); 3558 3559 itype = its->type; 3560 vece = TCGOP_VECE(op); 3561 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3562 3563 if (its->val_type == TEMP_VAL_CONST) { 3564 /* Propagate constant via movi -> dupi. */ 3565 tcg_target_ulong val = its->val; 3566 if (IS_DEAD_ARG(1)) { 3567 temp_dead(s, its); 3568 } 3569 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3570 return; 3571 } 3572 3573 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3574 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3575 3576 /* Allocate the output register now. */ 3577 if (ots->val_type != TEMP_VAL_REG) { 3578 TCGRegSet allocated_regs = s->reserved_regs; 3579 3580 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3581 /* Make sure to not spill the input register. */ 3582 tcg_regset_set_reg(allocated_regs, its->reg); 3583 } 3584 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3585 op->output_pref[0], ots->indirect_base); 3586 ots->val_type = TEMP_VAL_REG; 3587 ots->mem_coherent = 0; 3588 s->reg_to_temp[ots->reg] = ots; 3589 } 3590 3591 switch (its->val_type) { 3592 case TEMP_VAL_REG: 3593 /* 3594 * The dup constraints must be broad, covering all possible VECE. 3595 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 3596 * to fail, indicating that extra moves are required for that case.
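* When it does fail, the code below retries with a plain move into the
* vector output register, or with a dup/load from the temp's memory slot.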
3597 */ 3598 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 3599 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 3600 goto done; 3601 } 3602 /* Try again from memory or a vector input register. */ 3603 } 3604 if (!its->mem_coherent) { 3605 /* 3606 * The input register is not synced, and so an extra store 3607 * would be required to use memory. Attempt an integer-vector 3608 * register move first. We do not have a TCGRegSet for this. 3609 */ 3610 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 3611 break; 3612 } 3613 /* Sync the temp back to its slot and load from there. */ 3614 temp_sync(s, its, s->reserved_regs, 0, 0); 3615 } 3616 /* fall through */ 3617 3618 case TEMP_VAL_MEM: 3619 #if HOST_BIG_ENDIAN 3620 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; 3621 endian_fixup -= 1 << vece; 3622 #else 3623 endian_fixup = 0; 3624 #endif 3625 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 3626 its->mem_offset + endian_fixup)) { 3627 goto done; 3628 } 3629 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 3630 break; 3631 3632 default: 3633 g_assert_not_reached(); 3634 } 3635 3636 /* We now have a vector input register, so dup must succeed. */ 3637 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 3638 tcg_debug_assert(ok); 3639 3640 done: 3641 if (IS_DEAD_ARG(1)) { 3642 temp_dead(s, its); 3643 } 3644 if (NEED_SYNC_ARG(0)) { 3645 temp_sync(s, ots, s->reserved_regs, 0, 0); 3646 } 3647 if (IS_DEAD_ARG(0)) { 3648 temp_dead(s, ots); 3649 } 3650 } 3651 3652 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3653 { 3654 const TCGLifeData arg_life = op->life; 3655 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3656 TCGRegSet i_allocated_regs; 3657 TCGRegSet o_allocated_regs; 3658 int i, k, nb_iargs, nb_oargs; 3659 TCGReg reg; 3660 TCGArg arg; 3661 const TCGArgConstraint *arg_ct; 3662 TCGTemp *ts; 3663 TCGArg new_args[TCG_MAX_OP_ARGS]; 3664 int const_args[TCG_MAX_OP_ARGS]; 3665 3666 nb_oargs = def->nb_oargs; 3667 nb_iargs = def->nb_iargs; 3668 3669 /* copy constants */ 3670 memcpy(new_args + nb_oargs + nb_iargs, 3671 op->args + nb_oargs + nb_iargs, 3672 sizeof(TCGArg) * def->nb_cargs); 3673 3674 i_allocated_regs = s->reserved_regs; 3675 o_allocated_regs = s->reserved_regs; 3676 3677 /* satisfy input constraints */ 3678 for (k = 0; k < nb_iargs; k++) { 3679 TCGRegSet i_preferred_regs, o_preferred_regs; 3680 3681 i = def->args_ct[nb_oargs + k].sort_index; 3682 arg = op->args[i]; 3683 arg_ct = &def->args_ct[i]; 3684 ts = arg_temp(arg); 3685 3686 if (ts->val_type == TEMP_VAL_CONST 3687 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 3688 /* constant is OK for instruction */ 3689 const_args[i] = 1; 3690 new_args[i] = ts->val; 3691 continue; 3692 } 3693 3694 i_preferred_regs = o_preferred_regs = 0; 3695 if (arg_ct->ialias) { 3696 o_preferred_regs = op->output_pref[arg_ct->alias_index]; 3697 3698 /* 3699 * If the input is readonly, then it cannot also be an 3700 * output and aliased to itself. If the input is not 3701 * dead after the instruction, we must allocate a new 3702 * register and move it. 3703 */ 3704 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 3705 goto allocate_in_reg; 3706 } 3707 3708 /* 3709 * Check if the current register has already been allocated 3710 * for another input aliased to an output. 
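* If it has, reusing it here would leave two output aliases sharing one
* register, so jump to allocate_in_reg and copy the value into a fresh
* register instead.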
3711 */ 3712 if (ts->val_type == TEMP_VAL_REG) { 3713 reg = ts->reg; 3714 for (int k2 = 0; k2 < k; k2++) { 3715 int i2 = def->args_ct[nb_oargs + k2].sort_index; 3716 if (def->args_ct[i2].ialias && reg == new_args[i2]) { 3717 goto allocate_in_reg; 3718 } 3719 } 3720 } 3721 i_preferred_regs = o_preferred_regs; 3722 } 3723 3724 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); 3725 reg = ts->reg; 3726 3727 if (!tcg_regset_test_reg(arg_ct->regs, reg)) { 3728 allocate_in_reg: 3729 /* 3730 * Allocate a new register matching the constraint 3731 * and move the temporary register into it. 3732 */ 3733 temp_load(s, ts, tcg_target_available_regs[ts->type], 3734 i_allocated_regs, 0); 3735 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, 3736 o_preferred_regs, ts->indirect_base); 3737 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 3738 /* 3739 * Cross register class move not supported. Sync the 3740 * temp back to its slot and load from there. 3741 */ 3742 temp_sync(s, ts, i_allocated_regs, 0, 0); 3743 tcg_out_ld(s, ts->type, reg, 3744 ts->mem_base->reg, ts->mem_offset); 3745 } 3746 } 3747 new_args[i] = reg; 3748 const_args[i] = 0; 3749 tcg_regset_set_reg(i_allocated_regs, reg); 3750 } 3751 3752 /* mark dead temporaries and free the associated registers */ 3753 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3754 if (IS_DEAD_ARG(i)) { 3755 temp_dead(s, arg_temp(op->args[i])); 3756 } 3757 } 3758 3759 if (def->flags & TCG_OPF_COND_BRANCH) { 3760 tcg_reg_alloc_cbranch(s, i_allocated_regs); 3761 } else if (def->flags & TCG_OPF_BB_END) { 3762 tcg_reg_alloc_bb_end(s, i_allocated_regs); 3763 } else { 3764 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3765 /* XXX: permit generic clobber register list ? */ 3766 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3767 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3768 tcg_reg_free(s, i, i_allocated_regs); 3769 } 3770 } 3771 } 3772 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3773 /* sync globals if the op has side effects and might trigger 3774 an exception. */ 3775 sync_globals(s, i_allocated_regs); 3776 } 3777 3778 /* satisfy the output constraints */ 3779 for(k = 0; k < nb_oargs; k++) { 3780 i = def->args_ct[k].sort_index; 3781 arg = op->args[i]; 3782 arg_ct = &def->args_ct[i]; 3783 ts = arg_temp(arg); 3784 3785 /* ENV should not be modified. */ 3786 tcg_debug_assert(!temp_readonly(ts)); 3787 3788 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 3789 reg = new_args[arg_ct->alias_index]; 3790 } else if (arg_ct->newreg) { 3791 reg = tcg_reg_alloc(s, arg_ct->regs, 3792 i_allocated_regs | o_allocated_regs, 3793 op->output_pref[k], ts->indirect_base); 3794 } else { 3795 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 3796 op->output_pref[k], ts->indirect_base); 3797 } 3798 tcg_regset_set_reg(o_allocated_regs, reg); 3799 if (ts->val_type == TEMP_VAL_REG) { 3800 s->reg_to_temp[ts->reg] = NULL; 3801 } 3802 ts->val_type = TEMP_VAL_REG; 3803 ts->reg = reg; 3804 /* 3805 * Temp value is modified, so the value kept in memory is 3806 * potentially not the same. 
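* Clearing mem_coherent below ensures a later temp_sync will write the
* new value back when liveness requires it.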
3807 */ 3808 ts->mem_coherent = 0; 3809 s->reg_to_temp[reg] = ts; 3810 new_args[i] = reg; 3811 } 3812 } 3813 3814 /* emit instruction */ 3815 if (def->flags & TCG_OPF_VECTOR) { 3816 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 3817 new_args, const_args); 3818 } else { 3819 tcg_out_op(s, op->opc, new_args, const_args); 3820 } 3821 3822 /* move the outputs in the correct register if needed */ 3823 for(i = 0; i < nb_oargs; i++) { 3824 ts = arg_temp(op->args[i]); 3825 3826 /* ENV should not be modified. */ 3827 tcg_debug_assert(!temp_readonly(ts)); 3828 3829 if (NEED_SYNC_ARG(i)) { 3830 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 3831 } else if (IS_DEAD_ARG(i)) { 3832 temp_dead(s, ts); 3833 } 3834 } 3835 } 3836 3837 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 3838 { 3839 const TCGLifeData arg_life = op->life; 3840 TCGTemp *ots, *itsl, *itsh; 3841 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3842 3843 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 3844 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 3845 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 3846 3847 ots = arg_temp(op->args[0]); 3848 itsl = arg_temp(op->args[1]); 3849 itsh = arg_temp(op->args[2]); 3850 3851 /* ENV should not be modified. */ 3852 tcg_debug_assert(!temp_readonly(ots)); 3853 3854 /* Allocate the output register now. */ 3855 if (ots->val_type != TEMP_VAL_REG) { 3856 TCGRegSet allocated_regs = s->reserved_regs; 3857 TCGRegSet dup_out_regs = 3858 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3859 3860 /* Make sure to not spill the input registers. */ 3861 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 3862 tcg_regset_set_reg(allocated_regs, itsl->reg); 3863 } 3864 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 3865 tcg_regset_set_reg(allocated_regs, itsh->reg); 3866 } 3867 3868 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3869 op->output_pref[0], ots->indirect_base); 3870 ots->val_type = TEMP_VAL_REG; 3871 ots->mem_coherent = 0; 3872 s->reg_to_temp[ots->reg] = ots; 3873 } 3874 3875 /* Promote dup2 of immediates to dupi_vec. */ 3876 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 3877 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 3878 MemOp vece = MO_64; 3879 3880 if (val == dup_const(MO_8, val)) { 3881 vece = MO_8; 3882 } else if (val == dup_const(MO_16, val)) { 3883 vece = MO_16; 3884 } else if (val == dup_const(MO_32, val)) { 3885 vece = MO_32; 3886 } 3887 3888 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 3889 goto done; 3890 } 3891 3892 /* If the two inputs form one 64-bit value, try dupm_vec. */ 3893 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { 3894 if (!itsl->mem_coherent) { 3895 temp_sync(s, itsl, s->reserved_regs, 0, 0); 3896 } 3897 if (!itsh->mem_coherent) { 3898 temp_sync(s, itsh, s->reserved_regs, 0, 0); 3899 } 3900 #if HOST_BIG_ENDIAN 3901 TCGTemp *its = itsh; 3902 #else 3903 TCGTemp *its = itsl; 3904 #endif 3905 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 3906 its->mem_base->reg, its->mem_offset)) { 3907 goto done; 3908 } 3909 } 3910 3911 /* Fall back to generic expansion. 
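Returning false lets the caller allocate this dup2_vec through the ordinary constraint-based path instead.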
*/ 3912 return false; 3913 3914 done: 3915 if (IS_DEAD_ARG(1)) { 3916 temp_dead(s, itsl); 3917 } 3918 if (IS_DEAD_ARG(2)) { 3919 temp_dead(s, itsh); 3920 } 3921 if (NEED_SYNC_ARG(0)) { 3922 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 3923 } else if (IS_DEAD_ARG(0)) { 3924 temp_dead(s, ots); 3925 } 3926 return true; 3927 } 3928 3929 #ifdef TCG_TARGET_STACK_GROWSUP 3930 #define STACK_DIR(x) (-(x)) 3931 #else 3932 #define STACK_DIR(x) (x) 3933 #endif 3934 3935 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 3936 { 3937 const int nb_oargs = TCGOP_CALLO(op); 3938 const int nb_iargs = TCGOP_CALLI(op); 3939 const TCGLifeData arg_life = op->life; 3940 const TCGHelperInfo *info; 3941 int flags, nb_regs, i; 3942 TCGReg reg; 3943 TCGArg arg; 3944 TCGTemp *ts; 3945 intptr_t stack_offset; 3946 size_t call_stack_size; 3947 tcg_insn_unit *func_addr; 3948 int allocate_args; 3949 TCGRegSet allocated_regs; 3950 3951 func_addr = tcg_call_func(op); 3952 info = tcg_call_info(op); 3953 flags = info->flags; 3954 3955 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 3956 if (nb_regs > nb_iargs) { 3957 nb_regs = nb_iargs; 3958 } 3959 3960 /* assign stack slots first */ 3961 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 3962 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 3963 ~(TCG_TARGET_STACK_ALIGN - 1); 3964 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 3965 if (allocate_args) { 3966 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 3967 preallocate call stack */ 3968 tcg_abort(); 3969 } 3970 3971 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 3972 for (i = nb_regs; i < nb_iargs; i++) { 3973 arg = op->args[nb_oargs + i]; 3974 #ifdef TCG_TARGET_STACK_GROWSUP 3975 stack_offset -= sizeof(tcg_target_long); 3976 #endif 3977 if (arg != TCG_CALL_DUMMY_ARG) { 3978 ts = arg_temp(arg); 3979 temp_load(s, ts, tcg_target_available_regs[ts->type], 3980 s->reserved_regs, 0); 3981 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 3982 } 3983 #ifndef TCG_TARGET_STACK_GROWSUP 3984 stack_offset += sizeof(tcg_target_long); 3985 #endif 3986 } 3987 3988 /* assign input registers */ 3989 allocated_regs = s->reserved_regs; 3990 for (i = 0; i < nb_regs; i++) { 3991 arg = op->args[nb_oargs + i]; 3992 if (arg != TCG_CALL_DUMMY_ARG) { 3993 ts = arg_temp(arg); 3994 reg = tcg_target_call_iarg_regs[i]; 3995 3996 if (ts->val_type == TEMP_VAL_REG) { 3997 if (ts->reg != reg) { 3998 tcg_reg_free(s, reg, allocated_regs); 3999 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4000 /* 4001 * Cross register class move not supported. Sync the 4002 * temp back to its slot and load from there. 
4003 */ 4004 temp_sync(s, ts, allocated_regs, 0, 0); 4005 tcg_out_ld(s, ts->type, reg, 4006 ts->mem_base->reg, ts->mem_offset); 4007 } 4008 } 4009 } else { 4010 TCGRegSet arg_set = 0; 4011 4012 tcg_reg_free(s, reg, allocated_regs); 4013 tcg_regset_set_reg(arg_set, reg); 4014 temp_load(s, ts, arg_set, allocated_regs, 0); 4015 } 4016 4017 tcg_regset_set_reg(allocated_regs, reg); 4018 } 4019 } 4020 4021 /* mark dead temporaries and free the associated registers */ 4022 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4023 if (IS_DEAD_ARG(i)) { 4024 temp_dead(s, arg_temp(op->args[i])); 4025 } 4026 } 4027 4028 /* clobber call registers */ 4029 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4030 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4031 tcg_reg_free(s, i, allocated_regs); 4032 } 4033 } 4034 4035 /* Save globals if they might be written by the helper, sync them if 4036 they might be read. */ 4037 if (flags & TCG_CALL_NO_READ_GLOBALS) { 4038 /* Nothing to do */ 4039 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 4040 sync_globals(s, allocated_regs); 4041 } else { 4042 save_globals(s, allocated_regs); 4043 } 4044 4045 #ifdef CONFIG_TCG_INTERPRETER 4046 { 4047 gpointer hash = (gpointer)(uintptr_t)info->typemask; 4048 ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); 4049 assert(cif != NULL); 4050 tcg_out_call(s, func_addr, cif); 4051 } 4052 #else 4053 tcg_out_call(s, func_addr); 4054 #endif 4055 4056 /* assign output registers and emit moves if needed */ 4057 for(i = 0; i < nb_oargs; i++) { 4058 arg = op->args[i]; 4059 ts = arg_temp(arg); 4060 4061 /* ENV should not be modified. */ 4062 tcg_debug_assert(!temp_readonly(ts)); 4063 4064 reg = tcg_target_call_oarg_regs[i]; 4065 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4066 if (ts->val_type == TEMP_VAL_REG) { 4067 s->reg_to_temp[ts->reg] = NULL; 4068 } 4069 ts->val_type = TEMP_VAL_REG; 4070 ts->reg = reg; 4071 ts->mem_coherent = 0; 4072 s->reg_to_temp[reg] = ts; 4073 if (NEED_SYNC_ARG(i)) { 4074 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); 4075 } else if (IS_DEAD_ARG(i)) { 4076 temp_dead(s, ts); 4077 } 4078 } 4079 } 4080 4081 #ifdef CONFIG_PROFILER 4082 4083 /* avoid copy/paste errors */ 4084 #define PROF_ADD(to, from, field) \ 4085 do { \ 4086 (to)->field += qatomic_read(&((from)->field)); \ 4087 } while (0) 4088 4089 #define PROF_MAX(to, from, field) \ 4090 do { \ 4091 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 4092 if (val__ > (to)->field) { \ 4093 (to)->field = val__; \ 4094 } \ 4095 } while (0) 4096 4097 /* Pass in a zero'ed @prof */ 4098 static inline 4099 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 4100 { 4101 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4102 unsigned int i; 4103 4104 for (i = 0; i < n_ctxs; i++) { 4105 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4106 const TCGProfile *orig = &s->prof; 4107 4108 if (counters) { 4109 PROF_ADD(prof, orig, cpu_exec_time); 4110 PROF_ADD(prof, orig, tb_count1); 4111 PROF_ADD(prof, orig, tb_count); 4112 PROF_ADD(prof, orig, op_count); 4113 PROF_MAX(prof, orig, op_count_max); 4114 PROF_ADD(prof, orig, temp_count); 4115 PROF_MAX(prof, orig, temp_count_max); 4116 PROF_ADD(prof, orig, del_op_count); 4117 PROF_ADD(prof, orig, code_in_len); 4118 PROF_ADD(prof, orig, code_out_len); 4119 PROF_ADD(prof, orig, search_out_len); 4120 PROF_ADD(prof, orig, interm_time); 4121 PROF_ADD(prof, orig, code_time); 4122 PROF_ADD(prof, orig, la_time); 4123 PROF_ADD(prof, orig, opt_time); 4124 PROF_ADD(prof, orig, restore_count); 4125 
PROF_ADD(prof, orig, restore_time); 4126 } 4127 if (table) { 4128 int i; 4129 4130 for (i = 0; i < NB_OPS; i++) { 4131 PROF_ADD(prof, orig, table_op_count[i]); 4132 } 4133 } 4134 } 4135 } 4136 4137 #undef PROF_ADD 4138 #undef PROF_MAX 4139 4140 static void tcg_profile_snapshot_counters(TCGProfile *prof) 4141 { 4142 tcg_profile_snapshot(prof, true, false); 4143 } 4144 4145 static void tcg_profile_snapshot_table(TCGProfile *prof) 4146 { 4147 tcg_profile_snapshot(prof, false, true); 4148 } 4149 4150 void tcg_dump_op_count(GString *buf) 4151 { 4152 TCGProfile prof = {}; 4153 int i; 4154 4155 tcg_profile_snapshot_table(&prof); 4156 for (i = 0; i < NB_OPS; i++) { 4157 g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name, 4158 prof.table_op_count[i]); 4159 } 4160 } 4161 4162 int64_t tcg_cpu_exec_time(void) 4163 { 4164 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4165 unsigned int i; 4166 int64_t ret = 0; 4167 4168 for (i = 0; i < n_ctxs; i++) { 4169 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4170 const TCGProfile *prof = &s->prof; 4171 4172 ret += qatomic_read(&prof->cpu_exec_time); 4173 } 4174 return ret; 4175 } 4176 #else 4177 void tcg_dump_op_count(GString *buf) 4178 { 4179 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 4180 } 4181 4182 int64_t tcg_cpu_exec_time(void) 4183 { 4184 error_report("%s: TCG profiler not compiled", __func__); 4185 exit(EXIT_FAILURE); 4186 } 4187 #endif 4188 4189 4190 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 4191 { 4192 #ifdef CONFIG_PROFILER 4193 TCGProfile *prof = &s->prof; 4194 #endif 4195 int i, num_insns; 4196 TCGOp *op; 4197 4198 #ifdef CONFIG_PROFILER 4199 { 4200 int n = 0; 4201 4202 QTAILQ_FOREACH(op, &s->ops, link) { 4203 n++; 4204 } 4205 qatomic_set(&prof->op_count, prof->op_count + n); 4206 if (n > prof->op_count_max) { 4207 qatomic_set(&prof->op_count_max, n); 4208 } 4209 4210 n = s->nb_temps; 4211 qatomic_set(&prof->temp_count, prof->temp_count + n); 4212 if (n > prof->temp_count_max) { 4213 qatomic_set(&prof->temp_count_max, n); 4214 } 4215 } 4216 #endif 4217 4218 #ifdef DEBUG_DISAS 4219 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4220 && qemu_log_in_addr_range(tb->pc))) { 4221 FILE *logfile = qemu_log_trylock(); 4222 if (logfile) { 4223 fprintf(logfile, "OP:\n"); 4224 tcg_dump_ops(s, logfile, false); 4225 fprintf(logfile, "\n"); 4226 qemu_log_unlock(logfile); 4227 } 4228 } 4229 #endif 4230 4231 #ifdef CONFIG_DEBUG_TCG 4232 /* Ensure all labels referenced have been emitted. 
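       A label becomes 'present' when gen_set_label() emits it, while
       'refs' counts the branches that target it.  A hypothetical
       front-end bug that would trip the assert below looks like:

           TCGLabel *l = gen_new_label();
           tcg_gen_brcondi_i32(TCG_COND_EQ, val, 0, l);
           ... with no matching gen_set_label(l) ever emitted.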
*/ 4233 { 4234 TCGLabel *l; 4235 bool error = false; 4236 4237 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4238 if (unlikely(!l->present) && l->refs) { 4239 qemu_log_mask(CPU_LOG_TB_OP, 4240 "$L%d referenced but not present.\n", l->id); 4241 error = true; 4242 } 4243 } 4244 assert(!error); 4245 } 4246 #endif 4247 4248 #ifdef CONFIG_PROFILER 4249 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4250 #endif 4251 4252 #ifdef USE_TCG_OPTIMIZATIONS 4253 tcg_optimize(s); 4254 #endif 4255 4256 #ifdef CONFIG_PROFILER 4257 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4258 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4259 #endif 4260 4261 reachable_code_pass(s); 4262 liveness_pass_1(s); 4263 4264 if (s->nb_indirects > 0) { 4265 #ifdef DEBUG_DISAS 4266 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4267 && qemu_log_in_addr_range(tb->pc))) { 4268 FILE *logfile = qemu_log_trylock(); 4269 if (logfile) { 4270 fprintf(logfile, "OP before indirect lowering:\n"); 4271 tcg_dump_ops(s, logfile, false); 4272 fprintf(logfile, "\n"); 4273 qemu_log_unlock(logfile); 4274 } 4275 } 4276 #endif 4277 /* Replace indirect temps with direct temps. */ 4278 if (liveness_pass_2(s)) { 4279 /* If changes were made, re-run liveness. */ 4280 liveness_pass_1(s); 4281 } 4282 } 4283 4284 #ifdef CONFIG_PROFILER 4285 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 4286 #endif 4287 4288 #ifdef DEBUG_DISAS 4289 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 4290 && qemu_log_in_addr_range(tb->pc))) { 4291 FILE *logfile = qemu_log_trylock(); 4292 if (logfile) { 4293 fprintf(logfile, "OP after optimization and liveness analysis:\n"); 4294 tcg_dump_ops(s, logfile, true); 4295 fprintf(logfile, "\n"); 4296 qemu_log_unlock(logfile); 4297 } 4298 } 4299 #endif 4300 4301 tcg_reg_alloc_start(s); 4302 4303 /* 4304 * Reset the buffer pointers when restarting after overflow. 4305 * TODO: Move this into translate-all.c with the rest of the 4306 * buffer management. Having only this done here is confusing. 4307 */ 4308 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 4309 s->code_ptr = s->code_buf; 4310 4311 #ifdef TCG_TARGET_NEED_LDST_LABELS 4312 QSIMPLEQ_INIT(&s->ldst_labels); 4313 #endif 4314 #ifdef TCG_TARGET_NEED_POOL_LABELS 4315 s->pool_labels = NULL; 4316 #endif 4317 4318 num_insns = -1; 4319 QTAILQ_FOREACH(op, &s->ops, link) { 4320 TCGOpcode opc = op->opc; 4321 4322 #ifdef CONFIG_PROFILER 4323 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 4324 #endif 4325 4326 switch (opc) { 4327 case INDEX_op_mov_i32: 4328 case INDEX_op_mov_i64: 4329 case INDEX_op_mov_vec: 4330 tcg_reg_alloc_mov(s, op); 4331 break; 4332 case INDEX_op_dup_vec: 4333 tcg_reg_alloc_dup(s, op); 4334 break; 4335 case INDEX_op_insn_start: 4336 if (num_insns >= 0) { 4337 size_t off = tcg_current_code_size(s); 4338 s->gen_insn_end_off[num_insns] = off; 4339 /* Assert that we do not overflow our stored offset. 
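                       gen_insn_end_off[] holds uint16_t entries while off is
                       a size_t, so the assignment above truncates silently;
                       re-reading the stored value catches any TB whose code
                       no longer fits in 16 bits (e.g. off == 0x1234
                       round-trips unchanged, but off == 0x10002 would read
                       back as 0x0002 and the assert fires).  The same limit
                       is enforced for every TB by the UINT16_MAX check
                       further down.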
*/ 4340 assert(s->gen_insn_end_off[num_insns] == off); 4341 } 4342 num_insns++; 4343 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 4344 target_ulong a; 4345 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 4346 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 4347 #else 4348 a = op->args[i]; 4349 #endif 4350 s->gen_insn_data[num_insns][i] = a; 4351 } 4352 break; 4353 case INDEX_op_discard: 4354 temp_dead(s, arg_temp(op->args[0])); 4355 break; 4356 case INDEX_op_set_label: 4357 tcg_reg_alloc_bb_end(s, s->reserved_regs); 4358 tcg_out_label(s, arg_label(op->args[0])); 4359 break; 4360 case INDEX_op_call: 4361 tcg_reg_alloc_call(s, op); 4362 break; 4363 case INDEX_op_dup2_vec: 4364 if (tcg_reg_alloc_dup2(s, op)) { 4365 break; 4366 } 4367 /* fall through */ 4368 default: 4369 /* Sanity check that we've not introduced any unhandled opcodes. */ 4370 tcg_debug_assert(tcg_op_supported(opc)); 4371 /* Note: in order to speed up the code, it would be much 4372 faster to have specialized register allocator functions for 4373 some common argument patterns */ 4374 tcg_reg_alloc_op(s, op); 4375 break; 4376 } 4377 #ifdef CONFIG_DEBUG_TCG 4378 check_regs(s); 4379 #endif 4380 /* Test for (pending) buffer overflow. The assumption is that any 4381 one operation beginning below the high water mark cannot overrun 4382 the buffer completely. Thus we can test for overflow after 4383 generating code without having to check during generation. */ 4384 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 4385 return -1; 4386 } 4387 /* Test for TB overflow, as seen by gen_insn_end_off. */ 4388 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 4389 return -2; 4390 } 4391 } 4392 tcg_debug_assert(num_insns >= 0); 4393 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 4394 4395 /* Generate TB finalization at the end of block */ 4396 #ifdef TCG_TARGET_NEED_LDST_LABELS 4397 i = tcg_out_ldst_finalize(s); 4398 if (i < 0) { 4399 return i; 4400 } 4401 #endif 4402 #ifdef TCG_TARGET_NEED_POOL_LABELS 4403 i = tcg_out_pool_finalize(s); 4404 if (i < 0) { 4405 return i; 4406 } 4407 #endif 4408 if (!tcg_resolve_relocs(s)) { 4409 return -2; 4410 } 4411 4412 #ifndef CONFIG_TCG_INTERPRETER 4413 /* flush instruction cache */ 4414 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 4415 (uintptr_t)s->code_buf, 4416 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 4417 #endif 4418 4419 return tcg_current_code_size(s); 4420 } 4421 4422 #ifdef CONFIG_PROFILER 4423 void tcg_dump_info(GString *buf) 4424 { 4425 TCGProfile prof = {}; 4426 const TCGProfile *s; 4427 int64_t tb_count; 4428 int64_t tb_div_count; 4429 int64_t tot; 4430 4431 tcg_profile_snapshot_counters(&prof); 4432 s = &prof; 4433 tb_count = s->tb_count; 4434 tb_div_count = tb_count ? tb_count : 1; 4435 tot = s->interm_time + s->code_time; 4436 4437 g_string_append_printf(buf, "JIT cycles %" PRId64 4438 " (%0.3f s at 2.4 GHz)\n", 4439 tot, tot / 2.4e9); 4440 g_string_append_printf(buf, "translated TBs %" PRId64 4441 " (aborted=%" PRId64 " %0.1f%%)\n", 4442 tb_count, s->tb_count1 - tb_count, 4443 (double)(s->tb_count1 - s->tb_count) 4444 / (s->tb_count1 ? 
s->tb_count1 : 1) * 100.0); 4445 g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n", 4446 (double)s->op_count / tb_div_count, s->op_count_max); 4447 g_string_append_printf(buf, "deleted ops/TB %0.2f\n", 4448 (double)s->del_op_count / tb_div_count); 4449 g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n", 4450 (double)s->temp_count / tb_div_count, 4451 s->temp_count_max); 4452 g_string_append_printf(buf, "avg host code/TB %0.1f\n", 4453 (double)s->code_out_len / tb_div_count); 4454 g_string_append_printf(buf, "avg search data/TB %0.1f\n", 4455 (double)s->search_out_len / tb_div_count); 4456 4457 g_string_append_printf(buf, "cycles/op %0.1f\n", 4458 s->op_count ? (double)tot / s->op_count : 0); 4459 g_string_append_printf(buf, "cycles/in byte %0.1f\n", 4460 s->code_in_len ? (double)tot / s->code_in_len : 0); 4461 g_string_append_printf(buf, "cycles/out byte %0.1f\n", 4462 s->code_out_len ? (double)tot / s->code_out_len : 0); 4463 g_string_append_printf(buf, "cycles/search byte %0.1f\n", 4464 s->search_out_len ? 4465 (double)tot / s->search_out_len : 0); 4466 if (tot == 0) { 4467 tot = 1; 4468 } 4469 g_string_append_printf(buf, " gen_interm time %0.1f%%\n", 4470 (double)s->interm_time / tot * 100.0); 4471 g_string_append_printf(buf, " gen_code time %0.1f%%\n", 4472 (double)s->code_time / tot * 100.0); 4473 g_string_append_printf(buf, "optim./code time %0.1f%%\n", 4474 (double)s->opt_time / (s->code_time ? 4475 s->code_time : 1) 4476 * 100.0); 4477 g_string_append_printf(buf, "liveness/code time %0.1f%%\n", 4478 (double)s->la_time / (s->code_time ? 4479 s->code_time : 1) * 100.0); 4480 g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n", 4481 s->restore_count); 4482 g_string_append_printf(buf, " avg cycles %0.1f\n", 4483 s->restore_count ? 4484 (double)s->restore_time / s->restore_count : 0); 4485 } 4486 #else 4487 void tcg_dump_info(GString *buf) 4488 { 4489 g_string_append_printf(buf, "[TCG profiler not compiled]\n"); 4490 } 4491 #endif 4492 4493 #ifdef ELF_HOST_MACHINE 4494 /* In order to use this feature, the backend needs to do three things: 4495 4496 (1) Define ELF_HOST_MACHINE to indicate both what value to 4497 put into the ELF image and to indicate support for the feature. 4498 4499 (2) Define tcg_register_jit. This should create a buffer containing 4500 the contents of a .debug_frame section that describes the post- 4501 prologue unwind info for the tcg machine. 4502 4503 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 4504 */ 4505 4506 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 4507 typedef enum { 4508 JIT_NOACTION = 0, 4509 JIT_REGISTER_FN, 4510 JIT_UNREGISTER_FN 4511 } jit_actions_t; 4512 4513 struct jit_code_entry { 4514 struct jit_code_entry *next_entry; 4515 struct jit_code_entry *prev_entry; 4516 const void *symfile_addr; 4517 uint64_t symfile_size; 4518 }; 4519 4520 struct jit_descriptor { 4521 uint32_t version; 4522 uint32_t action_flag; 4523 struct jit_code_entry *relevant_entry; 4524 struct jit_code_entry *first_entry; 4525 }; 4526 4527 void __jit_debug_register_code(void) __attribute__((noinline)); 4528 void __jit_debug_register_code(void) 4529 { 4530 asm(""); 4531 } 4532 4533 /* Must statically initialize the version, because GDB may check 4534 the version before we can set it. */ 4535 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 4536 4537 /* End GDB interface. 
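   How the protocol works (see the GDB manual, "JIT Compilation
   Interface"): the debugger plants a breakpoint on
   __jit_debug_register_code() and, whenever it is hit, re-reads
   __jit_debug_descriptor.  action_flag tells it whether relevant_entry
   was just registered or unregistered, and each jit_code_entry points at
   an in-memory ELF object (symfile_addr / symfile_size) from which
   symbols and unwind information are loaded.  tcg_register_jit_int()
   below registers exactly one such entry covering the whole
   code_gen_buffer.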
*/ 4538 4539 static int find_string(const char *strtab, const char *str) 4540 { 4541 const char *p = strtab + 1; 4542 4543 while (1) { 4544 if (strcmp(p, str) == 0) { 4545 return p - strtab; 4546 } 4547 p += strlen(p) + 1; 4548 } 4549 } 4550 4551 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 4552 const void *debug_frame, 4553 size_t debug_frame_size) 4554 { 4555 struct __attribute__((packed)) DebugInfo { 4556 uint32_t len; 4557 uint16_t version; 4558 uint32_t abbrev; 4559 uint8_t ptr_size; 4560 uint8_t cu_die; 4561 uint16_t cu_lang; 4562 uintptr_t cu_low_pc; 4563 uintptr_t cu_high_pc; 4564 uint8_t fn_die; 4565 char fn_name[16]; 4566 uintptr_t fn_low_pc; 4567 uintptr_t fn_high_pc; 4568 uint8_t cu_eoc; 4569 }; 4570 4571 struct ElfImage { 4572 ElfW(Ehdr) ehdr; 4573 ElfW(Phdr) phdr; 4574 ElfW(Shdr) shdr[7]; 4575 ElfW(Sym) sym[2]; 4576 struct DebugInfo di; 4577 uint8_t da[24]; 4578 char str[80]; 4579 }; 4580 4581 struct ElfImage *img; 4582 4583 static const struct ElfImage img_template = { 4584 .ehdr = { 4585 .e_ident[EI_MAG0] = ELFMAG0, 4586 .e_ident[EI_MAG1] = ELFMAG1, 4587 .e_ident[EI_MAG2] = ELFMAG2, 4588 .e_ident[EI_MAG3] = ELFMAG3, 4589 .e_ident[EI_CLASS] = ELF_CLASS, 4590 .e_ident[EI_DATA] = ELF_DATA, 4591 .e_ident[EI_VERSION] = EV_CURRENT, 4592 .e_type = ET_EXEC, 4593 .e_machine = ELF_HOST_MACHINE, 4594 .e_version = EV_CURRENT, 4595 .e_phoff = offsetof(struct ElfImage, phdr), 4596 .e_shoff = offsetof(struct ElfImage, shdr), 4597 .e_ehsize = sizeof(ElfW(Shdr)), 4598 .e_phentsize = sizeof(ElfW(Phdr)), 4599 .e_phnum = 1, 4600 .e_shentsize = sizeof(ElfW(Shdr)), 4601 .e_shnum = ARRAY_SIZE(img->shdr), 4602 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 4603 #ifdef ELF_HOST_FLAGS 4604 .e_flags = ELF_HOST_FLAGS, 4605 #endif 4606 #ifdef ELF_OSABI 4607 .e_ident[EI_OSABI] = ELF_OSABI, 4608 #endif 4609 }, 4610 .phdr = { 4611 .p_type = PT_LOAD, 4612 .p_flags = PF_X, 4613 }, 4614 .shdr = { 4615 [0] = { .sh_type = SHT_NULL }, 4616 /* Trick: The contents of code_gen_buffer are not present in 4617 this fake ELF file; that got allocated elsewhere. Therefore 4618 we mark .text as SHT_NOBITS (similar to .bss) so that readers 4619 will not look for contents. We can record any address. 
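              Because .text is SHT_NOBITS, sh_offset is meaningless and
              only sh_addr/sh_size matter (both are filled in from the
              real code_gen_buffer in tcg_register_jit_int below); the
              debugger reads the actual instruction bytes out of the
              running process rather than out of this image.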
*/ 4620 [1] = { /* .text */ 4621 .sh_type = SHT_NOBITS, 4622 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 4623 }, 4624 [2] = { /* .debug_info */ 4625 .sh_type = SHT_PROGBITS, 4626 .sh_offset = offsetof(struct ElfImage, di), 4627 .sh_size = sizeof(struct DebugInfo), 4628 }, 4629 [3] = { /* .debug_abbrev */ 4630 .sh_type = SHT_PROGBITS, 4631 .sh_offset = offsetof(struct ElfImage, da), 4632 .sh_size = sizeof(img->da), 4633 }, 4634 [4] = { /* .debug_frame */ 4635 .sh_type = SHT_PROGBITS, 4636 .sh_offset = sizeof(struct ElfImage), 4637 }, 4638 [5] = { /* .symtab */ 4639 .sh_type = SHT_SYMTAB, 4640 .sh_offset = offsetof(struct ElfImage, sym), 4641 .sh_size = sizeof(img->sym), 4642 .sh_info = 1, 4643 .sh_link = ARRAY_SIZE(img->shdr) - 1, 4644 .sh_entsize = sizeof(ElfW(Sym)), 4645 }, 4646 [6] = { /* .strtab */ 4647 .sh_type = SHT_STRTAB, 4648 .sh_offset = offsetof(struct ElfImage, str), 4649 .sh_size = sizeof(img->str), 4650 } 4651 }, 4652 .sym = { 4653 [1] = { /* code_gen_buffer */ 4654 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 4655 .st_shndx = 1, 4656 } 4657 }, 4658 .di = { 4659 .len = sizeof(struct DebugInfo) - 4, 4660 .version = 2, 4661 .ptr_size = sizeof(void *), 4662 .cu_die = 1, 4663 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 4664 .fn_die = 2, 4665 .fn_name = "code_gen_buffer" 4666 }, 4667 .da = { 4668 1, /* abbrev number (the cu) */ 4669 0x11, 1, /* DW_TAG_compile_unit, has children */ 4670 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 4671 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 4672 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 4673 0, 0, /* end of abbrev */ 4674 2, /* abbrev number (the fn) */ 4675 0x2e, 0, /* DW_TAG_subprogram, no children */ 4676 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 4677 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 4678 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 4679 0, 0, /* end of abbrev */ 4680 0 /* no more abbrev */ 4681 }, 4682 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 4683 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 4684 }; 4685 4686 /* We only need a single jit entry; statically allocate it. */ 4687 static struct jit_code_entry one_entry; 4688 4689 uintptr_t buf = (uintptr_t)buf_ptr; 4690 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 4691 DebugFrameHeader *dfh; 4692 4693 img = g_malloc(img_size); 4694 *img = img_template; 4695 4696 img->phdr.p_vaddr = buf; 4697 img->phdr.p_paddr = buf; 4698 img->phdr.p_memsz = buf_size; 4699 4700 img->shdr[1].sh_name = find_string(img->str, ".text"); 4701 img->shdr[1].sh_addr = buf; 4702 img->shdr[1].sh_size = buf_size; 4703 4704 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 4705 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 4706 4707 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 4708 img->shdr[4].sh_size = debug_frame_size; 4709 4710 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 4711 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 4712 4713 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 4714 img->sym[1].st_value = buf; 4715 img->sym[1].st_size = buf_size; 4716 4717 img->di.cu_low_pc = buf; 4718 img->di.cu_high_pc = buf + buf_size; 4719 img->di.fn_low_pc = buf; 4720 img->di.fn_high_pc = buf + buf_size; 4721 4722 dfh = (DebugFrameHeader *)(img + 1); 4723 memcpy(dfh, debug_frame, debug_frame_size); 4724 dfh->fde.func_start = buf; 4725 dfh->fde.func_len = buf_size; 4726 4727 #ifdef DEBUG_JIT 4728 /* Enable this block to be able to debug the ELF image file creation. 
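       (Compile with DEBUG_JIT defined; the block below then writes the
       complete image, including the appended .debug_frame, to
       /tmp/qemu.jit.)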
4729        One can use readelf, objdump, or other inspection utilities.  */
4730     {
4731         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4732         if (f) {
4733             if (fwrite(img, img_size, 1, f) != 1) {
4734                 /* Avoid the unused-result warning for fwrite; it returns the number of items written, i.e. 1 on success here.  */
4735             }
4736             fclose(f);
4737         }
4738     }
4739 #endif
4740
4741     one_entry.symfile_addr = img;
4742     one_entry.symfile_size = img_size;
4743
4744     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4745     __jit_debug_descriptor.relevant_entry = &one_entry;
4746     __jit_debug_descriptor.first_entry = &one_entry;
4747     __jit_debug_register_code();
4748 }
4749 #else
4750 /* No support for the feature.  Provide the entry point expected by exec.c,
4751    and implement the internal function we declared earlier.  */
4752
4753 static void tcg_register_jit_int(const void *buf, size_t size,
4754                                  const void *debug_frame,
4755                                  size_t debug_frame_size)
4756 {
4757 }
4758
4759 void tcg_register_jit(const void *buf, size_t buf_size)
4760 {
4761 }
4762 #endif /* ELF_HOST_MACHINE */
4763
4764 #if !TCG_TARGET_MAYBE_vec
4765 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4766 {
4767     g_assert_not_reached();
4768 }
4769 #endif
4770