/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

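/*
 * The structures below mirror the DWARF .debug_frame records consumed
 * by the GDB JIT interface (used by tcg_register_jit_int, declared just
 * below): a Common Information Entry (CIE) followed by a Frame
 * Description Entry (FDE) covering the generated code region.
 */
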
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
#ifdef CONFIG_TCG_INTERPRETER
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         ffi_cif *cif);
#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

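/*
 * The 32- and 64-bit variants below follow the same pattern: when the
 * value being emitted is exactly one insn unit wide it is stored
 * directly, otherwise memcpy() is used so that hosts whose insn unit
 * is smaller than the value (e.g. a byte-unit backend emitting a
 * 32-bit word) still get a correct, alignment-safe store.  The
 * tcg_out* helpers append at s->code_ptr; the tcg_patch* helpers
 * rewrite a previously emitted location, e.g. when resolving
 * relocations.
 */
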
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

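/*
 * The backend's tcg-target-con-set.h is included three times below
 * with different expansions of the C_O*_I* macros: first to build an
 * enum with one entry per constraint set, then to build the matching
 * constraint_sets[] array of TCGTargetOpDef initializers, and finally,
 * just before pulling in tcg-target.c.inc, so that the backend's
 * tcg_target_op_def() can return the enum values by writing the same
 * C_O*_I*(...) forms.  The C_PFX* helpers simply paste the constraint
 * letters into identifiers such as c_o1_i2_r_r_ri.
 */
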
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

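/*
 * As a concrete illustration (assuming a backend whose
 * tcg-target-con-set.h contains the line "C_O1_I2(r, r, ri)"): the
 * first include produced the enumerator c_o1_i2_r_r_ri, the second
 * produced { .args_ct_str = { "r", "r", "ri" } } at the same index in
 * constraint_sets[], and with the definitions just above the backend
 * may simply write "return C_O1_I2(r, r, ri);" inside
 * tcg_target_op_def().
 */
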
#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static GHashTable *ffi_table;

static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
#endif

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

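    /*
     * Layout of TCGHelperInfo::typemask as consumed here (assumed from
     * the dh_typecode_* definitions in the helper headers): bits [2:0]
     * hold the return type, and each following 3-bit field holds one
     * argument type, so argument i is extracted at bit (i + 1) * 3.
     * Bit 0 of each field marks the signed variant of a width, which is
     * why tests elsewhere mask with ~1 when only the width matters.
     */
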
#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field.  */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

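/*
 * A note on the region accounting used below: s->code_gen_highwater is
 * assumed to sit somewhat below the true end of the current region, so
 * that a single bounds check per allocation leaves room for a final
 * burst of output.  When the check fails, tcg_region_alloc() tries to
 * switch to a fresh region; once no regions remain, tcg_tb_alloc()
 * returns NULL and the caller is expected to flush the TB cache.
 */
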
/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

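/*
 * Split-w^x note: when the code buffer is mapped twice (once writable,
 * once executable), generation writes through s->code_ptr while
 * execution uses addresses offset by tcg_splitwx_diff; this is what
 * tcg_splitwx_to_rx() computes.  Hence the prologue below records
 * tcg_splitwx_to_rx(s->code_ptr) as the executable entry point and
 * flushes the icache over the executable mapping.
 */
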
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#if HOST_BIG_ENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

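/*
 * Non-global temporaries are recycled through the s->free_temps
 * bitmaps: index k below is the value type for ordinary temps, or
 * type + TCG_TYPE_COUNT for TB-local temps.  Allocation first searches
 * the bitmap so a previously freed temp of matching type and locality
 * is reused before a fresh TCGTemp slot is consumed.
 */
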
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_NORMAL:
    case TEMP_LOCAL:
        break;
    default:
        g_assert_not_reached();
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

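/*
 * Constants are interned: tcg_constant_internal() keeps one TCGTemp per
 * (type, value) pair in a per-type hash table, keyed by the temp's own
 * val field so no separate key storage is needed.  On 32-bit hosts a
 * 64-bit constant occupies an adjacent pair of temps, with the first of
 * the pair retaining the full 64-bit value for hashing.
 */
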
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned typemask;
    const TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    typemask = info->typemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_typemask = typemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    typemask = 0;
    for (i = real_args = 0; i < nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            TCGv_i32 h = tcg_temp_new_i32();
            TCGv_i32 l = tcg_temp_new_i32();
            tcg_gen_extr_i64_i32(l, h, orig);
            split_args[real_args++] = tcgv_i32_temp(h);
            typemask |= dh_typecode_i32 << (real_args * 3);
            split_args[real_args++] = tcgv_i32_temp(l);
            typemask |= dh_typecode_i32 << (real_args * 3);
        } else {
            split_args[real_args++] = args[i];
            typemask |= argtype << (real_args * 3);
        }
    }
    nargs = real_args;
    args = split_args;
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
        bool is_signed = argtype & 1;

        if (is_32bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i32 orig = temp_tcgv_i32(args[i]);
            if (is_signed) {
                tcg_gen_ext_i32_i64(temp, orig);
            } else {
                tcg_gen_extu_i32_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if ((typemask & 6) == dh_typecode_i64) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
#if HOST_BIG_ENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
        bool want_align = false;

#if defined(CONFIG_TCG_INTERPRETER)
        /*
         * Align all arguments, so that they land in predictable places
         * for passing off to ffi_call.
         */
        want_align = true;
#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
        /* Some targets want aligned 64 bit args */
        want_align = is_64bit;
#endif

        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }

        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /*
             * If stack grows up, then we will be placing successive
             * arguments at lower addresses, which means we need to
             * reverse the order compared to how we would normally
             * treat either big or little-endian.  For those arguments
             * that will wind up in registers, this still works for
             * HPPA (the only current STACK_GROWSUP target) since the
             * argument registers are *also* allocated in decreasing
             * order.  If another such target is added, this logic may
             * have to get more complicated to differentiate between
             * stack arguments and register arguments.
             */
#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if ((orig_typemask & 6) == dh_typecode_i64) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;

        if (is_32bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }
"," : "", 1993 op->args[k]); 1994 } 1995 } 1996 1997 if (have_prefs || op->life) { 1998 for (; col < 40; ++col) { 1999 putc(' ', f); 2000 } 2001 } 2002 2003 if (op->life) { 2004 unsigned life = op->life; 2005 2006 if (life & (SYNC_ARG * 3)) { 2007 ne_fprintf(f, " sync:"); 2008 for (i = 0; i < 2; ++i) { 2009 if (life & (SYNC_ARG << i)) { 2010 ne_fprintf(f, " %d", i); 2011 } 2012 } 2013 } 2014 life /= DEAD_ARG; 2015 if (life) { 2016 ne_fprintf(f, " dead:"); 2017 for (i = 0; life; ++i, life >>= 1) { 2018 if (life & 1) { 2019 ne_fprintf(f, " %d", i); 2020 } 2021 } 2022 } 2023 } 2024 2025 if (have_prefs) { 2026 for (i = 0; i < nb_oargs; ++i) { 2027 TCGRegSet set = op->output_pref[i]; 2028 2029 if (i == 0) { 2030 ne_fprintf(f, " pref="); 2031 } else { 2032 ne_fprintf(f, ","); 2033 } 2034 if (set == 0) { 2035 ne_fprintf(f, "none"); 2036 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2037 ne_fprintf(f, "all"); 2038 #ifdef CONFIG_DEBUG_TCG 2039 } else if (tcg_regset_single(set)) { 2040 TCGReg reg = tcg_regset_first(set); 2041 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 2042 #endif 2043 } else if (TCG_TARGET_NB_REGS <= 32) { 2044 ne_fprintf(f, "0x%x", (uint32_t)set); 2045 } else { 2046 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 2047 } 2048 } 2049 } 2050 2051 putc('\n', f); 2052 } 2053 } 2054 2055 /* we give more priority to constraints with less registers */ 2056 static int get_constraint_priority(const TCGOpDef *def, int k) 2057 { 2058 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2059 int n; 2060 2061 if (arg_ct->oalias) { 2062 /* an alias is equivalent to a single register */ 2063 n = 1; 2064 } else { 2065 n = ctpop64(arg_ct->regs); 2066 } 2067 return TCG_TARGET_NB_REGS - n + 1; 2068 } 2069 2070 /* sort from highest priority to lowest */ 2071 static void sort_constraints(TCGOpDef *def, int start, int n) 2072 { 2073 int i, j; 2074 TCGArgConstraint *a = def->args_ct; 2075 2076 for (i = 0; i < n; i++) { 2077 a[start + i].sort_index = start + i; 2078 } 2079 if (n <= 1) { 2080 return; 2081 } 2082 for (i = 0; i < n - 1; i++) { 2083 for (j = i + 1; j < n; j++) { 2084 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2085 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2086 if (p1 < p2) { 2087 int tmp = a[start + i].sort_index; 2088 a[start + i].sort_index = a[start + j].sort_index; 2089 a[start + j].sort_index = tmp; 2090 } 2091 } 2092 } 2093 } 2094 2095 static void process_op_defs(TCGContext *s) 2096 { 2097 TCGOpcode op; 2098 2099 for (op = 0; op < NB_OPS; op++) { 2100 TCGOpDef *def = &tcg_op_defs[op]; 2101 const TCGTargetOpDef *tdefs; 2102 int i, nb_args; 2103 2104 if (def->flags & TCG_OPF_NOT_PRESENT) { 2105 continue; 2106 } 2107 2108 nb_args = def->nb_iargs + def->nb_oargs; 2109 if (nb_args == 0) { 2110 continue; 2111 } 2112 2113 /* 2114 * Macro magic should make it impossible, but double-check that 2115 * the array index is in range. Since the signness of an enum 2116 * is implementation defined, force the result to unsigned. 2117 */ 2118 unsigned con_set = tcg_target_op_def(op); 2119 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2120 tdefs = &constraint_sets[con_set]; 2121 2122 for (i = 0; i < nb_args; i++) { 2123 const char *ct_str = tdefs->args_ct_str[i]; 2124 /* Incomplete TCGTargetOpDef entry. */ 2125 tcg_debug_assert(ct_str != NULL); 2126 2127 while (*ct_str != '\0') { 2128 switch(*ct_str) { 2129 case '0' ... 
'9': 2130 { 2131 int oarg = *ct_str - '0'; 2132 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2133 tcg_debug_assert(oarg < def->nb_oargs); 2134 tcg_debug_assert(def->args_ct[oarg].regs != 0); 2135 def->args_ct[i] = def->args_ct[oarg]; 2136 /* The output sets oalias. */ 2137 def->args_ct[oarg].oalias = true; 2138 def->args_ct[oarg].alias_index = i; 2139 /* The input sets ialias. */ 2140 def->args_ct[i].ialias = true; 2141 def->args_ct[i].alias_index = oarg; 2142 } 2143 ct_str++; 2144 break; 2145 case '&': 2146 def->args_ct[i].newreg = true; 2147 ct_str++; 2148 break; 2149 case 'i': 2150 def->args_ct[i].ct |= TCG_CT_CONST; 2151 ct_str++; 2152 break; 2153 2154 /* Include all of the target-specific constraints. */ 2155 2156 #undef CONST 2157 #define CONST(CASE, MASK) \ 2158 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break; 2159 #define REGS(CASE, MASK) \ 2160 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break; 2161 2162 #include "tcg-target-con-str.h" 2163 2164 #undef REGS 2165 #undef CONST 2166 default: 2167 /* Typo in TCGTargetOpDef constraint. */ 2168 g_assert_not_reached(); 2169 } 2170 } 2171 } 2172 2173 /* TCGTargetOpDef entry with too much information? */ 2174 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2175 2176 /* sort the constraints (XXX: this is just a heuristic) */ 2177 sort_constraints(def, 0, def->nb_oargs); 2178 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2179 } 2180 } 2181 2182 void tcg_op_remove(TCGContext *s, TCGOp *op) 2183 { 2184 TCGLabel *label; 2185 2186 switch (op->opc) { 2187 case INDEX_op_br: 2188 label = arg_label(op->args[0]); 2189 label->refs--; 2190 break; 2191 case INDEX_op_brcond_i32: 2192 case INDEX_op_brcond_i64: 2193 label = arg_label(op->args[3]); 2194 label->refs--; 2195 break; 2196 case INDEX_op_brcond2_i32: 2197 label = arg_label(op->args[5]); 2198 label->refs--; 2199 break; 2200 default: 2201 break; 2202 } 2203 2204 QTAILQ_REMOVE(&s->ops, op, link); 2205 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2206 s->nb_ops--; 2207 2208 #ifdef CONFIG_PROFILER 2209 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2210 #endif 2211 } 2212 2213 void tcg_remove_ops_after(TCGOp *op) 2214 { 2215 TCGContext *s = tcg_ctx; 2216 2217 while (true) { 2218 TCGOp *last = tcg_last_op(); 2219 if (last == op) { 2220 return; 2221 } 2222 tcg_op_remove(s, last); 2223 } 2224 } 2225 2226 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2227 { 2228 TCGContext *s = tcg_ctx; 2229 TCGOp *op; 2230 2231 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2232 op = tcg_malloc(sizeof(TCGOp)); 2233 } else { 2234 op = QTAILQ_FIRST(&s->free_ops); 2235 QTAILQ_REMOVE(&s->free_ops, op, link); 2236 } 2237 memset(op, 0, offsetof(TCGOp, link)); 2238 op->opc = opc; 2239 s->nb_ops++; 2240 2241 return op; 2242 } 2243 2244 TCGOp *tcg_emit_op(TCGOpcode opc) 2245 { 2246 TCGOp *op = tcg_op_alloc(opc); 2247 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2248 return op; 2249 } 2250 2251 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2252 { 2253 TCGOp *new_op = tcg_op_alloc(opc); 2254 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2255 return new_op; 2256 } 2257 2258 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2259 { 2260 TCGOp *new_op = tcg_op_alloc(opc); 2261 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2262 return new_op; 2263 } 2264 2265 /* Reachable analysis: remove unreachable code.
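A single forward pass suffices: a 'dead' flag is set at each unconditional control transfer (branch, TB exit, noreturn call) and cleared again at each label that is still referenced.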
*/ 2266 static void reachable_code_pass(TCGContext *s) 2267 { 2268 TCGOp *op, *op_next; 2269 bool dead = false; 2270 2271 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2272 bool remove = dead; 2273 TCGLabel *label; 2274 2275 switch (op->opc) { 2276 case INDEX_op_set_label: 2277 label = arg_label(op->args[0]); 2278 if (label->refs == 0) { 2279 /* 2280 * While there is an occasional backward branch, virtually 2281 * all branches generated by the translators are forward. 2282 * Which means that generally we will have already removed 2283 * all references to the label by the time we see it, and 2284 * there is little to be gained by iterating. 2285 */ 2286 remove = true; 2287 } else { 2288 /* Once we see a label, insns become live again. */ 2289 dead = false; 2290 remove = false; 2291 2292 /* 2293 * Optimization can fold conditional branches to unconditional. 2294 * If we find a label with one reference which is preceded by 2295 * an unconditional branch to it, remove both. This had to 2296 * wait until the dead code between them was removed. 2297 */ 2298 if (label->refs == 1) { 2299 TCGOp *op_prev = QTAILQ_PREV(op, link); 2300 if (op_prev->opc == INDEX_op_br && 2301 label == arg_label(op_prev->args[0])) { 2302 tcg_op_remove(s, op_prev); 2303 remove = true; 2304 } 2305 } 2306 } 2307 break; 2308 2309 case INDEX_op_br: 2310 case INDEX_op_exit_tb: 2311 case INDEX_op_goto_ptr: 2312 /* Unconditional branches; everything following is dead. */ 2313 dead = true; 2314 break; 2315 2316 case INDEX_op_call: 2317 /* Notice noreturn helper calls, raising exceptions. */ 2318 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2319 dead = true; 2320 } 2321 break; 2322 2323 case INDEX_op_insn_start: 2324 /* Never remove -- we need to keep these for unwind. */ 2325 remove = false; 2326 break; 2327 2328 default: 2329 break; 2330 } 2331 2332 if (remove) { 2333 tcg_op_remove(s, op); 2334 } 2335 } 2336 } 2337 2338 #define TS_DEAD 1 2339 #define TS_MEM 2 2340 2341 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2342 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2343 2344 /* For liveness_pass_1, the register preferences for a given temp. */ 2345 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2346 { 2347 return ts->state_ptr; 2348 } 2349 2350 /* For liveness_pass_1, reset the preferences for a given temp to the 2351 * maximal regset for its type. 2352 */ 2353 static inline void la_reset_pref(TCGTemp *ts) 2354 { 2355 *la_temp_pref(ts) 2356 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2357 } 2358 2359 /* liveness analysis: end of function: all temps are dead, and globals 2360 should be in memory. */ 2361 static void la_func_end(TCGContext *s, int ng, int nt) 2362 { 2363 int i; 2364 2365 for (i = 0; i < ng; ++i) { 2366 s->temps[i].state = TS_DEAD | TS_MEM; 2367 la_reset_pref(&s->temps[i]); 2368 } 2369 for (i = ng; i < nt; ++i) { 2370 s->temps[i].state = TS_DEAD; 2371 la_reset_pref(&s->temps[i]); 2372 } 2373 } 2374 2375 /* liveness analysis: end of basic block: all temps are dead, globals 2376 and local temps should be in memory.
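Concretely, TEMP_FIXED/TEMP_GLOBAL/TEMP_LOCAL become TS_DEAD | TS_MEM (the memory slot is the live copy), while TEMP_NORMAL, TEMP_EBB and TEMP_CONST become plain TS_DEAD, none of which survive an unconditional block end.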
*/ 2377 static void la_bb_end(TCGContext *s, int ng, int nt) 2378 { 2379 int i; 2380 2381 for (i = 0; i < nt; ++i) { 2382 TCGTemp *ts = &s->temps[i]; 2383 int state; 2384 2385 switch (ts->kind) { 2386 case TEMP_FIXED: 2387 case TEMP_GLOBAL: 2388 case TEMP_LOCAL: 2389 state = TS_DEAD | TS_MEM; 2390 break; 2391 case TEMP_NORMAL: 2392 case TEMP_EBB: 2393 case TEMP_CONST: 2394 state = TS_DEAD; 2395 break; 2396 default: 2397 g_assert_not_reached(); 2398 } 2399 ts->state = state; 2400 la_reset_pref(ts); 2401 } 2402 } 2403 2404 /* liveness analysis: sync globals back to memory. */ 2405 static void la_global_sync(TCGContext *s, int ng) 2406 { 2407 int i; 2408 2409 for (i = 0; i < ng; ++i) { 2410 int state = s->temps[i].state; 2411 s->temps[i].state = state | TS_MEM; 2412 if (state == TS_DEAD) { 2413 /* If the global was previously dead, reset prefs. */ 2414 la_reset_pref(&s->temps[i]); 2415 } 2416 } 2417 } 2418 2419 /* 2420 * liveness analysis: conditional branch: all temps are dead unless 2421 * explicitly live-across-conditional-branch, globals and local temps 2422 * should be synced. 2423 */ 2424 static void la_bb_sync(TCGContext *s, int ng, int nt) 2425 { 2426 la_global_sync(s, ng); 2427 2428 for (int i = ng; i < nt; ++i) { 2429 TCGTemp *ts = &s->temps[i]; 2430 int state; 2431 2432 switch (ts->kind) { 2433 case TEMP_LOCAL: 2434 state = ts->state; 2435 ts->state = state | TS_MEM; 2436 if (state != TS_DEAD) { 2437 continue; 2438 } 2439 break; 2440 case TEMP_NORMAL: 2441 s->temps[i].state = TS_DEAD; 2442 break; 2443 case TEMP_EBB: 2444 case TEMP_CONST: 2445 continue; 2446 default: 2447 g_assert_not_reached(); 2448 } 2449 la_reset_pref(&s->temps[i]); 2450 } 2451 } 2452 2453 /* liveness analysis: sync globals back to memory and kill. */ 2454 static void la_global_kill(TCGContext *s, int ng) 2455 { 2456 int i; 2457 2458 for (i = 0; i < ng; i++) { 2459 s->temps[i].state = TS_DEAD | TS_MEM; 2460 la_reset_pref(&s->temps[i]); 2461 } 2462 } 2463 2464 /* liveness analysis: note live globals crossing calls. */ 2465 static void la_cross_call(TCGContext *s, int nt) 2466 { 2467 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2468 int i; 2469 2470 for (i = 0; i < nt; i++) { 2471 TCGTemp *ts = &s->temps[i]; 2472 if (!(ts->state & TS_DEAD)) { 2473 TCGRegSet *pset = la_temp_pref(ts); 2474 TCGRegSet set = *pset; 2475 2476 set &= mask; 2477 /* If the combination is not possible, restart. */ 2478 if (set == 0) { 2479 set = tcg_target_available_regs[ts->type] & mask; 2480 } 2481 *pset = set; 2482 } 2483 } 2484 } 2485 2486 /* Liveness analysis: update the opc_arg_life array to tell whether a 2487 given input argument is dead. Instructions updating dead 2488 temporaries are removed. */ 2489 static void liveness_pass_1(TCGContext *s) 2490 { 2491 int nb_globals = s->nb_globals; 2492 int nb_temps = s->nb_temps; 2493 TCGOp *op, *op_prev; 2494 TCGRegSet *prefs; 2495 int i; 2496 2497 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2498 for (i = 0; i < nb_temps; ++i) { 2499 s->temps[i].state_ptr = prefs + i; 2500 } 2501 2502 /* ??? Should be redundant with the exit_tb that ends the TB.
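We seed the state anyway, so that the backward walk below always starts from a well-defined end-of-function state.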
*/ 2503 la_func_end(s, nb_globals, nb_temps); 2504 2505 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2506 int nb_iargs, nb_oargs; 2507 TCGOpcode opc_new, opc_new2; 2508 bool have_opc_new2; 2509 TCGLifeData arg_life = 0; 2510 TCGTemp *ts; 2511 TCGOpcode opc = op->opc; 2512 const TCGOpDef *def = &tcg_op_defs[opc]; 2513 2514 switch (opc) { 2515 case INDEX_op_call: 2516 { 2517 int call_flags; 2518 int nb_call_regs; 2519 2520 nb_oargs = TCGOP_CALLO(op); 2521 nb_iargs = TCGOP_CALLI(op); 2522 call_flags = tcg_call_flags(op); 2523 2524 /* pure functions can be removed if their result is unused */ 2525 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2526 for (i = 0; i < nb_oargs; i++) { 2527 ts = arg_temp(op->args[i]); 2528 if (ts->state != TS_DEAD) { 2529 goto do_not_remove_call; 2530 } 2531 } 2532 goto do_remove; 2533 } 2534 do_not_remove_call: 2535 2536 /* Output args are dead. */ 2537 for (i = 0; i < nb_oargs; i++) { 2538 ts = arg_temp(op->args[i]); 2539 if (ts->state & TS_DEAD) { 2540 arg_life |= DEAD_ARG << i; 2541 } 2542 if (ts->state & TS_MEM) { 2543 arg_life |= SYNC_ARG << i; 2544 } 2545 ts->state = TS_DEAD; 2546 la_reset_pref(ts); 2547 2548 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2549 op->output_pref[i] = 0; 2550 } 2551 2552 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2553 TCG_CALL_NO_READ_GLOBALS))) { 2554 la_global_kill(s, nb_globals); 2555 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2556 la_global_sync(s, nb_globals); 2557 } 2558 2559 /* Record arguments that die in this helper. */ 2560 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2561 ts = arg_temp(op->args[i]); 2562 if (ts && ts->state & TS_DEAD) { 2563 arg_life |= DEAD_ARG << i; 2564 } 2565 } 2566 2567 /* For all live registers, remove call-clobbered prefs. */ 2568 la_cross_call(s, nb_temps); 2569 2570 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2571 2572 /* Input arguments are live for preceding opcodes. */ 2573 for (i = 0; i < nb_iargs; i++) { 2574 ts = arg_temp(op->args[i + nb_oargs]); 2575 if (ts && ts->state & TS_DEAD) { 2576 /* For those arguments that die, and will be allocated 2577 * in registers, clear the register set for that arg, 2578 * to be filled in below. For args that will be on 2579 * the stack, reset to any available reg. 2580 */ 2581 *la_temp_pref(ts) 2582 = (i < nb_call_regs ? 0 : 2583 tcg_target_available_regs[ts->type]); 2584 ts->state &= ~TS_DEAD; 2585 } 2586 } 2587 2588 /* For each input argument, add its input register to prefs. 2589 If a temp is used once, this produces a single set bit. */ 2590 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2591 ts = arg_temp(op->args[i + nb_oargs]); 2592 if (ts) { 2593 tcg_regset_set_reg(*la_temp_pref(ts), 2594 tcg_target_call_iarg_regs[i]); 2595 } 2596 } 2597 } 2598 break; 2599 case INDEX_op_insn_start: 2600 break; 2601 case INDEX_op_discard: 2602 /* mark the temporary as dead */ 2603 ts = arg_temp(op->args[0]); 2604 ts->state = TS_DEAD; 2605 la_reset_pref(ts); 2606 break; 2607 2608 case INDEX_op_add2_i32: 2609 opc_new = INDEX_op_add_i32; 2610 goto do_addsub2; 2611 case INDEX_op_sub2_i32: 2612 opc_new = INDEX_op_sub_i32; 2613 goto do_addsub2; 2614 case INDEX_op_add2_i64: 2615 opc_new = INDEX_op_add_i64; 2616 goto do_addsub2; 2617 case INDEX_op_sub2_i64: 2618 opc_new = INDEX_op_sub_i64; 2619 do_addsub2: 2620 nb_iargs = 4; 2621 nb_oargs = 2; 2622 /* Test if the high part of the operation is dead, but not 2623 the low part. The result can be optimized to a simple 2624 add or sub. 
This happens often for an x86_64 guest when the 2625 CPU mode is set to 32-bit. */ 2626 if (arg_temp(op->args[1])->state == TS_DEAD) { 2627 if (arg_temp(op->args[0])->state == TS_DEAD) { 2628 goto do_remove; 2629 } 2630 /* Replace the opcode and adjust the args in place, 2631 leaving 3 unused args at the end. */ 2632 op->opc = opc = opc_new; 2633 op->args[1] = op->args[2]; 2634 op->args[2] = op->args[4]; 2635 /* Fall through and mark the single-word operation live. */ 2636 nb_iargs = 2; 2637 nb_oargs = 1; 2638 } 2639 goto do_not_remove; 2640 2641 case INDEX_op_mulu2_i32: 2642 opc_new = INDEX_op_mul_i32; 2643 opc_new2 = INDEX_op_muluh_i32; 2644 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2645 goto do_mul2; 2646 case INDEX_op_muls2_i32: 2647 opc_new = INDEX_op_mul_i32; 2648 opc_new2 = INDEX_op_mulsh_i32; 2649 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2650 goto do_mul2; 2651 case INDEX_op_mulu2_i64: 2652 opc_new = INDEX_op_mul_i64; 2653 opc_new2 = INDEX_op_muluh_i64; 2654 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2655 goto do_mul2; 2656 case INDEX_op_muls2_i64: 2657 opc_new = INDEX_op_mul_i64; 2658 opc_new2 = INDEX_op_mulsh_i64; 2659 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2660 goto do_mul2; 2661 do_mul2: 2662 nb_iargs = 2; 2663 nb_oargs = 2; 2664 if (arg_temp(op->args[1])->state == TS_DEAD) { 2665 if (arg_temp(op->args[0])->state == TS_DEAD) { 2666 /* Both parts of the operation are dead. */ 2667 goto do_remove; 2668 } 2669 /* The high part of the operation is dead; generate the low. */ 2670 op->opc = opc = opc_new; 2671 op->args[1] = op->args[2]; 2672 op->args[2] = op->args[3]; 2673 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2674 /* The low part of the operation is dead; generate the high. */ 2675 op->opc = opc = opc_new2; 2676 op->args[0] = op->args[1]; 2677 op->args[1] = op->args[2]; 2678 op->args[2] = op->args[3]; 2679 } else { 2680 goto do_not_remove; 2681 } 2682 /* Mark the single-word operation live. */ 2683 nb_oargs = 1; 2684 goto do_not_remove; 2685 2686 default: 2687 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2688 nb_iargs = def->nb_iargs; 2689 nb_oargs = def->nb_oargs; 2690 2691 /* Test if the operation can be removed because all 2692 its outputs are dead. We assume that nb_oargs == 0 2693 implies side effects. */ 2694 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2695 for (i = 0; i < nb_oargs; i++) { 2696 if (arg_temp(op->args[i])->state != TS_DEAD) { 2697 goto do_not_remove; 2698 } 2699 } 2700 goto do_remove; 2701 } 2702 goto do_not_remove; 2703 2704 do_remove: 2705 tcg_op_remove(s, op); 2706 break; 2707 2708 do_not_remove: 2709 for (i = 0; i < nb_oargs; i++) { 2710 ts = arg_temp(op->args[i]); 2711 2712 /* Remember the preference of the uses that followed. */ 2713 op->output_pref[i] = *la_temp_pref(ts); 2714 2715 /* Output args are dead. */ 2716 if (ts->state & TS_DEAD) { 2717 arg_life |= DEAD_ARG << i; 2718 } 2719 if (ts->state & TS_MEM) { 2720 arg_life |= SYNC_ARG << i; 2721 } 2722 ts->state = TS_DEAD; 2723 la_reset_pref(ts); 2724 } 2725 2726 /* If end of basic block, update.
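Note the test order: conditional branches are also flagged TCG_OPF_BB_END (see tcg-opc.h), so TCG_OPF_COND_BRANCH must be checked first to get the weaker la_bb_sync handling.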
*/ 2727 if (def->flags & TCG_OPF_BB_EXIT) { 2728 la_func_end(s, nb_globals, nb_temps); 2729 } else if (def->flags & TCG_OPF_COND_BRANCH) { 2730 la_bb_sync(s, nb_globals, nb_temps); 2731 } else if (def->flags & TCG_OPF_BB_END) { 2732 la_bb_end(s, nb_globals, nb_temps); 2733 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2734 la_global_sync(s, nb_globals); 2735 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2736 la_cross_call(s, nb_temps); 2737 } 2738 } 2739 2740 /* Record arguments that die in this opcode. */ 2741 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2742 ts = arg_temp(op->args[i]); 2743 if (ts->state & TS_DEAD) { 2744 arg_life |= DEAD_ARG << i; 2745 } 2746 } 2747 2748 /* Input arguments are live for preceding opcodes. */ 2749 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2750 ts = arg_temp(op->args[i]); 2751 if (ts->state & TS_DEAD) { 2752 /* For operands that were dead, initially allow 2753 all regs for the type. */ 2754 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2755 ts->state &= ~TS_DEAD; 2756 } 2757 } 2758 2759 /* Incorporate constraints for this operand. */ 2760 switch (opc) { 2761 case INDEX_op_mov_i32: 2762 case INDEX_op_mov_i64: 2763 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2764 have proper constraints. That said, special case 2765 moves to propagate preferences backward. */ 2766 if (IS_DEAD_ARG(1)) { 2767 *la_temp_pref(arg_temp(op->args[0])) 2768 = *la_temp_pref(arg_temp(op->args[1])); 2769 } 2770 break; 2771 2772 default: 2773 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2774 const TCGArgConstraint *ct = &def->args_ct[i]; 2775 TCGRegSet set, *pset; 2776 2777 ts = arg_temp(op->args[i]); 2778 pset = la_temp_pref(ts); 2779 set = *pset; 2780 2781 set &= ct->regs; 2782 if (ct->ialias) { 2783 set &= op->output_pref[ct->alias_index]; 2784 } 2785 /* If the combination is not possible, restart. */ 2786 if (set == 0) { 2787 set = ct->regs; 2788 } 2789 *pset = set; 2790 } 2791 break; 2792 } 2793 break; 2794 } 2795 op->life = arg_life; 2796 } 2797 } 2798 2799 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2800 static bool liveness_pass_2(TCGContext *s) 2801 { 2802 int nb_globals = s->nb_globals; 2803 int nb_temps, i; 2804 bool changes = false; 2805 TCGOp *op, *op_next; 2806 2807 /* Create a temporary for each indirect global. */ 2808 for (i = 0; i < nb_globals; ++i) { 2809 TCGTemp *its = &s->temps[i]; 2810 if (its->indirect_reg) { 2811 TCGTemp *dts = tcg_temp_alloc(s); 2812 dts->type = its->type; 2813 dts->base_type = its->base_type; 2814 dts->kind = TEMP_EBB; 2815 its->state_ptr = dts; 2816 } else { 2817 its->state_ptr = NULL; 2818 } 2819 /* All globals begin dead. */ 2820 its->state = TS_DEAD; 2821 } 2822 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2823 TCGTemp *its = &s->temps[i]; 2824 its->state_ptr = NULL; 2825 its->state = TS_DEAD; 2826 } 2827 2828 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2829 TCGOpcode opc = op->opc; 2830 const TCGOpDef *def = &tcg_op_defs[opc]; 2831 TCGLifeData arg_life = op->life; 2832 int nb_iargs, nb_oargs, call_flags; 2833 TCGTemp *arg_ts, *dir_ts; 2834 2835 if (opc == INDEX_op_call) { 2836 nb_oargs = TCGOP_CALLO(op); 2837 nb_iargs = TCGOP_CALLI(op); 2838 call_flags = tcg_call_flags(op); 2839 } else { 2840 nb_iargs = def->nb_iargs; 2841 nb_oargs = def->nb_oargs; 2842 2843 /* Set flags similar to how calls require. 
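That is, describe each opcode's effect on the globals in terms of the TCG_CALL_NO_* bits, so that the global sync/save checks below can be shared with the call path.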
*/ 2844 if (def->flags & TCG_OPF_COND_BRANCH) { 2845 /* Like reading globals: sync_globals */ 2846 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2847 } else if (def->flags & TCG_OPF_BB_END) { 2848 /* Like writing globals: save_globals */ 2849 call_flags = 0; 2850 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2851 /* Like reading globals: sync_globals */ 2852 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2853 } else { 2854 /* No effect on globals. */ 2855 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2856 TCG_CALL_NO_WRITE_GLOBALS); 2857 } 2858 } 2859 2860 /* Make sure that input arguments are available. */ 2861 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2862 arg_ts = arg_temp(op->args[i]); 2863 if (arg_ts) { 2864 dir_ts = arg_ts->state_ptr; 2865 if (dir_ts && arg_ts->state == TS_DEAD) { 2866 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2867 ? INDEX_op_ld_i32 2868 : INDEX_op_ld_i64); 2869 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 2870 2871 lop->args[0] = temp_arg(dir_ts); 2872 lop->args[1] = temp_arg(arg_ts->mem_base); 2873 lop->args[2] = arg_ts->mem_offset; 2874 2875 /* Loaded, but synced with memory. */ 2876 arg_ts->state = TS_MEM; 2877 } 2878 } 2879 } 2880 2881 /* Perform input replacement, and mark inputs that became dead. 2882 No action is required except keeping temp_state up to date 2883 so that we reload when needed. */ 2884 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2885 arg_ts = arg_temp(op->args[i]); 2886 if (arg_ts) { 2887 dir_ts = arg_ts->state_ptr; 2888 if (dir_ts) { 2889 op->args[i] = temp_arg(dir_ts); 2890 changes = true; 2891 if (IS_DEAD_ARG(i)) { 2892 arg_ts->state = TS_DEAD; 2893 } 2894 } 2895 } 2896 } 2897 2898 /* Liveness analysis should ensure that the following are 2899 all correct, for call sites and basic block end points. */ 2900 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2901 /* Nothing to do */ 2902 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2903 for (i = 0; i < nb_globals; ++i) { 2904 /* Liveness should see that globals are synced back, 2905 that is, either TS_DEAD or TS_MEM. */ 2906 arg_ts = &s->temps[i]; 2907 tcg_debug_assert(arg_ts->state_ptr == 0 2908 || arg_ts->state != 0); 2909 } 2910 } else { 2911 for (i = 0; i < nb_globals; ++i) { 2912 /* Liveness should see that globals are saved back, 2913 that is, TS_DEAD, waiting to be reloaded. */ 2914 arg_ts = &s->temps[i]; 2915 tcg_debug_assert(arg_ts->state_ptr == 0 2916 || arg_ts->state == TS_DEAD); 2917 } 2918 } 2919 2920 /* Outputs become available. */ 2921 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 2922 arg_ts = arg_temp(op->args[0]); 2923 dir_ts = arg_ts->state_ptr; 2924 if (dir_ts) { 2925 op->args[0] = temp_arg(dir_ts); 2926 changes = true; 2927 2928 /* The output is now live and modified. */ 2929 arg_ts->state = 0; 2930 2931 if (NEED_SYNC_ARG(0)) { 2932 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2933 ? 
INDEX_op_st_i32 2934 : INDEX_op_st_i64); 2935 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2936 TCGTemp *out_ts = dir_ts; 2937 2938 if (IS_DEAD_ARG(0)) { 2939 out_ts = arg_temp(op->args[1]); 2940 arg_ts->state = TS_DEAD; 2941 tcg_op_remove(s, op); 2942 } else { 2943 arg_ts->state = TS_MEM; 2944 } 2945 2946 sop->args[0] = temp_arg(out_ts); 2947 sop->args[1] = temp_arg(arg_ts->mem_base); 2948 sop->args[2] = arg_ts->mem_offset; 2949 } else { 2950 tcg_debug_assert(!IS_DEAD_ARG(0)); 2951 } 2952 } 2953 } else { 2954 for (i = 0; i < nb_oargs; i++) { 2955 arg_ts = arg_temp(op->args[i]); 2956 dir_ts = arg_ts->state_ptr; 2957 if (!dir_ts) { 2958 continue; 2959 } 2960 op->args[i] = temp_arg(dir_ts); 2961 changes = true; 2962 2963 /* The output is now live and modified. */ 2964 arg_ts->state = 0; 2965 2966 /* Sync outputs upon their last write. */ 2967 if (NEED_SYNC_ARG(i)) { 2968 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2969 ? INDEX_op_st_i32 2970 : INDEX_op_st_i64); 2971 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2972 2973 sop->args[0] = temp_arg(dir_ts); 2974 sop->args[1] = temp_arg(arg_ts->mem_base); 2975 sop->args[2] = arg_ts->mem_offset; 2976 2977 arg_ts->state = TS_MEM; 2978 } 2979 /* Drop outputs that are dead. */ 2980 if (IS_DEAD_ARG(i)) { 2981 arg_ts->state = TS_DEAD; 2982 } 2983 } 2984 } 2985 } 2986 2987 return changes; 2988 } 2989 2990 #ifdef CONFIG_DEBUG_TCG 2991 static void dump_regs(TCGContext *s) 2992 { 2993 TCGTemp *ts; 2994 int i; 2995 char buf[64]; 2996 2997 for(i = 0; i < s->nb_temps; i++) { 2998 ts = &s->temps[i]; 2999 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3000 switch(ts->val_type) { 3001 case TEMP_VAL_REG: 3002 printf("%s", tcg_target_reg_names[ts->reg]); 3003 break; 3004 case TEMP_VAL_MEM: 3005 printf("%d(%s)", (int)ts->mem_offset, 3006 tcg_target_reg_names[ts->mem_base->reg]); 3007 break; 3008 case TEMP_VAL_CONST: 3009 printf("$0x%" PRIx64, ts->val); 3010 break; 3011 case TEMP_VAL_DEAD: 3012 printf("D"); 3013 break; 3014 default: 3015 printf("???"); 3016 break; 3017 } 3018 printf("\n"); 3019 } 3020 3021 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 3022 if (s->reg_to_temp[i] != NULL) { 3023 printf("%s: %s\n", 3024 tcg_target_reg_names[i], 3025 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 3026 } 3027 } 3028 } 3029 3030 static void check_regs(TCGContext *s) 3031 { 3032 int reg; 3033 int k; 3034 TCGTemp *ts; 3035 char buf[64]; 3036 3037 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 3038 ts = s->reg_to_temp[reg]; 3039 if (ts != NULL) { 3040 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 3041 printf("Inconsistency for register %s:\n", 3042 tcg_target_reg_names[reg]); 3043 goto fail; 3044 } 3045 } 3046 } 3047 for (k = 0; k < s->nb_temps; k++) { 3048 ts = &s->temps[k]; 3049 if (ts->val_type == TEMP_VAL_REG 3050 && ts->kind != TEMP_FIXED 3051 && s->reg_to_temp[ts->reg] != ts) { 3052 printf("Inconsistency for temp %s:\n", 3053 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3054 fail: 3055 printf("reg state:\n"); 3056 dump_regs(s); 3057 tcg_abort(); 3058 } 3059 } 3060 } 3061 #endif 3062 3063 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3064 { 3065 intptr_t off, size, align; 3066 3067 switch (ts->type) { 3068 case TCG_TYPE_I32: 3069 size = align = 4; 3070 break; 3071 case TCG_TYPE_I64: 3072 case TCG_TYPE_V64: 3073 size = align = 8; 3074 break; 3075 case TCG_TYPE_V128: 3076 size = align = 16; 3077 break; 3078 case TCG_TYPE_V256: 3079 /* Note that we do not require aligned storage for V256. 
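Only 16-byte alignment is guaranteed below, so the backend must be prepared to use unaligned accesses for 32-byte slots.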
*/ 3080 size = 32, align = 16; 3081 break; 3082 default: 3083 g_assert_not_reached(); 3084 } 3085 3086 /* 3087 * Assume the stack is sufficiently aligned. 3088 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3089 * and do not require 16 byte vector alignment. This seems slightly 3090 * easier than fully parameterizing the above switch statement. 3091 */ 3092 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3093 off = ROUND_UP(s->current_frame_offset, align); 3094 3095 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3096 if (off + size > s->frame_end) { 3097 tcg_raise_tb_overflow(s); 3098 } 3099 s->current_frame_offset = off + size; 3100 3101 ts->mem_offset = off; 3102 #if defined(__sparc__) 3103 ts->mem_offset += TCG_TARGET_STACK_BIAS; 3104 #endif 3105 ts->mem_base = s->frame_temp; 3106 ts->mem_allocated = 1; 3107 } 3108 3109 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3110 3111 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3112 mark it free; otherwise mark it dead. */ 3113 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3114 { 3115 TCGTempVal new_type; 3116 3117 switch (ts->kind) { 3118 case TEMP_FIXED: 3119 return; 3120 case TEMP_GLOBAL: 3121 case TEMP_LOCAL: 3122 new_type = TEMP_VAL_MEM; 3123 break; 3124 case TEMP_NORMAL: 3125 case TEMP_EBB: 3126 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3127 break; 3128 case TEMP_CONST: 3129 new_type = TEMP_VAL_CONST; 3130 break; 3131 default: 3132 g_assert_not_reached(); 3133 } 3134 if (ts->val_type == TEMP_VAL_REG) { 3135 s->reg_to_temp[ts->reg] = NULL; 3136 } 3137 ts->val_type = new_type; 3138 } 3139 3140 /* Mark a temporary as dead. */ 3141 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3142 { 3143 temp_free_or_dead(s, ts, 1); 3144 } 3145 3146 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3147 register needs to be allocated to store a constant. If 'free_or_dead' 3148 is non-zero, subsequently release the temporary; if it is positive, the 3149 temp is dead; if it is negative, the temp is free. */ 3150 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3151 TCGRegSet preferred_regs, int free_or_dead) 3152 { 3153 if (!temp_readonly(ts) && !ts->mem_coherent) { 3154 if (!ts->mem_allocated) { 3155 temp_allocate_frame(s, ts); 3156 } 3157 switch (ts->val_type) { 3158 case TEMP_VAL_CONST: 3159 /* If we're going to free the temp immediately, then we won't 3160 require it later in a register, so attempt to store the 3161 constant to memory directly.
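Only if the backend cannot encode the store (tcg_out_sti returns false) do we materialize the constant in a register and fall through to the plain store below.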
*/ 3162 if (free_or_dead 3163 && tcg_out_sti(s, ts->type, ts->val, 3164 ts->mem_base->reg, ts->mem_offset)) { 3165 break; 3166 } 3167 temp_load(s, ts, tcg_target_available_regs[ts->type], 3168 allocated_regs, preferred_regs); 3169 /* fallthrough */ 3170 3171 case TEMP_VAL_REG: 3172 tcg_out_st(s, ts->type, ts->reg, 3173 ts->mem_base->reg, ts->mem_offset); 3174 break; 3175 3176 case TEMP_VAL_MEM: 3177 break; 3178 3179 case TEMP_VAL_DEAD: 3180 default: 3181 tcg_abort(); 3182 } 3183 ts->mem_coherent = 1; 3184 } 3185 if (free_or_dead) { 3186 temp_free_or_dead(s, ts, free_or_dead); 3187 } 3188 } 3189 3190 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3191 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3192 { 3193 TCGTemp *ts = s->reg_to_temp[reg]; 3194 if (ts != NULL) { 3195 temp_sync(s, ts, allocated_regs, 0, -1); 3196 } 3197 } 3198 3199 /** 3200 * tcg_reg_alloc: 3201 * @required_regs: Set of registers in which we must allocate. 3202 * @allocated_regs: Set of registers which must be avoided. 3203 * @preferred_regs: Set of registers we should prefer. 3204 * @rev: True if we search the registers in "indirect" order. 3205 * 3206 * The allocated register must be in @required_regs & ~@allocated_regs, 3207 * but if we can put it in @preferred_regs we may save a move later. 3208 */ 3209 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3210 TCGRegSet allocated_regs, 3211 TCGRegSet preferred_regs, bool rev) 3212 { 3213 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3214 TCGRegSet reg_ct[2]; 3215 const int *order; 3216 3217 reg_ct[1] = required_regs & ~allocated_regs; 3218 tcg_debug_assert(reg_ct[1] != 0); 3219 reg_ct[0] = reg_ct[1] & preferred_regs; 3220 3221 /* Skip the preferred_regs option if it cannot be satisfied, 3222 or if the preference made no difference. */ 3223 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3224 3225 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3226 3227 /* Try free registers, preferences first. */ 3228 for (j = f; j < 2; j++) { 3229 TCGRegSet set = reg_ct[j]; 3230 3231 if (tcg_regset_single(set)) { 3232 /* One register in the set. */ 3233 TCGReg reg = tcg_regset_first(set); 3234 if (s->reg_to_temp[reg] == NULL) { 3235 return reg; 3236 } 3237 } else { 3238 for (i = 0; i < n; i++) { 3239 TCGReg reg = order[i]; 3240 if (s->reg_to_temp[reg] == NULL && 3241 tcg_regset_test_reg(set, reg)) { 3242 return reg; 3243 } 3244 } 3245 } 3246 } 3247 3248 /* We must spill something. */ 3249 for (j = f; j < 2; j++) { 3250 TCGRegSet set = reg_ct[j]; 3251 3252 if (tcg_regset_single(set)) { 3253 /* One register in the set. */ 3254 TCGReg reg = tcg_regset_first(set); 3255 tcg_reg_free(s, reg, allocated_regs); 3256 return reg; 3257 } else { 3258 for (i = 0; i < n; i++) { 3259 TCGReg reg = order[i]; 3260 if (tcg_regset_test_reg(set, reg)) { 3261 tcg_reg_free(s, reg, allocated_regs); 3262 return reg; 3263 } 3264 } 3265 } 3266 } 3267 3268 tcg_abort(); 3269 } 3270 3271 /* Make sure the temporary is in a register. If needed, allocate the register 3272 from DESIRED while avoiding ALLOCATED. 
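On return, ts->val_type is TEMP_VAL_REG and reg_to_temp[] points back at ts; a value loaded from memory is marked mem_coherent, while a newly materialized constant is not.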
*/ 3273 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3274 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3275 { 3276 TCGReg reg; 3277 3278 switch (ts->val_type) { 3279 case TEMP_VAL_REG: 3280 return; 3281 case TEMP_VAL_CONST: 3282 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3283 preferred_regs, ts->indirect_base); 3284 if (ts->type <= TCG_TYPE_I64) { 3285 tcg_out_movi(s, ts->type, reg, ts->val); 3286 } else { 3287 uint64_t val = ts->val; 3288 MemOp vece = MO_64; 3289 3290 /* 3291 * Find the minimal vector element that matches the constant. 3292 * The targets will, in general, have to do this search anyway, 3293 * so do it generically. 3294 */ 3295 if (val == dup_const(MO_8, val)) { 3296 vece = MO_8; 3297 } else if (val == dup_const(MO_16, val)) { 3298 vece = MO_16; 3299 } else if (val == dup_const(MO_32, val)) { 3300 vece = MO_32; 3301 } 3302 3303 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 3304 } 3305 ts->mem_coherent = 0; 3306 break; 3307 case TEMP_VAL_MEM: 3308 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3309 preferred_regs, ts->indirect_base); 3310 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3311 ts->mem_coherent = 1; 3312 break; 3313 case TEMP_VAL_DEAD: 3314 default: 3315 tcg_abort(); 3316 } 3317 ts->reg = reg; 3318 ts->val_type = TEMP_VAL_REG; 3319 s->reg_to_temp[reg] = ts; 3320 } 3321 3322 /* Save a temporary to memory. 'allocated_regs' is used in case a 3323 temporary register needs to be allocated to store a constant. */ 3324 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3325 { 3326 /* The liveness analysis already ensures that globals are back 3327 in memory. Keep a tcg_debug_assert for safety. */ 3328 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 3329 } 3330 3331 /* save globals to their canonical location and assume they can be 3332 modified by the following code. 'allocated_regs' is used in case a 3333 temporary register needs to be allocated to store a constant. */ 3334 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3335 { 3336 int i, n; 3337 3338 for (i = 0, n = s->nb_globals; i < n; i++) { 3339 temp_save(s, &s->temps[i], allocated_regs); 3340 } 3341 } 3342 3343 /* sync globals to their canonical location and assume they can be 3344 read by the following code. 'allocated_regs' is used in case a 3345 temporary register needs to be allocated to store a constant. */ 3346 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3347 { 3348 int i, n; 3349 3350 for (i = 0, n = s->nb_globals; i < n; i++) { 3351 TCGTemp *ts = &s->temps[i]; 3352 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3353 || ts->kind == TEMP_FIXED 3354 || ts->mem_coherent); 3355 } 3356 } 3357 3358 /* at the end of a basic block, we assume all temporaries are dead and 3359 all globals are stored at their canonical location. */ 3360 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3361 { 3362 int i; 3363 3364 for (i = s->nb_globals; i < s->nb_temps; i++) { 3365 TCGTemp *ts = &s->temps[i]; 3366 3367 switch (ts->kind) { 3368 case TEMP_LOCAL: 3369 temp_save(s, ts, allocated_regs); 3370 break; 3371 case TEMP_NORMAL: 3372 case TEMP_EBB: 3373 /* The liveness analysis already ensures that temps are dead. 3374 Keep a tcg_debug_assert for safety. */ 3375 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3376 break; 3377 case TEMP_CONST: 3378 /* Similarly, we should have freed any allocated register.
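A freed TEMP_CONST reverts to TEMP_VAL_CONST, since it can always be rematerialized on demand.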
*/ 3379 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3380 break; 3381 default: 3382 g_assert_not_reached(); 3383 } 3384 } 3385 3386 save_globals(s, allocated_regs); 3387 } 3388 3389 /* 3390 * At a conditional branch, we assume all temporaries are dead unless 3391 * explicitly live-across-conditional-branch; all globals and local 3392 * temps are synced to their location. 3393 */ 3394 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3395 { 3396 sync_globals(s, allocated_regs); 3397 3398 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3399 TCGTemp *ts = &s->temps[i]; 3400 /* 3401 * The liveness analysis already ensures that temps are dead. 3402 * Keep tcg_debug_asserts for safety. 3403 */ 3404 switch (ts->kind) { 3405 case TEMP_LOCAL: 3406 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3407 break; 3408 case TEMP_NORMAL: 3409 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3410 break; 3411 case TEMP_EBB: 3412 case TEMP_CONST: 3413 break; 3414 default: 3415 g_assert_not_reached(); 3416 } 3417 } 3418 } 3419 3420 /* 3421 * Specialized code generation for INDEX_op_mov_* with a constant. 3422 */ 3423 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3424 tcg_target_ulong val, TCGLifeData arg_life, 3425 TCGRegSet preferred_regs) 3426 { 3427 /* ENV should not be modified. */ 3428 tcg_debug_assert(!temp_readonly(ots)); 3429 3430 /* The movi is not explicitly generated here. */ 3431 if (ots->val_type == TEMP_VAL_REG) { 3432 s->reg_to_temp[ots->reg] = NULL; 3433 } 3434 ots->val_type = TEMP_VAL_CONST; 3435 ots->val = val; 3436 ots->mem_coherent = 0; 3437 if (NEED_SYNC_ARG(0)) { 3438 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3439 } else if (IS_DEAD_ARG(0)) { 3440 temp_dead(s, ots); 3441 } 3442 } 3443 3444 /* 3445 * Specialized code generation for INDEX_op_mov_*. 3446 */ 3447 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3448 { 3449 const TCGLifeData arg_life = op->life; 3450 TCGRegSet allocated_regs, preferred_regs; 3451 TCGTemp *ts, *ots; 3452 TCGType otype, itype; 3453 3454 allocated_regs = s->reserved_regs; 3455 preferred_regs = op->output_pref[0]; 3456 ots = arg_temp(op->args[0]); 3457 ts = arg_temp(op->args[1]); 3458 3459 /* ENV should not be modified. */ 3460 tcg_debug_assert(!temp_readonly(ots)); 3461 3462 /* Note that otype != itype for no-op truncation. */ 3463 otype = ots->type; 3464 itype = ts->type; 3465 3466 if (ts->val_type == TEMP_VAL_CONST) { 3467 /* propagate constant or generate sti */ 3468 tcg_target_ulong val = ts->val; 3469 if (IS_DEAD_ARG(1)) { 3470 temp_dead(s, ts); 3471 } 3472 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3473 return; 3474 } 3475 3476 /* If the source value is in memory we're going to be forced 3477 to have it in a register in order to perform the copy. Copy 3478 the SOURCE value into its own register first, that way we 3479 don't have to reload SOURCE the next time it is used. */ 3480 if (ts->val_type == TEMP_VAL_MEM) { 3481 temp_load(s, ts, tcg_target_available_regs[itype], 3482 allocated_regs, preferred_regs); 3483 } 3484 3485 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3486 if (IS_DEAD_ARG(0)) { 3487 /* mov to a non-saved dead register makes no sense (even with 3488 liveness analysis disabled). 
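Liveness would simply have deleted such a mov, so a dead output here must also require a sync to its memory slot; the assert below checks exactly that.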
*/ 3489 tcg_debug_assert(NEED_SYNC_ARG(0)); 3490 if (!ots->mem_allocated) { 3491 temp_allocate_frame(s, ots); 3492 } 3493 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3494 if (IS_DEAD_ARG(1)) { 3495 temp_dead(s, ts); 3496 } 3497 temp_dead(s, ots); 3498 } else { 3499 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 3500 /* the mov can be suppressed */ 3501 if (ots->val_type == TEMP_VAL_REG) { 3502 s->reg_to_temp[ots->reg] = NULL; 3503 } 3504 ots->reg = ts->reg; 3505 temp_dead(s, ts); 3506 } else { 3507 if (ots->val_type != TEMP_VAL_REG) { 3508 /* When allocating a new register, make sure to not spill the 3509 input one. */ 3510 tcg_regset_set_reg(allocated_regs, ts->reg); 3511 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3512 allocated_regs, preferred_regs, 3513 ots->indirect_base); 3514 } 3515 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3516 /* 3517 * Cross register class move not supported. 3518 * Store the source register into the destination slot 3519 * and leave the destination temp as TEMP_VAL_MEM. 3520 */ 3521 assert(!temp_readonly(ots)); 3522 if (!ots->mem_allocated) { 3523 temp_allocate_frame(s, ots); 3524 } 3525 tcg_out_st(s, ts->type, ts->reg, 3526 ots->mem_base->reg, ots->mem_offset); 3527 ots->mem_coherent = 1; 3528 temp_free_or_dead(s, ots, -1); 3529 return; 3530 } 3531 } 3532 ots->val_type = TEMP_VAL_REG; 3533 ots->mem_coherent = 0; 3534 s->reg_to_temp[ots->reg] = ots; 3535 if (NEED_SYNC_ARG(0)) { 3536 temp_sync(s, ots, allocated_regs, 0, 0); 3537 } 3538 } 3539 } 3540 3541 /* 3542 * Specialized code generation for INDEX_op_dup_vec. 3543 */ 3544 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3545 { 3546 const TCGLifeData arg_life = op->life; 3547 TCGRegSet dup_out_regs, dup_in_regs; 3548 TCGTemp *its, *ots; 3549 TCGType itype, vtype; 3550 intptr_t endian_fixup; 3551 unsigned vece; 3552 bool ok; 3553 3554 ots = arg_temp(op->args[0]); 3555 its = arg_temp(op->args[1]); 3556 3557 /* ENV should not be modified. */ 3558 tcg_debug_assert(!temp_readonly(ots)); 3559 3560 itype = its->type; 3561 vece = TCGOP_VECE(op); 3562 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3563 3564 if (its->val_type == TEMP_VAL_CONST) { 3565 /* Propagate constant via movi -> dupi. */ 3566 tcg_target_ulong val = its->val; 3567 if (IS_DEAD_ARG(1)) { 3568 temp_dead(s, its); 3569 } 3570 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3571 return; 3572 } 3573 3574 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3575 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3576 3577 /* Allocate the output register now. */ 3578 if (ots->val_type != TEMP_VAL_REG) { 3579 TCGRegSet allocated_regs = s->reserved_regs; 3580 3581 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3582 /* Make sure to not spill the input register. */ 3583 tcg_regset_set_reg(allocated_regs, its->reg); 3584 } 3585 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3586 op->output_pref[0], ots->indirect_base); 3587 ots->val_type = TEMP_VAL_REG; 3588 ots->mem_coherent = 0; 3589 s->reg_to_temp[ots->reg] = ots; 3590 } 3591 3592 switch (its->val_type) { 3593 case TEMP_VAL_REG: 3594 /* 3595 * The dup constraints must be broad, covering all possible VECE. 3596 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 3597 * to fail, indicating that extra moves are required for that case.
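* The fallback chain below is: dup from the input register; else an
* integer-to-vector move into the output followed by a dup in place;
* else sync the input and dup from its memory slot; else a plain load
* into the output followed by a dup in place, which must succeed.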
3598 */ 3599 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 3600 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 3601 goto done; 3602 } 3603 /* Try again from memory or a vector input register. */ 3604 } 3605 if (!its->mem_coherent) { 3606 /* 3607 * The input register is not synced, and so an extra store 3608 * would be required to use memory. Attempt an integer-vector 3609 * register move first. We do not have a TCGRegSet for this. 3610 */ 3611 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 3612 break; 3613 } 3614 /* Sync the temp back to its slot and load from there. */ 3615 temp_sync(s, its, s->reserved_regs, 0, 0); 3616 } 3617 /* fall through */ 3618 3619 case TEMP_VAL_MEM: 3620 #if HOST_BIG_ENDIAN 3621 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; 3622 endian_fixup -= 1 << vece; 3623 #else 3624 endian_fixup = 0; 3625 #endif 3626 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 3627 its->mem_offset + endian_fixup)) { 3628 goto done; 3629 } 3630 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 3631 break; 3632 3633 default: 3634 g_assert_not_reached(); 3635 } 3636 3637 /* We now have a vector input register, so dup must succeed. */ 3638 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 3639 tcg_debug_assert(ok); 3640 3641 done: 3642 if (IS_DEAD_ARG(1)) { 3643 temp_dead(s, its); 3644 } 3645 if (NEED_SYNC_ARG(0)) { 3646 temp_sync(s, ots, s->reserved_regs, 0, 0); 3647 } 3648 if (IS_DEAD_ARG(0)) { 3649 temp_dead(s, ots); 3650 } 3651 } 3652 3653 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3654 { 3655 const TCGLifeData arg_life = op->life; 3656 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3657 TCGRegSet i_allocated_regs; 3658 TCGRegSet o_allocated_regs; 3659 int i, k, nb_iargs, nb_oargs; 3660 TCGReg reg; 3661 TCGArg arg; 3662 const TCGArgConstraint *arg_ct; 3663 TCGTemp *ts; 3664 TCGArg new_args[TCG_MAX_OP_ARGS]; 3665 int const_args[TCG_MAX_OP_ARGS]; 3666 3667 nb_oargs = def->nb_oargs; 3668 nb_iargs = def->nb_iargs; 3669 3670 /* copy constants */ 3671 memcpy(new_args + nb_oargs + nb_iargs, 3672 op->args + nb_oargs + nb_iargs, 3673 sizeof(TCGArg) * def->nb_cargs); 3674 3675 i_allocated_regs = s->reserved_regs; 3676 o_allocated_regs = s->reserved_regs; 3677 3678 /* satisfy input constraints */ 3679 for (k = 0; k < nb_iargs; k++) { 3680 TCGRegSet i_preferred_regs, o_preferred_regs; 3681 3682 i = def->args_ct[nb_oargs + k].sort_index; 3683 arg = op->args[i]; 3684 arg_ct = &def->args_ct[i]; 3685 ts = arg_temp(arg); 3686 3687 if (ts->val_type == TEMP_VAL_CONST 3688 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 3689 /* constant is OK for instruction */ 3690 const_args[i] = 1; 3691 new_args[i] = ts->val; 3692 continue; 3693 } 3694 3695 i_preferred_regs = o_preferred_regs = 0; 3696 if (arg_ct->ialias) { 3697 o_preferred_regs = op->output_pref[arg_ct->alias_index]; 3698 3699 /* 3700 * If the input is readonly, then it cannot also be an 3701 * output and aliased to itself. If the input is not 3702 * dead after the instruction, we must allocate a new 3703 * register and move it. 3704 */ 3705 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 3706 goto allocate_in_reg; 3707 } 3708 3709 /* 3710 * Check if the current register has already been allocated 3711 * for another input aliased to an output. 
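* Two aliased inputs must not share a register: each aliased output
* is written into the same register as its input, so the first output
* emitted would clobber the other input.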
3712 */ 3713 if (ts->val_type == TEMP_VAL_REG) { 3714 reg = ts->reg; 3715 for (int k2 = 0; k2 < k; k2++) { 3716 int i2 = def->args_ct[nb_oargs + k2].sort_index; 3717 if (def->args_ct[i2].ialias && reg == new_args[i2]) { 3718 goto allocate_in_reg; 3719 } 3720 } 3721 } 3722 i_preferred_regs = o_preferred_regs; 3723 } 3724 3725 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); 3726 reg = ts->reg; 3727 3728 if (!tcg_regset_test_reg(arg_ct->regs, reg)) { 3729 allocate_in_reg: 3730 /* 3731 * Allocate a new register matching the constraint 3732 * and move the temporary register into it. 3733 */ 3734 temp_load(s, ts, tcg_target_available_regs[ts->type], 3735 i_allocated_regs, 0); 3736 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, 3737 o_preferred_regs, ts->indirect_base); 3738 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 3739 /* 3740 * Cross register class move not supported. Sync the 3741 * temp back to its slot and load from there. 3742 */ 3743 temp_sync(s, ts, i_allocated_regs, 0, 0); 3744 tcg_out_ld(s, ts->type, reg, 3745 ts->mem_base->reg, ts->mem_offset); 3746 } 3747 } 3748 new_args[i] = reg; 3749 const_args[i] = 0; 3750 tcg_regset_set_reg(i_allocated_regs, reg); 3751 } 3752 3753 /* mark dead temporaries and free the associated registers */ 3754 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3755 if (IS_DEAD_ARG(i)) { 3756 temp_dead(s, arg_temp(op->args[i])); 3757 } 3758 } 3759 3760 if (def->flags & TCG_OPF_COND_BRANCH) { 3761 tcg_reg_alloc_cbranch(s, i_allocated_regs); 3762 } else if (def->flags & TCG_OPF_BB_END) { 3763 tcg_reg_alloc_bb_end(s, i_allocated_regs); 3764 } else { 3765 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3766 /* XXX: permit generic clobber register list ? */ 3767 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3768 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3769 tcg_reg_free(s, i, i_allocated_regs); 3770 } 3771 } 3772 } 3773 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3774 /* sync globals if the op has side effects and might trigger 3775 an exception. */ 3776 sync_globals(s, i_allocated_regs); 3777 } 3778 3779 /* satisfy the output constraints */ 3780 for(k = 0; k < nb_oargs; k++) { 3781 i = def->args_ct[k].sort_index; 3782 arg = op->args[i]; 3783 arg_ct = &def->args_ct[i]; 3784 ts = arg_temp(arg); 3785 3786 /* ENV should not be modified. */ 3787 tcg_debug_assert(!temp_readonly(ts)); 3788 3789 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 3790 reg = new_args[arg_ct->alias_index]; 3791 } else if (arg_ct->newreg) { 3792 reg = tcg_reg_alloc(s, arg_ct->regs, 3793 i_allocated_regs | o_allocated_regs, 3794 op->output_pref[k], ts->indirect_base); 3795 } else { 3796 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 3797 op->output_pref[k], ts->indirect_base); 3798 } 3799 tcg_regset_set_reg(o_allocated_regs, reg); 3800 if (ts->val_type == TEMP_VAL_REG) { 3801 s->reg_to_temp[ts->reg] = NULL; 3802 } 3803 ts->val_type = TEMP_VAL_REG; 3804 ts->reg = reg; 3805 /* 3806 * Temp value is modified, so the value kept in memory is 3807 * potentially not the same. 
3808 */ 3809 ts->mem_coherent = 0; 3810 s->reg_to_temp[reg] = ts; 3811 new_args[i] = reg; 3812 } 3813 } 3814 3815 /* emit instruction */ 3816 if (def->flags & TCG_OPF_VECTOR) { 3817 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 3818 new_args, const_args); 3819 } else { 3820 tcg_out_op(s, op->opc, new_args, const_args); 3821 } 3822 3823 /* move the outputs in the correct register if needed */ 3824 for(i = 0; i < nb_oargs; i++) { 3825 ts = arg_temp(op->args[i]); 3826 3827 /* ENV should not be modified. */ 3828 tcg_debug_assert(!temp_readonly(ts)); 3829 3830 if (NEED_SYNC_ARG(i)) { 3831 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 3832 } else if (IS_DEAD_ARG(i)) { 3833 temp_dead(s, ts); 3834 } 3835 } 3836 } 3837 3838 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 3839 { 3840 const TCGLifeData arg_life = op->life; 3841 TCGTemp *ots, *itsl, *itsh; 3842 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3843 3844 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 3845 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 3846 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 3847 3848 ots = arg_temp(op->args[0]); 3849 itsl = arg_temp(op->args[1]); 3850 itsh = arg_temp(op->args[2]); 3851 3852 /* ENV should not be modified. */ 3853 tcg_debug_assert(!temp_readonly(ots)); 3854 3855 /* Allocate the output register now. */ 3856 if (ots->val_type != TEMP_VAL_REG) { 3857 TCGRegSet allocated_regs = s->reserved_regs; 3858 TCGRegSet dup_out_regs = 3859 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3860 3861 /* Make sure to not spill the input registers. */ 3862 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 3863 tcg_regset_set_reg(allocated_regs, itsl->reg); 3864 } 3865 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 3866 tcg_regset_set_reg(allocated_regs, itsh->reg); 3867 } 3868 3869 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3870 op->output_pref[0], ots->indirect_base); 3871 ots->val_type = TEMP_VAL_REG; 3872 ots->mem_coherent = 0; 3873 s->reg_to_temp[ots->reg] = ots; 3874 } 3875 3876 /* Promote dup2 of immediates to dupi_vec. */ 3877 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 3878 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 3879 MemOp vece = MO_64; 3880 3881 if (val == dup_const(MO_8, val)) { 3882 vece = MO_8; 3883 } else if (val == dup_const(MO_16, val)) { 3884 vece = MO_16; 3885 } else if (val == dup_const(MO_32, val)) { 3886 vece = MO_32; 3887 } 3888 3889 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 3890 goto done; 3891 } 3892 3893 /* If the two inputs form one 64-bit value, try dupm_vec. */ 3894 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { 3895 if (!itsl->mem_coherent) { 3896 temp_sync(s, itsl, s->reserved_regs, 0, 0); 3897 } 3898 if (!itsh->mem_coherent) { 3899 temp_sync(s, itsh, s->reserved_regs, 0, 0); 3900 } 3901 #if HOST_BIG_ENDIAN 3902 TCGTemp *its = itsh; 3903 #else 3904 TCGTemp *its = itsl; 3905 #endif 3906 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 3907 its->mem_base->reg, its->mem_offset)) { 3908 goto done; 3909 } 3910 } 3911 3912 /* Fall back to generic expansion. 
*/ 3913 return false; 3914 3915 done: 3916 if (IS_DEAD_ARG(1)) { 3917 temp_dead(s, itsl); 3918 } 3919 if (IS_DEAD_ARG(2)) { 3920 temp_dead(s, itsh); 3921 } 3922 if (NEED_SYNC_ARG(0)) { 3923 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 3924 } else if (IS_DEAD_ARG(0)) { 3925 temp_dead(s, ots); 3926 } 3927 return true; 3928 } 3929 3930 #ifdef TCG_TARGET_STACK_GROWSUP 3931 #define STACK_DIR(x) (-(x)) 3932 #else 3933 #define STACK_DIR(x) (x) 3934 #endif 3935 3936 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 3937 { 3938 const int nb_oargs = TCGOP_CALLO(op); 3939 const int nb_iargs = TCGOP_CALLI(op); 3940 const TCGLifeData arg_life = op->life; 3941 const TCGHelperInfo *info; 3942 int flags, nb_regs, i; 3943 TCGReg reg; 3944 TCGArg arg; 3945 TCGTemp *ts; 3946 intptr_t stack_offset; 3947 size_t call_stack_size; 3948 tcg_insn_unit *func_addr; 3949 int allocate_args; 3950 TCGRegSet allocated_regs; 3951 3952 func_addr = tcg_call_func(op); 3953 info = tcg_call_info(op); 3954 flags = info->flags; 3955 3956 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 3957 if (nb_regs > nb_iargs) { 3958 nb_regs = nb_iargs; 3959 } 3960 3961 /* assign stack slots first */ 3962 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 3963 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 3964 ~(TCG_TARGET_STACK_ALIGN - 1); 3965 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 3966 if (allocate_args) { 3967 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 3968 preallocate call stack */ 3969 tcg_abort(); 3970 } 3971 3972 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 3973 for (i = nb_regs; i < nb_iargs; i++) { 3974 arg = op->args[nb_oargs + i]; 3975 #ifdef TCG_TARGET_STACK_GROWSUP 3976 stack_offset -= sizeof(tcg_target_long); 3977 #endif 3978 if (arg != TCG_CALL_DUMMY_ARG) { 3979 ts = arg_temp(arg); 3980 temp_load(s, ts, tcg_target_available_regs[ts->type], 3981 s->reserved_regs, 0); 3982 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 3983 } 3984 #ifndef TCG_TARGET_STACK_GROWSUP 3985 stack_offset += sizeof(tcg_target_long); 3986 #endif 3987 } 3988 3989 /* assign input registers */ 3990 allocated_regs = s->reserved_regs; 3991 for (i = 0; i < nb_regs; i++) { 3992 arg = op->args[nb_oargs + i]; 3993 if (arg != TCG_CALL_DUMMY_ARG) { 3994 ts = arg_temp(arg); 3995 reg = tcg_target_call_iarg_regs[i]; 3996 3997 if (ts->val_type == TEMP_VAL_REG) { 3998 if (ts->reg != reg) { 3999 tcg_reg_free(s, reg, allocated_regs); 4000 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4001 /* 4002 * Cross register class move not supported. Sync the 4003 * temp back to its slot and load from there. 
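* This mirrors the fallback used when loading op inputs in
* tcg_reg_alloc_op above.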
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

#ifdef CONFIG_TCG_INTERPRETER
    {
        gpointer hash = (gpointer)(uintptr_t)info->typemask;
        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
        assert(cif != NULL);
        tcg_out_call(s, func_addr, cif);
    }
#else
    tcg_out_call(s, func_addr);
#endif

    /* assign output registers and emit moves if needed */
    for (i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
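
/*
 * Worked example for the argument split above (illustrative only): on a
 * host with six integer argument registers, a call with eight word-sized
 * inputs passes arguments 0..5 in tcg_target_call_iarg_regs[0..5], while
 * arguments 6 and 7 are stored through TCG_REG_CALL_STACK starting at
 * TCG_TARGET_CALL_STACK_OFFSET, in the direction selected by
 * TCG_TARGET_STACK_GROWSUP.
 */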

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zeroed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: it would be much faster to have specialized register
               allocator functions for some common argument patterns. */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
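
/*
 * A sketch of how callers are expected to react to the negative returns
 * above (the authoritative handling lives in accel/tcg/translate-all.c):
 * -1 signals that the code buffer high-water mark was crossed, and -2
 * that the TB outgrew what gen_insn_end_off can describe; in both cases
 * the translation is discarded and retried rather than reported as an
 * error.
 */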

#ifdef CONFIG_PROFILER
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte  %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
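
/*
 * How the block above is consumed (see the "JIT Compilation Interface"
 * chapter of the GDB documentation): GDB plants a breakpoint inside
 * __jit_debug_register_code().  Each time we update __jit_debug_descriptor
 * and call that function, GDB wakes up, examines action_flag and
 * relevant_entry, and reads the in-memory ELF image at symfile_addr.
 * The empty asm("") keeps the call from being optimized away.
 */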

/*
 * Return the offset of @str within @strtab.  The string must be present;
 * there is no end-of-table check.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };
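
    /*
     * For illustration: find_string() recovers the offsets that the ELF
     * sh_name and st_name fields require.  With .str beginning
     * "\0.text\0.debug_info\0...", find_string(img->str, ".text")
     * returns 1 and find_string(img->str, ".debug_info") returns 7.
     */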

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;
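
    /*
     * Usage note (assuming the DEBUG_JIT dump below is enabled): the
     * written image can be checked with standard tools, e.g.
     * "readelf --all /tmp/qemu.jit" or "objdump -g /tmp/qemu.jit", to
     * verify the section headers and DWARF records built above.
     */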

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* This dump is best-effort; ignore write errors.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif