1 /* 2 * Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 /* define it to use liveness analysis (better code) */ 26 #define USE_TCG_OPTIMIZATIONS 27 28 #include "qemu/osdep.h" 29 30 /* Define to jump the ELF file used to communicate with GDB. */ 31 #undef DEBUG_JIT 32 33 #include "qemu/error-report.h" 34 #include "qemu/cutils.h" 35 #include "qemu/host-utils.h" 36 #include "qemu/qemu-print.h" 37 #include "qemu/timer.h" 38 #include "qemu/cacheflush.h" 39 40 /* Note: the long term plan is to reduce the dependencies on the QEMU 41 CPU definitions. 
   Currently they are used for qemu_ld/st instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

/* ELF class and byte order for the in-memory debug image handed to GDB,
   derived from the host pointer width and endianness. */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/* Stubs for hosts without vector support; reaching any of them
   is a programming error. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

/* Slack kept free at the end of each region so that one opcode's worth of
   output can never run off the end of the buffer. */
#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size;   /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current;       /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

/*
 * Emitters that append one instruction unit of the given width to the
 * output stream (tcg_outN), plus matching helpers that overwrite a
 * previously emitted unit in place (tcg_patchN).  Only the variants
 * wide enough for the host's TCG_TARGET_INSN_UNIT_SIZE are compiled in.
 */
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline
void tcg_patch64(tcg_insn_unit *p,
                 uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

/*
 * Record a relocation of kind @type against label @l at output position
 * @code_ptr; it is applied by patch_reloc() once the label's value is
 * known (see tcg_resolve_relocs).
 */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

/* Bind label @l to the current output position; may be done only once. */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

/* Allocate a fresh, as yet unbound label in the current context. */
TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

/*
 * Apply every pending relocation now that all labels have values.
 * Returns false if any relocation cannot be encoded by the backend's
 * patch_reloc(), in which case the caller must restart the TB.
 */
static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Token-pasting helpers used to build constraint-set identifiers. */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def().
*/ 434 435 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1) 436 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2) 437 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3) 438 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4) 439 440 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1) 441 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2) 442 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3) 443 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) 444 445 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) 446 447 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1) 448 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) 449 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) 450 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) 451 452 #include "tcg-target.c.inc" 453 454 /* compare a pointer @ptr and a tb_tc @s */ 455 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s) 456 { 457 if (ptr >= s->ptr + s->size) { 458 return 1; 459 } else if (ptr < s->ptr) { 460 return -1; 461 } 462 return 0; 463 } 464 465 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp) 466 { 467 const struct tb_tc *a = ap; 468 const struct tb_tc *b = bp; 469 470 /* 471 * When both sizes are set, we know this isn't a lookup. 472 * This is the most likely case: every TB must be inserted; lookups 473 * are a lot less frequent. 474 */ 475 if (likely(a->size && b->size)) { 476 if (a->ptr > b->ptr) { 477 return 1; 478 } else if (a->ptr < b->ptr) { 479 return -1; 480 } 481 /* a->ptr == b->ptr should happen only on deletions */ 482 g_assert(a->size == b->size); 483 return 0; 484 } 485 /* 486 * All lookups have either .size field set to 0. 487 * From the glib sources we see that @ap is always the lookup key. However 488 * the docs provide no guarantee, so we just mark this case as likely. 
489 */ 490 if (likely(a->size == 0)) { 491 return ptr_cmp_tb_tc(a->ptr, b); 492 } 493 return ptr_cmp_tb_tc(b->ptr, a); 494 } 495 496 static void tcg_region_trees_init(void) 497 { 498 size_t i; 499 500 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize); 501 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size); 502 for (i = 0; i < region.n; i++) { 503 struct tcg_region_tree *rt = region_trees + i * tree_size; 504 505 qemu_mutex_init(&rt->lock); 506 rt->tree = g_tree_new(tb_tc_cmp); 507 } 508 } 509 510 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp) 511 { 512 void *p = tcg_splitwx_to_rw(cp); 513 size_t region_idx; 514 515 if (p < region.start_aligned) { 516 region_idx = 0; 517 } else { 518 ptrdiff_t offset = p - region.start_aligned; 519 520 if (offset > region.stride * (region.n - 1)) { 521 region_idx = region.n - 1; 522 } else { 523 region_idx = offset / region.stride; 524 } 525 } 526 return region_trees + region_idx * tree_size; 527 } 528 529 void tcg_tb_insert(TranslationBlock *tb) 530 { 531 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 532 533 qemu_mutex_lock(&rt->lock); 534 g_tree_insert(rt->tree, &tb->tc, tb); 535 qemu_mutex_unlock(&rt->lock); 536 } 537 538 void tcg_tb_remove(TranslationBlock *tb) 539 { 540 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr); 541 542 qemu_mutex_lock(&rt->lock); 543 g_tree_remove(rt->tree, &tb->tc); 544 qemu_mutex_unlock(&rt->lock); 545 } 546 547 /* 548 * Find the TB 'tb' such that 549 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size 550 * Return NULL if not found. 
551 */ 552 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr) 553 { 554 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr); 555 TranslationBlock *tb; 556 struct tb_tc s = { .ptr = (void *)tc_ptr }; 557 558 qemu_mutex_lock(&rt->lock); 559 tb = g_tree_lookup(rt->tree, &s); 560 qemu_mutex_unlock(&rt->lock); 561 return tb; 562 } 563 564 static void tcg_region_tree_lock_all(void) 565 { 566 size_t i; 567 568 for (i = 0; i < region.n; i++) { 569 struct tcg_region_tree *rt = region_trees + i * tree_size; 570 571 qemu_mutex_lock(&rt->lock); 572 } 573 } 574 575 static void tcg_region_tree_unlock_all(void) 576 { 577 size_t i; 578 579 for (i = 0; i < region.n; i++) { 580 struct tcg_region_tree *rt = region_trees + i * tree_size; 581 582 qemu_mutex_unlock(&rt->lock); 583 } 584 } 585 586 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data) 587 { 588 size_t i; 589 590 tcg_region_tree_lock_all(); 591 for (i = 0; i < region.n; i++) { 592 struct tcg_region_tree *rt = region_trees + i * tree_size; 593 594 g_tree_foreach(rt->tree, func, user_data); 595 } 596 tcg_region_tree_unlock_all(); 597 } 598 599 size_t tcg_nb_tbs(void) 600 { 601 size_t nb_tbs = 0; 602 size_t i; 603 604 tcg_region_tree_lock_all(); 605 for (i = 0; i < region.n; i++) { 606 struct tcg_region_tree *rt = region_trees + i * tree_size; 607 608 nb_tbs += g_tree_nnodes(rt->tree); 609 } 610 tcg_region_tree_unlock_all(); 611 return nb_tbs; 612 } 613 614 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data) 615 { 616 TranslationBlock *tb = v; 617 618 tb_destroy(tb); 619 return FALSE; 620 } 621 622 static void tcg_region_tree_reset_all(void) 623 { 624 size_t i; 625 626 tcg_region_tree_lock_all(); 627 for (i = 0; i < region.n; i++) { 628 struct tcg_region_tree *rt = region_trees + i * tree_size; 629 630 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL); 631 /* Increment the refcount first so that destroy acts as a reset */ 632 g_tree_ref(rt->tree); 633 
g_tree_destroy(rt->tree); 634 } 635 tcg_region_tree_unlock_all(); 636 } 637 638 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend) 639 { 640 void *start, *end; 641 642 start = region.start_aligned + curr_region * region.stride; 643 end = start + region.size; 644 645 if (curr_region == 0) { 646 start = region.start; 647 } 648 if (curr_region == region.n - 1) { 649 end = region.end; 650 } 651 652 *pstart = start; 653 *pend = end; 654 } 655 656 static void tcg_region_assign(TCGContext *s, size_t curr_region) 657 { 658 void *start, *end; 659 660 tcg_region_bounds(curr_region, &start, &end); 661 662 s->code_gen_buffer = start; 663 s->code_gen_ptr = start; 664 s->code_gen_buffer_size = end - start; 665 s->code_gen_highwater = end - TCG_HIGHWATER; 666 } 667 668 static bool tcg_region_alloc__locked(TCGContext *s) 669 { 670 if (region.current == region.n) { 671 return true; 672 } 673 tcg_region_assign(s, region.current); 674 region.current++; 675 return false; 676 } 677 678 /* 679 * Request a new region once the one in use has filled up. 680 * Returns true on error. 681 */ 682 static bool tcg_region_alloc(TCGContext *s) 683 { 684 bool err; 685 /* read the region size now; alloc__locked will overwrite it on success */ 686 size_t size_full = s->code_gen_buffer_size; 687 688 qemu_mutex_lock(®ion.lock); 689 err = tcg_region_alloc__locked(s); 690 if (!err) { 691 region.agg_size_full += size_full - TCG_HIGHWATER; 692 } 693 qemu_mutex_unlock(®ion.lock); 694 return err; 695 } 696 697 /* 698 * Perform a context's first region allocation. 699 * This function does _not_ increment region.agg_size_full. 
700 */ 701 static inline bool tcg_region_initial_alloc__locked(TCGContext *s) 702 { 703 return tcg_region_alloc__locked(s); 704 } 705 706 /* Call from a safe-work context */ 707 void tcg_region_reset_all(void) 708 { 709 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 710 unsigned int i; 711 712 qemu_mutex_lock(®ion.lock); 713 region.current = 0; 714 region.agg_size_full = 0; 715 716 for (i = 0; i < n_ctxs; i++) { 717 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 718 bool err = tcg_region_initial_alloc__locked(s); 719 720 g_assert(!err); 721 } 722 qemu_mutex_unlock(®ion.lock); 723 724 tcg_region_tree_reset_all(); 725 } 726 727 #ifdef CONFIG_USER_ONLY 728 static size_t tcg_n_regions(void) 729 { 730 return 1; 731 } 732 #else 733 /* 734 * It is likely that some vCPUs will translate more code than others, so we 735 * first try to set more regions than max_cpus, with those regions being of 736 * reasonable size. If that's not possible we make do by evenly dividing 737 * the code_gen_buffer among the vCPUs. 738 */ 739 static size_t tcg_n_regions(void) 740 { 741 size_t i; 742 743 /* Use a single region if all we have is one vCPU thread */ 744 #if !defined(CONFIG_USER_ONLY) 745 MachineState *ms = MACHINE(qdev_get_machine()); 746 unsigned int max_cpus = ms->smp.max_cpus; 747 #endif 748 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { 749 return 1; 750 } 751 752 /* Try to have more regions than max_cpus, with each region being >= 2 MB */ 753 for (i = 8; i > 0; i--) { 754 size_t regions_per_thread = i; 755 size_t region_size; 756 757 region_size = tcg_init_ctx.code_gen_buffer_size; 758 region_size /= max_cpus * regions_per_thread; 759 760 if (region_size >= 2 * 1024u * 1024) { 761 return max_cpus * regions_per_thread; 762 } 763 } 764 /* If we can't, then just allocate one region per vCPU thread */ 765 return max_cpus; 766 } 767 #endif 768 769 /* 770 * Initializes region partitioning. 771 * 772 * Called at init time from the parent thread (i.e. 
 * the one calling tcg_context_init), after the target's TCG globals have
 * been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
796 */ 797 void tcg_region_init(void) 798 { 799 void *buf = tcg_init_ctx.code_gen_buffer; 800 void *aligned; 801 size_t size = tcg_init_ctx.code_gen_buffer_size; 802 size_t page_size = qemu_real_host_page_size; 803 size_t region_size; 804 size_t n_regions; 805 size_t i; 806 uintptr_t splitwx_diff; 807 808 n_regions = tcg_n_regions(); 809 810 /* The first region will be 'aligned - buf' bytes larger than the others */ 811 aligned = QEMU_ALIGN_PTR_UP(buf, page_size); 812 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size); 813 /* 814 * Make region_size a multiple of page_size, using aligned as the start. 815 * As a result of this we might end up with a few extra pages at the end of 816 * the buffer; we will assign those to the last region. 817 */ 818 region_size = (size - (aligned - buf)) / n_regions; 819 region_size = QEMU_ALIGN_DOWN(region_size, page_size); 820 821 /* A region must have at least 2 pages; one code, one guard */ 822 g_assert(region_size >= 2 * page_size); 823 824 /* init the region struct */ 825 qemu_mutex_init(®ion.lock); 826 region.n = n_regions; 827 region.size = region_size - page_size; 828 region.stride = region_size; 829 region.start = buf; 830 region.start_aligned = aligned; 831 /* page-align the end, since its last page will be a guard page */ 832 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size); 833 /* account for that last guard page */ 834 region.end -= page_size; 835 836 /* set guard pages */ 837 splitwx_diff = tcg_splitwx_diff; 838 for (i = 0; i < region.n; i++) { 839 void *start, *end; 840 int rc; 841 842 tcg_region_bounds(i, &start, &end); 843 rc = qemu_mprotect_none(end, page_size); 844 g_assert(!rc); 845 if (splitwx_diff) { 846 rc = qemu_mprotect_none(end + splitwx_diff, page_size); 847 g_assert(!rc); 848 } 849 } 850 851 tcg_region_trees_init(); 852 853 /* In user-mode we support only one ctx, so do the initial allocation now */ 854 #ifdef CONFIG_USER_ONLY 855 { 856 bool err = tcg_region_initial_alloc__locked(tcg_ctx); 857 
858 g_assert(!err); 859 } 860 #endif 861 } 862 863 #ifdef CONFIG_DEBUG_TCG 864 const void *tcg_splitwx_to_rx(void *rw) 865 { 866 /* Pass NULL pointers unchanged. */ 867 if (rw) { 868 g_assert(in_code_gen_buffer(rw)); 869 rw += tcg_splitwx_diff; 870 } 871 return rw; 872 } 873 874 void *tcg_splitwx_to_rw(const void *rx) 875 { 876 /* Pass NULL pointers unchanged. */ 877 if (rx) { 878 rx -= tcg_splitwx_diff; 879 /* Assert that we end with a pointer in the rw region. */ 880 g_assert(in_code_gen_buffer(rx)); 881 } 882 return (void *)rx; 883 } 884 #endif /* CONFIG_DEBUG_TCG */ 885 886 static void alloc_tcg_plugin_context(TCGContext *s) 887 { 888 #ifdef CONFIG_PLUGIN 889 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1); 890 s->plugin_tb->insns = 891 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn); 892 #endif 893 } 894 895 /* 896 * All TCG threads except the parent (i.e. the one that called tcg_context_init 897 * and registered the target's TCG globals) must register with this function 898 * before initiating translation. 899 * 900 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation 901 * of tcg_region_init() for the reasoning behind this. 902 * 903 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in 904 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context 905 * is not used anymore for translation once this function is called. 906 * 907 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates 908 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode. 909 */ 910 #ifdef CONFIG_USER_ONLY 911 void tcg_register_thread(void) 912 { 913 tcg_ctx = &tcg_init_ctx; 914 } 915 #else 916 void tcg_register_thread(void) 917 { 918 MachineState *ms = MACHINE(qdev_get_machine()); 919 TCGContext *s = g_malloc(sizeof(*s)); 920 unsigned int i, n; 921 bool err; 922 923 *s = tcg_init_ctx; 924 925 /* Relink mem_base. 
*/ 926 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { 927 if (tcg_init_ctx.temps[i].mem_base) { 928 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps; 929 tcg_debug_assert(b >= 0 && b < n); 930 s->temps[i].mem_base = &s->temps[b]; 931 } 932 } 933 934 /* Claim an entry in tcg_ctxs */ 935 n = qatomic_fetch_inc(&n_tcg_ctxs); 936 g_assert(n < ms->smp.max_cpus); 937 qatomic_set(&tcg_ctxs[n], s); 938 939 if (n > 0) { 940 alloc_tcg_plugin_context(s); 941 } 942 943 tcg_ctx = s; 944 qemu_mutex_lock(®ion.lock); 945 err = tcg_region_initial_alloc__locked(tcg_ctx); 946 g_assert(!err); 947 qemu_mutex_unlock(®ion.lock); 948 } 949 #endif /* !CONFIG_USER_ONLY */ 950 951 /* 952 * Returns the size (in bytes) of all translated code (i.e. from all regions) 953 * currently in the cache. 954 * See also: tcg_code_capacity() 955 * Do not confuse with tcg_current_code_size(); that one applies to a single 956 * TCG context. 957 */ 958 size_t tcg_code_size(void) 959 { 960 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 961 unsigned int i; 962 size_t total; 963 964 qemu_mutex_lock(®ion.lock); 965 total = region.agg_size_full; 966 for (i = 0; i < n_ctxs; i++) { 967 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 968 size_t size; 969 970 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer; 971 g_assert(size <= s->code_gen_buffer_size); 972 total += size; 973 } 974 qemu_mutex_unlock(®ion.lock); 975 return total; 976 } 977 978 /* 979 * Returns the code capacity (in bytes) of the entire cache, i.e. including all 980 * regions. 
981 * See also: tcg_code_size() 982 */ 983 size_t tcg_code_capacity(void) 984 { 985 size_t guard_size, capacity; 986 987 /* no need for synchronization; these variables are set at init time */ 988 guard_size = region.stride - region.size; 989 capacity = region.end + guard_size - region.start; 990 capacity -= region.n * (guard_size + TCG_HIGHWATER); 991 return capacity; 992 } 993 994 size_t tcg_tb_phys_invalidate_count(void) 995 { 996 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 997 unsigned int i; 998 size_t total = 0; 999 1000 for (i = 0; i < n_ctxs; i++) { 1001 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 1002 1003 total += qatomic_read(&s->tb_phys_invalidate_count); 1004 } 1005 return total; 1006 } 1007 1008 /* pool based memory allocation */ 1009 void *tcg_malloc_internal(TCGContext *s, int size) 1010 { 1011 TCGPool *p; 1012 int pool_size; 1013 1014 if (size > TCG_POOL_CHUNK_SIZE) { 1015 /* big malloc: insert a new pool (XXX: could optimize) */ 1016 p = g_malloc(sizeof(TCGPool) + size); 1017 p->size = size; 1018 p->next = s->pool_first_large; 1019 s->pool_first_large = p; 1020 return p->data; 1021 } else { 1022 p = s->pool_current; 1023 if (!p) { 1024 p = s->pool_first; 1025 if (!p) 1026 goto new_pool; 1027 } else { 1028 if (!p->next) { 1029 new_pool: 1030 pool_size = TCG_POOL_CHUNK_SIZE; 1031 p = g_malloc(sizeof(TCGPool) + pool_size); 1032 p->size = pool_size; 1033 p->next = NULL; 1034 if (s->pool_current) 1035 s->pool_current->next = p; 1036 else 1037 s->pool_first = p; 1038 } else { 1039 p = p->next; 1040 } 1041 } 1042 } 1043 s->pool_current = p; 1044 s->pool_cur = p->data + size; 1045 s->pool_end = p->data + p->size; 1046 return p->data; 1047 } 1048 1049 void tcg_pool_reset(TCGContext *s) 1050 { 1051 TCGPool *p, *t; 1052 for (p = s->pool_first_large; p; p = t) { 1053 t = p->next; 1054 g_free(p); 1055 } 1056 s->pool_first_large = NULL; 1057 s->pool_cur = s->pool_end = NULL; 1058 s->pool_current = NULL; 1059 } 1060 1061 typedef struct TCGHelperInfo { 
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

/* One-time initialization of the parent context @s: allocates the opcode
   constraint storage, registers helpers, initializes the target backend,
   and creates the "env" global. */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* carve the single allocation into per-opcode slices */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        /* Region exhausted: grab a new one and retry, or give up (NULL)
           when tcg_region_alloc fails. */
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    /* Publish the new allocation pointer; readers use qatomic_read. */
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

/*
 * Generate the host prologue/epilogue at the start of code_gen_buffer,
 * then shrink the buffer so translated code begins right after it.
 * Optionally disassembles the prologue to the log.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    /* Flush both the RX alias and the writable mapping of the prologue. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Constant-pool data follows the code; dump it as raw words. */
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

/* Reset per-TB translation state, ready to translate a new block. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.
     */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

/* Allocate and zero the next TCGTemp slot; longjmp out on overflow. */
static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        /* Signal overflow, starting over with fewer guest insns. */
        siglongjmp(s->jmp_trans, -2);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

/* Allocate a global temp.  Globals must be created before any other
   temps (nb_globals must still equal nb_temps). */
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

/* Create a fixed global bound to host register REG and reserve that
   register from the allocator. */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    /* 32-bit hosts can only hold a 32-bit value in one register. */
    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

/* Record the TB stack-frame bounds and create the "_frame" base temp. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

/*
 * Create a memory-backed global at BASE+OFFSET.  On 32-bit hosts a
 * 64-bit global is split into two adjacent 32-bit halves named
 * NAME_0/NAME_1, with the half offsets chosen by host endianness.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        /* Low half: high 4 bytes on big-endian hosts, offset 0 otherwise. */
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The two halves must occupy adjacent temp slots. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

/*
 * Allocate (or reuse from the free list) a temporary of TYPE.
 * TEMP_LOCAL temps survive across branches; TEMP_NORMAL do not.
 * The free_temps bitmaps are indexed by type plus locality (k below).
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit value on a 32-bit host: two adjacent 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

/* Allocate a new vector temporary; TYPE must be supported by the host. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

/* Return a temporary to its per-type/locality free list. */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    /* Only TEMP_NORMAL/TEMP_LOCAL may be freed, never globals/fixed. */
    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    /* Same bitmap indexing as in tcg_temp_new_internal. */
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

/*
 * Return the interned constant temp for (TYPE, VAL), creating it on
 * first use.  Constants are deduplicated per-type via a hash table
 * keyed on the 64-bit value stored in the temp itself.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        /* Key points at the value stored inside the temp itself. */
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}

/* Interned vector constant with VAL duplicated across elements of VECE. */
TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

/* As tcg_constant_vec, taking the vector type from an existing temp. */
TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

/* Allocate a fresh (non-interned) temp initialized to VAL. */
TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

/* As tcg_const_i32, but the temp survives across branches (local). */
TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
/* Reset the outstanding-temporaries counter (debug leak detection). */
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

/* Return 1 (and reset) if any temporaries were leaked, else 0. */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Ops every backend must implement. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-backend TCG_TARGET_HAS_* flags. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare/branch exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer ops require a 64-bit host. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops additionally require some vector size to be enabled. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes fall through here and are allowed. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap.  Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call().
 */
/*
 * Emit an INDEX_op_call to helper FUNC with NARGS arguments and an
 * optional return temp RET (NULL for void helpers).  FUNC must have been
 * registered in helper_table; its sizemask encodes, for each argument i,
 * a 64-bit flag at bit (i+1)*2 and a signedness flag at bit (i+1)*2 + 1
 * (bit 0 describes the return value).
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Hosts that require it: widen 32-bit args to 64-bit registers. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit return on a 32-bit host: two adjacent temp halves,
               ordered by host endianness. */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the widened copies created before the call. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

/* Reset per-temp value-location state at the start of register allocation. */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

/* Format a human-readable name for TS into BUF (for op dumps). */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

/* As tcg_get_arg_str_ptr, taking a TCGArg instead of a TCGTemp. */
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Find helper name.  */
/* Returns NULL if VAL is not a registered helper entry point. */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (helper_table) {
        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

/* Printable names for condition codes, indexed by TCGCond. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

/* Printable names for MemOp size/sign/endian combinations. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

/* Printable prefixes for MemOp alignment bits; the "natural" alignment
   entry differs depending on whether the target requires alignment. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* True if the register set contains at most one register. */
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}
2257 2258 static inline TCGReg tcg_regset_first(TCGRegSet d) 2259 { 2260 if (TCG_TARGET_NB_REGS <= 32) { 2261 return ctz32(d); 2262 } else { 2263 return ctz64(d); 2264 } 2265 } 2266 2267 static void tcg_dump_ops(TCGContext *s, bool have_prefs) 2268 { 2269 char buf[128]; 2270 TCGOp *op; 2271 2272 QTAILQ_FOREACH(op, &s->ops, link) { 2273 int i, k, nb_oargs, nb_iargs, nb_cargs; 2274 const TCGOpDef *def; 2275 TCGOpcode c; 2276 int col = 0; 2277 2278 c = op->opc; 2279 def = &tcg_op_defs[c]; 2280 2281 if (c == INDEX_op_insn_start) { 2282 nb_oargs = 0; 2283 col += qemu_log("\n ----"); 2284 2285 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2286 target_ulong a; 2287 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2288 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 2289 #else 2290 a = op->args[i]; 2291 #endif 2292 col += qemu_log(" " TARGET_FMT_lx, a); 2293 } 2294 } else if (c == INDEX_op_call) { 2295 /* variable number of arguments */ 2296 nb_oargs = TCGOP_CALLO(op); 2297 nb_iargs = TCGOP_CALLI(op); 2298 nb_cargs = def->nb_cargs; 2299 2300 /* function name, flags, out args */ 2301 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 2302 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), 2303 op->args[nb_oargs + nb_iargs + 1], nb_oargs); 2304 for (i = 0; i < nb_oargs; i++) { 2305 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2306 op->args[i])); 2307 } 2308 for (i = 0; i < nb_iargs; i++) { 2309 TCGArg arg = op->args[nb_oargs + i]; 2310 const char *t = "<dummy>"; 2311 if (arg != TCG_CALL_DUMMY_ARG) { 2312 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2313 } 2314 col += qemu_log(",%s", t); 2315 } 2316 } else { 2317 col += qemu_log(" %s ", def->name); 2318 2319 nb_oargs = def->nb_oargs; 2320 nb_iargs = def->nb_iargs; 2321 nb_cargs = def->nb_cargs; 2322 2323 if (def->flags & TCG_OPF_VECTOR) { 2324 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op), 2325 8 << TCGOP_VECE(op)); 2326 } 2327 2328 k = 0; 2329 for (i = 0; i < nb_oargs; i++) { 2330 if (k 
!= 0) { 2331 col += qemu_log(","); 2332 } 2333 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2334 op->args[k++])); 2335 } 2336 for (i = 0; i < nb_iargs; i++) { 2337 if (k != 0) { 2338 col += qemu_log(","); 2339 } 2340 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2341 op->args[k++])); 2342 } 2343 switch (c) { 2344 case INDEX_op_brcond_i32: 2345 case INDEX_op_setcond_i32: 2346 case INDEX_op_movcond_i32: 2347 case INDEX_op_brcond2_i32: 2348 case INDEX_op_setcond2_i32: 2349 case INDEX_op_brcond_i64: 2350 case INDEX_op_setcond_i64: 2351 case INDEX_op_movcond_i64: 2352 case INDEX_op_cmp_vec: 2353 case INDEX_op_cmpsel_vec: 2354 if (op->args[k] < ARRAY_SIZE(cond_name) 2355 && cond_name[op->args[k]]) { 2356 col += qemu_log(",%s", cond_name[op->args[k++]]); 2357 } else { 2358 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 2359 } 2360 i = 1; 2361 break; 2362 case INDEX_op_qemu_ld_i32: 2363 case INDEX_op_qemu_st_i32: 2364 case INDEX_op_qemu_st8_i32: 2365 case INDEX_op_qemu_ld_i64: 2366 case INDEX_op_qemu_st_i64: 2367 { 2368 TCGMemOpIdx oi = op->args[k++]; 2369 MemOp op = get_memop(oi); 2370 unsigned ix = get_mmuidx(oi); 2371 2372 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2373 col += qemu_log(",$0x%x,%u", op, ix); 2374 } else { 2375 const char *s_al, *s_op; 2376 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2377 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2378 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 2379 } 2380 i = 1; 2381 } 2382 break; 2383 default: 2384 i = 0; 2385 break; 2386 } 2387 switch (c) { 2388 case INDEX_op_set_label: 2389 case INDEX_op_br: 2390 case INDEX_op_brcond_i32: 2391 case INDEX_op_brcond_i64: 2392 case INDEX_op_brcond2_i32: 2393 col += qemu_log("%s$L%d", k ? "," : "", 2394 arg_label(op->args[k])->id); 2395 i++, k++; 2396 break; 2397 default: 2398 break; 2399 } 2400 for (; i < nb_cargs; i++, k++) { 2401 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 2402 } 2403 } 2404 2405 if (have_prefs || op->life) { 2406 2407 QemuLogFile *logfile; 2408 2409 rcu_read_lock(); 2410 logfile = qatomic_rcu_read(&qemu_logfile); 2411 if (logfile) { 2412 for (; col < 40; ++col) { 2413 putc(' ', logfile->fd); 2414 } 2415 } 2416 rcu_read_unlock(); 2417 } 2418 2419 if (op->life) { 2420 unsigned life = op->life; 2421 2422 if (life & (SYNC_ARG * 3)) { 2423 qemu_log(" sync:"); 2424 for (i = 0; i < 2; ++i) { 2425 if (life & (SYNC_ARG << i)) { 2426 qemu_log(" %d", i); 2427 } 2428 } 2429 } 2430 life /= DEAD_ARG; 2431 if (life) { 2432 qemu_log(" dead:"); 2433 for (i = 0; life; ++i, life >>= 1) { 2434 if (life & 1) { 2435 qemu_log(" %d", i); 2436 } 2437 } 2438 } 2439 } 2440 2441 if (have_prefs) { 2442 for (i = 0; i < nb_oargs; ++i) { 2443 TCGRegSet set = op->output_pref[i]; 2444 2445 if (i == 0) { 2446 qemu_log(" pref="); 2447 } else { 2448 qemu_log(","); 2449 } 2450 if (set == 0) { 2451 qemu_log("none"); 2452 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2453 qemu_log("all"); 2454 #ifdef CONFIG_DEBUG_TCG 2455 } else if (tcg_regset_single(set)) { 2456 TCGReg reg = tcg_regset_first(set); 2457 qemu_log("%s", tcg_target_reg_names[reg]); 2458 #endif 2459 } else if (TCG_TARGET_NB_REGS <= 32) { 2460 qemu_log("%#x", (uint32_t)set); 2461 } else { 2462 qemu_log("%#" PRIx64, (uint64_t)set); 2463 } 2464 } 2465 } 2466 2467 qemu_log("\n"); 2468 } 2469 } 2470 2471 /* we give more priority to constraints with less registers */ 2472 static int get_constraint_priority(const TCGOpDef *def, int k) 2473 { 2474 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2475 int n; 2476 2477 if (arg_ct->oalias) { 2478 /* an alias is equivalent to a single register */ 2479 n = 1; 2480 } else { 2481 n = ctpop64(arg_ct->regs); 2482 } 2483 return TCG_TARGET_NB_REGS - n + 1; 2484 } 2485 2486 /* sort from highest priority to lowest */ 2487 static void sort_constraints(TCGOpDef *def, int start, int n) 2488 { 2489 int i, j; 2490 
    TCGArgConstraint *a = def->args_ct;

    for (i = 0; i < n; i++) {
        a[start + i].sort_index = start + i;
    }
    if (n <= 1) {
        return;
    }
    /* Simple O(n^2) selection; n is the (small) number of op args. */
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            int p1 = get_constraint_priority(def, a[start + i].sort_index);
            int p2 = get_constraint_priority(def, a[start + j].sort_index);
            if (p1 < p2) {
                int tmp = a[start + i].sort_index;
                a[start + i].sort_index = a[start + j].sort_index;
                a[start + j].sort_index = tmp;
            }
        }
    }
}

/*
 * Parse the backend's constraint strings for every opcode into the
 * args_ct constraint structures, resolve output/input aliases, and
 * sort the constraints by allocation priority.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* Digit: this input aliases output arg N. */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias. */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Output must not overlap any input register. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Argument may be an immediate constant. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}

/*
 * Unlink @op from the op list and put it on the free list for reuse.
 * Branch ops drop their reference on the target label so that
 * unreferenced labels can later be removed.
 */
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    TCGLabel *label;

    switch (op->opc) {
    case INDEX_op_br:
        label = arg_label(op->args[0]);
        label->refs--;
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        label = arg_label(op->args[3]);
        label->refs--;
        break;
    case INDEX_op_brcond2_i32:
        label = arg_label(op->args[5]);
        label->refs--;
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}

/* Allocate a zero-initialized op with opcode @opc, reusing a
   previously removed op when one is available. */
static TCGOp *tcg_op_alloc(TCGOpcode opc)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op;

    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
        op = tcg_malloc(sizeof(TCGOp));
    } else {
        op =
QTAILQ_FIRST(&s->free_ops);
        QTAILQ_REMOVE(&s->free_ops, op, link);
    }
    /* Clear everything up to (not including) the list linkage. */
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    s->nb_ops++;

    return op;
}

/* Allocate a new op and append it to the end of the op list. */
TCGOp *tcg_emit_op(TCGOpcode opc)
{
    TCGOp *op = tcg_op_alloc(opc);
    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    return op;
}

/* Allocate a new op and insert it immediately before @old_op. */
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}

/* Allocate a new op and insert it immediately after @old_op. */
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
{
    TCGOp *new_op = tcg_op_alloc(opc);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}

/* Reachable analysis : remove unreachable code. */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;
        int call_flags;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again. */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead. */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions. */
            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
            if (call_flags & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind. */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

/* Per-temp liveness state bits used by the liveness passes. */
#define TS_DEAD  1
#define TS_MEM   2

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp. */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory.
 */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    /* Globals: dead but must also be synced back to memory. */
    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    /* Non-globals: simply dead. */
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_FIXED:
        case TEMP_GLOBAL:
        case TEMP_LOCAL:
            /* These survive across basic blocks via memory. */
            state = TS_DEAD | TS_MEM;
            break;
        case TEMP_NORMAL:
        case TEMP_CONST:
            state = TS_DEAD;
            break;
        default:
            g_assert_not_reached();
        }
        ts->state = state;
        la_reset_pref(ts);
    }
}

/* liveness analysis: sync globals back to memory. */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs. */
            la_reset_pref(&s->temps[i]);
        }
    }
}

/*
 * liveness analysis: conditional branch: all temps are dead,
 * globals and local temps should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_LOCAL:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live: preferences remain valid. */
                continue;
            }
            break;
        case TEMP_NORMAL:
            s->temps[i].state = TS_DEAD;
            break;
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart. */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed. */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, reached via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.
 */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk the ops backward, propagating liveness toward the start. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;
                int nb_call_regs;

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);
                call_flags = op->args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead. */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);

                    /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
                    op->output_pref[i] = 0;
                }

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper. */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts && ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs. */
                la_cross_call(s, nb_temps);

                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);

                /* Input arguments are live for preceding opcodes. */
                for (i = 0; i < nb_iargs; i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts && ts->state & TS_DEAD) {
                        /* For those arguments that die, and will be allocated
                         * in registers, clear the register set for that arg,
                         * to be filled in below.  For args that will be on
                         * the stack, reset to any available reg.
                         */
                        *la_temp_pref(ts)
                            = (i < nb_call_regs ? 0 :
                               tcg_target_available_regs[ts->type]);
                        ts->state &= ~TS_DEAD;
                    }
                }

                /* For each input argument, add its input register to prefs.
                   If a temp is used once, this produces a single set bit. */
                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts) {
                        tcg_regset_set_reg(*la_temp_pref(ts),
                                           tcg_target_call_iarg_regs[i]);
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit. */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live. */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead. */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live. */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed. */
                op->output_pref[i] = *la_temp_pref(ts);

                /* Output args are dead. */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update. */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode. */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes. */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type. */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand. */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward. */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= op->output_pref[ct->alias_index];
                    }
                    /* If the combination is not possible, restart. */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries. */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global. */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.
 */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead. */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require. */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals. */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available. */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    /* Insert a load from the global's memory slot into
                       its direct shadow temp. */
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory. */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points. */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM. */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded. */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available. */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified. */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead output: store the mov's source directly
                           and drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified. */
                arg_ts->state = 0;

                /* Sync outputs upon their last write. */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead. */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}

#ifdef CONFIG_DEBUG_TCG
/* Debug: print the location (reg/mem/const) of every temp, and the
   register-to-temp reverse map. */
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for(i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        switch(ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset,
                   tcg_target_reg_names[ts->mem_base->reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" PRIx64, ts->val);
            break;
        case TEMP_VAL_DEAD:
            printf("D");
            break;
        default:
            printf("???");
            break;
        }
        printf("\n");
    }

    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] != NULL) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}

/* Debug: verify that reg_to_temp and each temp's val_type/reg agree
   in both directions; abort on any inconsistency. */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG
            && ts->kind != TEMP_FIXED
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif

/* Assign a stack-frame slot to temp @ts; aborts if the frame is full. */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3467 /* Sparc64 stack is accessed with offset of 2047 */ 3468 s->current_frame_offset = (s->current_frame_offset + 3469 (tcg_target_long)sizeof(tcg_target_long) - 1) & 3470 ~(sizeof(tcg_target_long) - 1); 3471 #endif 3472 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 3473 s->frame_end) { 3474 tcg_abort(); 3475 } 3476 ts->mem_offset = s->current_frame_offset; 3477 ts->mem_base = s->frame_temp; 3478 ts->mem_allocated = 1; 3479 s->current_frame_offset += sizeof(tcg_target_long); 3480 } 3481 3482 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3483 3484 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3485 mark it free; otherwise mark it dead. */ 3486 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3487 { 3488 TCGTempVal new_type; 3489 3490 switch (ts->kind) { 3491 case TEMP_FIXED: 3492 return; 3493 case TEMP_GLOBAL: 3494 case TEMP_LOCAL: 3495 new_type = TEMP_VAL_MEM; 3496 break; 3497 case TEMP_NORMAL: 3498 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3499 break; 3500 case TEMP_CONST: 3501 new_type = TEMP_VAL_CONST; 3502 break; 3503 default: 3504 g_assert_not_reached(); 3505 } 3506 if (ts->val_type == TEMP_VAL_REG) { 3507 s->reg_to_temp[ts->reg] = NULL; 3508 } 3509 ts->val_type = new_type; 3510 } 3511 3512 /* Mark a temporary as dead. */ 3513 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3514 { 3515 temp_free_or_dead(s, ts, 1); 3516 } 3517 3518 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3519 registers needs to be allocated to store a constant. If 'free_or_dead' 3520 is non-zero, subsequently release the temporary; if it is positive, the 3521 temp is dead; if it is negative, the temp is free. 
*/ 3522 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3523 TCGRegSet preferred_regs, int free_or_dead) 3524 { 3525 if (!temp_readonly(ts) && !ts->mem_coherent) { 3526 if (!ts->mem_allocated) { 3527 temp_allocate_frame(s, ts); 3528 } 3529 switch (ts->val_type) { 3530 case TEMP_VAL_CONST: 3531 /* If we're going to free the temp immediately, then we won't 3532 require it later in a register, so attempt to store the 3533 constant to memory directly. */ 3534 if (free_or_dead 3535 && tcg_out_sti(s, ts->type, ts->val, 3536 ts->mem_base->reg, ts->mem_offset)) { 3537 break; 3538 } 3539 temp_load(s, ts, tcg_target_available_regs[ts->type], 3540 allocated_regs, preferred_regs); 3541 /* fallthrough */ 3542 3543 case TEMP_VAL_REG: 3544 tcg_out_st(s, ts->type, ts->reg, 3545 ts->mem_base->reg, ts->mem_offset); 3546 break; 3547 3548 case TEMP_VAL_MEM: 3549 break; 3550 3551 case TEMP_VAL_DEAD: 3552 default: 3553 tcg_abort(); 3554 } 3555 ts->mem_coherent = 1; 3556 } 3557 if (free_or_dead) { 3558 temp_free_or_dead(s, ts, free_or_dead); 3559 } 3560 } 3561 3562 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3563 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3564 { 3565 TCGTemp *ts = s->reg_to_temp[reg]; 3566 if (ts != NULL) { 3567 temp_sync(s, ts, allocated_regs, 0, -1); 3568 } 3569 } 3570 3571 /** 3572 * tcg_reg_alloc: 3573 * @required_regs: Set of registers in which we must allocate. 3574 * @allocated_regs: Set of registers which must be avoided. 3575 * @preferred_regs: Set of registers we should prefer. 3576 * @rev: True if we search the registers in "indirect" order. 3577 * 3578 * The allocated register must be in @required_regs & ~@allocated_regs, 3579 * but if we can put it in @preferred_regs we may save a move later. 
3580 */ 3581 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3582 TCGRegSet allocated_regs, 3583 TCGRegSet preferred_regs, bool rev) 3584 { 3585 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3586 TCGRegSet reg_ct[2]; 3587 const int *order; 3588 3589 reg_ct[1] = required_regs & ~allocated_regs; 3590 tcg_debug_assert(reg_ct[1] != 0); 3591 reg_ct[0] = reg_ct[1] & preferred_regs; 3592 3593 /* Skip the preferred_regs option if it cannot be satisfied, 3594 or if the preference made no difference. */ 3595 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3596 3597 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3598 3599 /* Try free registers, preferences first. */ 3600 for (j = f; j < 2; j++) { 3601 TCGRegSet set = reg_ct[j]; 3602 3603 if (tcg_regset_single(set)) { 3604 /* One register in the set. */ 3605 TCGReg reg = tcg_regset_first(set); 3606 if (s->reg_to_temp[reg] == NULL) { 3607 return reg; 3608 } 3609 } else { 3610 for (i = 0; i < n; i++) { 3611 TCGReg reg = order[i]; 3612 if (s->reg_to_temp[reg] == NULL && 3613 tcg_regset_test_reg(set, reg)) { 3614 return reg; 3615 } 3616 } 3617 } 3618 } 3619 3620 /* We must spill something. */ 3621 for (j = f; j < 2; j++) { 3622 TCGRegSet set = reg_ct[j]; 3623 3624 if (tcg_regset_single(set)) { 3625 /* One register in the set. */ 3626 TCGReg reg = tcg_regset_first(set); 3627 tcg_reg_free(s, reg, allocated_regs); 3628 return reg; 3629 } else { 3630 for (i = 0; i < n; i++) { 3631 TCGReg reg = order[i]; 3632 if (tcg_regset_test_reg(set, reg)) { 3633 tcg_reg_free(s, reg, allocated_regs); 3634 return reg; 3635 } 3636 } 3637 } 3638 } 3639 3640 tcg_abort(); 3641 } 3642 3643 /* Make sure the temporary is in a register. If needed, allocate the register 3644 from DESIRED while avoiding ALLOCATED. 
*/ 3645 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3646 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3647 { 3648 TCGReg reg; 3649 3650 switch (ts->val_type) { 3651 case TEMP_VAL_REG: 3652 return; 3653 case TEMP_VAL_CONST: 3654 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3655 preferred_regs, ts->indirect_base); 3656 if (ts->type <= TCG_TYPE_I64) { 3657 tcg_out_movi(s, ts->type, reg, ts->val); 3658 } else { 3659 uint64_t val = ts->val; 3660 MemOp vece = MO_64; 3661 3662 /* 3663 * Find the minimal vector element that matches the constant. 3664 * The targets will, in general, have to do this search anyway, 3665 * do this generically. 3666 */ 3667 if (val == dup_const(MO_8, val)) { 3668 vece = MO_8; 3669 } else if (val == dup_const(MO_16, val)) { 3670 vece = MO_16; 3671 } else if (val == dup_const(MO_32, val)) { 3672 vece = MO_32; 3673 } 3674 3675 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 3676 } 3677 ts->mem_coherent = 0; 3678 break; 3679 case TEMP_VAL_MEM: 3680 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3681 preferred_regs, ts->indirect_base); 3682 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3683 ts->mem_coherent = 1; 3684 break; 3685 case TEMP_VAL_DEAD: 3686 default: 3687 tcg_abort(); 3688 } 3689 ts->reg = reg; 3690 ts->val_type = TEMP_VAL_REG; 3691 s->reg_to_temp[reg] = ts; 3692 } 3693 3694 /* Save a temporary to memory. 'allocated_regs' is used in case a 3695 temporary registers needs to be allocated to store a constant. */ 3696 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3697 { 3698 /* The liveness analysis already ensures that globals are back 3699 in memory. Keep an tcg_debug_assert for safety. */ 3700 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 3701 } 3702 3703 /* save globals to their canonical location and assume they can be 3704 modified be the following code. 
'allocated_regs' is used in case a 3705 temporary registers needs to be allocated to store a constant. */ 3706 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3707 { 3708 int i, n; 3709 3710 for (i = 0, n = s->nb_globals; i < n; i++) { 3711 temp_save(s, &s->temps[i], allocated_regs); 3712 } 3713 } 3714 3715 /* sync globals to their canonical location and assume they can be 3716 read by the following code. 'allocated_regs' is used in case a 3717 temporary registers needs to be allocated to store a constant. */ 3718 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3719 { 3720 int i, n; 3721 3722 for (i = 0, n = s->nb_globals; i < n; i++) { 3723 TCGTemp *ts = &s->temps[i]; 3724 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3725 || ts->kind == TEMP_FIXED 3726 || ts->mem_coherent); 3727 } 3728 } 3729 3730 /* at the end of a basic block, we assume all temporaries are dead and 3731 all globals are stored at their canonical location. */ 3732 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3733 { 3734 int i; 3735 3736 for (i = s->nb_globals; i < s->nb_temps; i++) { 3737 TCGTemp *ts = &s->temps[i]; 3738 3739 switch (ts->kind) { 3740 case TEMP_LOCAL: 3741 temp_save(s, ts, allocated_regs); 3742 break; 3743 case TEMP_NORMAL: 3744 /* The liveness analysis already ensures that temps are dead. 3745 Keep an tcg_debug_assert for safety. */ 3746 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3747 break; 3748 case TEMP_CONST: 3749 /* Similarly, we should have freed any allocated register. */ 3750 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3751 break; 3752 default: 3753 g_assert_not_reached(); 3754 } 3755 } 3756 3757 save_globals(s, allocated_regs); 3758 } 3759 3760 /* 3761 * At a conditional branch, we assume all temporaries are dead and 3762 * all globals and local temps are synced to their location. 
3763 */ 3764 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3765 { 3766 sync_globals(s, allocated_regs); 3767 3768 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3769 TCGTemp *ts = &s->temps[i]; 3770 /* 3771 * The liveness analysis already ensures that temps are dead. 3772 * Keep tcg_debug_asserts for safety. 3773 */ 3774 switch (ts->kind) { 3775 case TEMP_LOCAL: 3776 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3777 break; 3778 case TEMP_NORMAL: 3779 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3780 break; 3781 case TEMP_CONST: 3782 break; 3783 default: 3784 g_assert_not_reached(); 3785 } 3786 } 3787 } 3788 3789 /* 3790 * Specialized code generation for INDEX_op_mov_* with a constant. 3791 */ 3792 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3793 tcg_target_ulong val, TCGLifeData arg_life, 3794 TCGRegSet preferred_regs) 3795 { 3796 /* ENV should not be modified. */ 3797 tcg_debug_assert(!temp_readonly(ots)); 3798 3799 /* The movi is not explicitly generated here. */ 3800 if (ots->val_type == TEMP_VAL_REG) { 3801 s->reg_to_temp[ots->reg] = NULL; 3802 } 3803 ots->val_type = TEMP_VAL_CONST; 3804 ots->val = val; 3805 ots->mem_coherent = 0; 3806 if (NEED_SYNC_ARG(0)) { 3807 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3808 } else if (IS_DEAD_ARG(0)) { 3809 temp_dead(s, ots); 3810 } 3811 } 3812 3813 /* 3814 * Specialized code generation for INDEX_op_mov_*. 3815 */ 3816 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3817 { 3818 const TCGLifeData arg_life = op->life; 3819 TCGRegSet allocated_regs, preferred_regs; 3820 TCGTemp *ts, *ots; 3821 TCGType otype, itype; 3822 3823 allocated_regs = s->reserved_regs; 3824 preferred_regs = op->output_pref[0]; 3825 ots = arg_temp(op->args[0]); 3826 ts = arg_temp(op->args[1]); 3827 3828 /* ENV should not be modified. 
*/ 3829 tcg_debug_assert(!temp_readonly(ots)); 3830 3831 /* Note that otype != itype for no-op truncation. */ 3832 otype = ots->type; 3833 itype = ts->type; 3834 3835 if (ts->val_type == TEMP_VAL_CONST) { 3836 /* propagate constant or generate sti */ 3837 tcg_target_ulong val = ts->val; 3838 if (IS_DEAD_ARG(1)) { 3839 temp_dead(s, ts); 3840 } 3841 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3842 return; 3843 } 3844 3845 /* If the source value is in memory we're going to be forced 3846 to have it in a register in order to perform the copy. Copy 3847 the SOURCE value into its own register first, that way we 3848 don't have to reload SOURCE the next time it is used. */ 3849 if (ts->val_type == TEMP_VAL_MEM) { 3850 temp_load(s, ts, tcg_target_available_regs[itype], 3851 allocated_regs, preferred_regs); 3852 } 3853 3854 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3855 if (IS_DEAD_ARG(0)) { 3856 /* mov to a non-saved dead register makes no sense (even with 3857 liveness analysis disabled). */ 3858 tcg_debug_assert(NEED_SYNC_ARG(0)); 3859 if (!ots->mem_allocated) { 3860 temp_allocate_frame(s, ots); 3861 } 3862 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3863 if (IS_DEAD_ARG(1)) { 3864 temp_dead(s, ts); 3865 } 3866 temp_dead(s, ots); 3867 } else { 3868 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 3869 /* the mov can be suppressed */ 3870 if (ots->val_type == TEMP_VAL_REG) { 3871 s->reg_to_temp[ots->reg] = NULL; 3872 } 3873 ots->reg = ts->reg; 3874 temp_dead(s, ts); 3875 } else { 3876 if (ots->val_type != TEMP_VAL_REG) { 3877 /* When allocating a new register, make sure to not spill the 3878 input one. */ 3879 tcg_regset_set_reg(allocated_regs, ts->reg); 3880 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3881 allocated_regs, preferred_regs, 3882 ots->indirect_base); 3883 } 3884 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3885 /* 3886 * Cross register class move not supported. 
3887 * Store the source register into the destination slot 3888 * and leave the destination temp as TEMP_VAL_MEM. 3889 */ 3890 assert(!temp_readonly(ots)); 3891 if (!ts->mem_allocated) { 3892 temp_allocate_frame(s, ots); 3893 } 3894 tcg_out_st(s, ts->type, ts->reg, 3895 ots->mem_base->reg, ots->mem_offset); 3896 ots->mem_coherent = 1; 3897 temp_free_or_dead(s, ots, -1); 3898 return; 3899 } 3900 } 3901 ots->val_type = TEMP_VAL_REG; 3902 ots->mem_coherent = 0; 3903 s->reg_to_temp[ots->reg] = ots; 3904 if (NEED_SYNC_ARG(0)) { 3905 temp_sync(s, ots, allocated_regs, 0, 0); 3906 } 3907 } 3908 } 3909 3910 /* 3911 * Specialized code generation for INDEX_op_dup_vec. 3912 */ 3913 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3914 { 3915 const TCGLifeData arg_life = op->life; 3916 TCGRegSet dup_out_regs, dup_in_regs; 3917 TCGTemp *its, *ots; 3918 TCGType itype, vtype; 3919 intptr_t endian_fixup; 3920 unsigned vece; 3921 bool ok; 3922 3923 ots = arg_temp(op->args[0]); 3924 its = arg_temp(op->args[1]); 3925 3926 /* ENV should not be modified. */ 3927 tcg_debug_assert(!temp_readonly(ots)); 3928 3929 itype = its->type; 3930 vece = TCGOP_VECE(op); 3931 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3932 3933 if (its->val_type == TEMP_VAL_CONST) { 3934 /* Propagate constant via movi -> dupi. */ 3935 tcg_target_ulong val = its->val; 3936 if (IS_DEAD_ARG(1)) { 3937 temp_dead(s, its); 3938 } 3939 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3940 return; 3941 } 3942 3943 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3944 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3945 3946 /* Allocate the output register now. */ 3947 if (ots->val_type != TEMP_VAL_REG) { 3948 TCGRegSet allocated_regs = s->reserved_regs; 3949 3950 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3951 /* Make sure to not spill the input register. 
 */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register. */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory. Attempt an integer-vector
             * register move first. We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there. */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
#ifdef HOST_WORDS_BIGENDIAN
        /* On big-endian hosts the element to replicate sits at the
           high end of the slot; adjust the load address accordingly. */
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* dup-from-memory not supported: load into the output register
           and fall through to the register-to-register dup below. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed.
 */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

/* Allocate registers for all operands of OP per its constraints and
   emit the target instruction. */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Process inputs in the constraint-defined order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself. If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported. Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output aliases an input: reuse that input's register. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* 'newreg' outputs must avoid all input registers too. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.
 */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

/* Attempt to emit INDEX_op_dup2_vec directly; returns true on success,
   false when the caller must fall back to generic expansion. */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Low input forms bits [31:0], high input bits [63:32]. */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that replicates to VAL. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        }
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
        }
#ifdef HOST_WORDS_BIGENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif

/* Allocate registers / stack slots for a helper call per the host
   calling convention, emit the call, and bind the outputs. */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* Function pointer and flags are stored after the regular args. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg,
                       TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported. Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Load directly into the required argument register. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zero'ed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    /* Accumulate the per-context profiles into *prof. */
    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

/* Print the per-opcode emission counts. */
void tcg_dump_op_count(void)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}

/* Sum of guest-code execution time over all TCG contexts. */
int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif


int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
4558 { 4559 int n = 0; 4560 4561 QTAILQ_FOREACH(op, &s->ops, link) { 4562 n++; 4563 } 4564 qatomic_set(&prof->op_count, prof->op_count + n); 4565 if (n > prof->op_count_max) { 4566 qatomic_set(&prof->op_count_max, n); 4567 } 4568 4569 n = s->nb_temps; 4570 qatomic_set(&prof->temp_count, prof->temp_count + n); 4571 if (n > prof->temp_count_max) { 4572 qatomic_set(&prof->temp_count_max, n); 4573 } 4574 } 4575 #endif 4576 4577 #ifdef DEBUG_DISAS 4578 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4579 && qemu_log_in_addr_range(tb->pc))) { 4580 FILE *logfile = qemu_log_lock(); 4581 qemu_log("OP:\n"); 4582 tcg_dump_ops(s, false); 4583 qemu_log("\n"); 4584 qemu_log_unlock(logfile); 4585 } 4586 #endif 4587 4588 #ifdef CONFIG_DEBUG_TCG 4589 /* Ensure all labels referenced have been emitted. */ 4590 { 4591 TCGLabel *l; 4592 bool error = false; 4593 4594 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4595 if (unlikely(!l->present) && l->refs) { 4596 qemu_log_mask(CPU_LOG_TB_OP, 4597 "$L%d referenced but not present.\n", l->id); 4598 error = true; 4599 } 4600 } 4601 assert(!error); 4602 } 4603 #endif 4604 4605 #ifdef CONFIG_PROFILER 4606 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4607 #endif 4608 4609 #ifdef USE_TCG_OPTIMIZATIONS 4610 tcg_optimize(s); 4611 #endif 4612 4613 #ifdef CONFIG_PROFILER 4614 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4615 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4616 #endif 4617 4618 reachable_code_pass(s); 4619 liveness_pass_1(s); 4620 4621 if (s->nb_indirects > 0) { 4622 #ifdef DEBUG_DISAS 4623 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4624 && qemu_log_in_addr_range(tb->pc))) { 4625 FILE *logfile = qemu_log_lock(); 4626 qemu_log("OP before indirect lowering:\n"); 4627 tcg_dump_ops(s, false); 4628 qemu_log("\n"); 4629 qemu_log_unlock(logfile); 4630 } 4631 #endif 4632 /* Replace indirect temps with direct temps. 
*/ 4633 if (liveness_pass_2(s)) { 4634 /* If changes were made, re-run liveness. */ 4635 liveness_pass_1(s); 4636 } 4637 } 4638 4639 #ifdef CONFIG_PROFILER 4640 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 4641 #endif 4642 4643 #ifdef DEBUG_DISAS 4644 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 4645 && qemu_log_in_addr_range(tb->pc))) { 4646 FILE *logfile = qemu_log_lock(); 4647 qemu_log("OP after optimization and liveness analysis:\n"); 4648 tcg_dump_ops(s, true); 4649 qemu_log("\n"); 4650 qemu_log_unlock(logfile); 4651 } 4652 #endif 4653 4654 tcg_reg_alloc_start(s); 4655 4656 /* 4657 * Reset the buffer pointers when restarting after overflow. 4658 * TODO: Move this into translate-all.c with the rest of the 4659 * buffer management. Having only this done here is confusing. 4660 */ 4661 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 4662 s->code_ptr = s->code_buf; 4663 4664 #ifdef TCG_TARGET_NEED_LDST_LABELS 4665 QSIMPLEQ_INIT(&s->ldst_labels); 4666 #endif 4667 #ifdef TCG_TARGET_NEED_POOL_LABELS 4668 s->pool_labels = NULL; 4669 #endif 4670 4671 num_insns = -1; 4672 QTAILQ_FOREACH(op, &s->ops, link) { 4673 TCGOpcode opc = op->opc; 4674 4675 #ifdef CONFIG_PROFILER 4676 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 4677 #endif 4678 4679 switch (opc) { 4680 case INDEX_op_mov_i32: 4681 case INDEX_op_mov_i64: 4682 case INDEX_op_mov_vec: 4683 tcg_reg_alloc_mov(s, op); 4684 break; 4685 case INDEX_op_dup_vec: 4686 tcg_reg_alloc_dup(s, op); 4687 break; 4688 case INDEX_op_insn_start: 4689 if (num_insns >= 0) { 4690 size_t off = tcg_current_code_size(s); 4691 s->gen_insn_end_off[num_insns] = off; 4692 /* Assert that we do not overflow our stored offset. 
*/ 4693 assert(s->gen_insn_end_off[num_insns] == off); 4694 } 4695 num_insns++; 4696 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 4697 target_ulong a; 4698 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 4699 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 4700 #else 4701 a = op->args[i]; 4702 #endif 4703 s->gen_insn_data[num_insns][i] = a; 4704 } 4705 break; 4706 case INDEX_op_discard: 4707 temp_dead(s, arg_temp(op->args[0])); 4708 break; 4709 case INDEX_op_set_label: 4710 tcg_reg_alloc_bb_end(s, s->reserved_regs); 4711 tcg_out_label(s, arg_label(op->args[0])); 4712 break; 4713 case INDEX_op_call: 4714 tcg_reg_alloc_call(s, op); 4715 break; 4716 case INDEX_op_dup2_vec: 4717 if (tcg_reg_alloc_dup2(s, op)) { 4718 break; 4719 } 4720 /* fall through */ 4721 default: 4722 /* Sanity check that we've not introduced any unhandled opcodes. */ 4723 tcg_debug_assert(tcg_op_supported(opc)); 4724 /* Note: in order to speed up the code, it would be much 4725 faster to have specialized register allocator functions for 4726 some common argument patterns */ 4727 tcg_reg_alloc_op(s, op); 4728 break; 4729 } 4730 #ifdef CONFIG_DEBUG_TCG 4731 check_regs(s); 4732 #endif 4733 /* Test for (pending) buffer overflow. The assumption is that any 4734 one operation beginning below the high water mark cannot overrun 4735 the buffer completely. Thus we can test for overflow after 4736 generating code without having to check during generation. */ 4737 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 4738 return -1; 4739 } 4740 /* Test for TB overflow, as seen by gen_insn_end_off. 
*/ 4741 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 4742 return -2; 4743 } 4744 } 4745 tcg_debug_assert(num_insns >= 0); 4746 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 4747 4748 /* Generate TB finalization at the end of block */ 4749 #ifdef TCG_TARGET_NEED_LDST_LABELS 4750 i = tcg_out_ldst_finalize(s); 4751 if (i < 0) { 4752 return i; 4753 } 4754 #endif 4755 #ifdef TCG_TARGET_NEED_POOL_LABELS 4756 i = tcg_out_pool_finalize(s); 4757 if (i < 0) { 4758 return i; 4759 } 4760 #endif 4761 if (!tcg_resolve_relocs(s)) { 4762 return -2; 4763 } 4764 4765 #ifndef CONFIG_TCG_INTERPRETER 4766 /* flush instruction cache */ 4767 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 4768 (uintptr_t)s->code_buf, 4769 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 4770 #endif 4771 4772 return tcg_current_code_size(s); 4773 } 4774 4775 #ifdef CONFIG_PROFILER 4776 void tcg_dump_info(void) 4777 { 4778 TCGProfile prof = {}; 4779 const TCGProfile *s; 4780 int64_t tb_count; 4781 int64_t tb_div_count; 4782 int64_t tot; 4783 4784 tcg_profile_snapshot_counters(&prof); 4785 s = &prof; 4786 tb_count = s->tb_count; 4787 tb_div_count = tb_count ? tb_count : 1; 4788 tot = s->interm_time + s->code_time; 4789 4790 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 4791 tot, tot / 2.4e9); 4792 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64 4793 " %0.1f%%)\n", 4794 tb_count, s->tb_count1 - tb_count, 4795 (double)(s->tb_count1 - s->tb_count) 4796 / (s->tb_count1 ? 
s->tb_count1 : 1) * 100.0); 4797 qemu_printf("avg ops/TB %0.1f max=%d\n", 4798 (double)s->op_count / tb_div_count, s->op_count_max); 4799 qemu_printf("deleted ops/TB %0.2f\n", 4800 (double)s->del_op_count / tb_div_count); 4801 qemu_printf("avg temps/TB %0.2f max=%d\n", 4802 (double)s->temp_count / tb_div_count, s->temp_count_max); 4803 qemu_printf("avg host code/TB %0.1f\n", 4804 (double)s->code_out_len / tb_div_count); 4805 qemu_printf("avg search data/TB %0.1f\n", 4806 (double)s->search_out_len / tb_div_count); 4807 4808 qemu_printf("cycles/op %0.1f\n", 4809 s->op_count ? (double)tot / s->op_count : 0); 4810 qemu_printf("cycles/in byte %0.1f\n", 4811 s->code_in_len ? (double)tot / s->code_in_len : 0); 4812 qemu_printf("cycles/out byte %0.1f\n", 4813 s->code_out_len ? (double)tot / s->code_out_len : 0); 4814 qemu_printf("cycles/search byte %0.1f\n", 4815 s->search_out_len ? (double)tot / s->search_out_len : 0); 4816 if (tot == 0) { 4817 tot = 1; 4818 } 4819 qemu_printf(" gen_interm time %0.1f%%\n", 4820 (double)s->interm_time / tot * 100.0); 4821 qemu_printf(" gen_code time %0.1f%%\n", 4822 (double)s->code_time / tot * 100.0); 4823 qemu_printf("optim./code time %0.1f%%\n", 4824 (double)s->opt_time / (s->code_time ? s->code_time : 1) 4825 * 100.0); 4826 qemu_printf("liveness/code time %0.1f%%\n", 4827 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 4828 qemu_printf("cpu_restore count %" PRId64 "\n", 4829 s->restore_count); 4830 qemu_printf(" avg cycles %0.1f\n", 4831 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 4832 } 4833 #else 4834 void tcg_dump_info(void) 4835 { 4836 qemu_printf("[TCG profiler not compiled]\n"); 4837 } 4838 #endif 4839 4840 #ifdef ELF_HOST_MACHINE 4841 /* In order to use this feature, the backend needs to do three things: 4842 4843 (1) Define ELF_HOST_MACHINE to indicate both what value to 4844 put into the ELF image and to indicate support for the feature. 4845 4846 (2) Define tcg_register_jit. 
This should create a buffer containing
   the contents of a .debug_frame section that describes the post-
   prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One entry in the doubly-linked list of in-memory symbol files. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* Root descriptor through which the debugger finds all registered entries. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* one of jit_actions_t */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* Per the GDB JIT interface, the debugger traps calls to this function;
   the empty asm keeps the call from being optimized away. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.
*/ 4885 4886 static int find_string(const char *strtab, const char *str) 4887 { 4888 const char *p = strtab + 1; 4889 4890 while (1) { 4891 if (strcmp(p, str) == 0) { 4892 return p - strtab; 4893 } 4894 p += strlen(p) + 1; 4895 } 4896 } 4897 4898 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 4899 const void *debug_frame, 4900 size_t debug_frame_size) 4901 { 4902 struct __attribute__((packed)) DebugInfo { 4903 uint32_t len; 4904 uint16_t version; 4905 uint32_t abbrev; 4906 uint8_t ptr_size; 4907 uint8_t cu_die; 4908 uint16_t cu_lang; 4909 uintptr_t cu_low_pc; 4910 uintptr_t cu_high_pc; 4911 uint8_t fn_die; 4912 char fn_name[16]; 4913 uintptr_t fn_low_pc; 4914 uintptr_t fn_high_pc; 4915 uint8_t cu_eoc; 4916 }; 4917 4918 struct ElfImage { 4919 ElfW(Ehdr) ehdr; 4920 ElfW(Phdr) phdr; 4921 ElfW(Shdr) shdr[7]; 4922 ElfW(Sym) sym[2]; 4923 struct DebugInfo di; 4924 uint8_t da[24]; 4925 char str[80]; 4926 }; 4927 4928 struct ElfImage *img; 4929 4930 static const struct ElfImage img_template = { 4931 .ehdr = { 4932 .e_ident[EI_MAG0] = ELFMAG0, 4933 .e_ident[EI_MAG1] = ELFMAG1, 4934 .e_ident[EI_MAG2] = ELFMAG2, 4935 .e_ident[EI_MAG3] = ELFMAG3, 4936 .e_ident[EI_CLASS] = ELF_CLASS, 4937 .e_ident[EI_DATA] = ELF_DATA, 4938 .e_ident[EI_VERSION] = EV_CURRENT, 4939 .e_type = ET_EXEC, 4940 .e_machine = ELF_HOST_MACHINE, 4941 .e_version = EV_CURRENT, 4942 .e_phoff = offsetof(struct ElfImage, phdr), 4943 .e_shoff = offsetof(struct ElfImage, shdr), 4944 .e_ehsize = sizeof(ElfW(Shdr)), 4945 .e_phentsize = sizeof(ElfW(Phdr)), 4946 .e_phnum = 1, 4947 .e_shentsize = sizeof(ElfW(Shdr)), 4948 .e_shnum = ARRAY_SIZE(img->shdr), 4949 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 4950 #ifdef ELF_HOST_FLAGS 4951 .e_flags = ELF_HOST_FLAGS, 4952 #endif 4953 #ifdef ELF_OSABI 4954 .e_ident[EI_OSABI] = ELF_OSABI, 4955 #endif 4956 }, 4957 .phdr = { 4958 .p_type = PT_LOAD, 4959 .p_flags = PF_X, 4960 }, 4961 .shdr = { 4962 [0] = { .sh_type = SHT_NULL }, 4963 /* Trick: The contents of 
code_gen_buffer are not present in 4964 this fake ELF file; that got allocated elsewhere. Therefore 4965 we mark .text as SHT_NOBITS (similar to .bss) so that readers 4966 will not look for contents. We can record any address. */ 4967 [1] = { /* .text */ 4968 .sh_type = SHT_NOBITS, 4969 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 4970 }, 4971 [2] = { /* .debug_info */ 4972 .sh_type = SHT_PROGBITS, 4973 .sh_offset = offsetof(struct ElfImage, di), 4974 .sh_size = sizeof(struct DebugInfo), 4975 }, 4976 [3] = { /* .debug_abbrev */ 4977 .sh_type = SHT_PROGBITS, 4978 .sh_offset = offsetof(struct ElfImage, da), 4979 .sh_size = sizeof(img->da), 4980 }, 4981 [4] = { /* .debug_frame */ 4982 .sh_type = SHT_PROGBITS, 4983 .sh_offset = sizeof(struct ElfImage), 4984 }, 4985 [5] = { /* .symtab */ 4986 .sh_type = SHT_SYMTAB, 4987 .sh_offset = offsetof(struct ElfImage, sym), 4988 .sh_size = sizeof(img->sym), 4989 .sh_info = 1, 4990 .sh_link = ARRAY_SIZE(img->shdr) - 1, 4991 .sh_entsize = sizeof(ElfW(Sym)), 4992 }, 4993 [6] = { /* .strtab */ 4994 .sh_type = SHT_STRTAB, 4995 .sh_offset = offsetof(struct ElfImage, str), 4996 .sh_size = sizeof(img->str), 4997 } 4998 }, 4999 .sym = { 5000 [1] = { /* code_gen_buffer */ 5001 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5002 .st_shndx = 1, 5003 } 5004 }, 5005 .di = { 5006 .len = sizeof(struct DebugInfo) - 4, 5007 .version = 2, 5008 .ptr_size = sizeof(void *), 5009 .cu_die = 1, 5010 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5011 .fn_die = 2, 5012 .fn_name = "code_gen_buffer" 5013 }, 5014 .da = { 5015 1, /* abbrev number (the cu) */ 5016 0x11, 1, /* DW_TAG_compile_unit, has children */ 5017 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5018 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5019 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5020 0, 0, /* end of abbrev */ 5021 2, /* abbrev number (the fn) */ 5022 0x2e, 0, /* DW_TAG_subprogram, no children */ 5023 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5024 0x11, 0x1, /* DW_AT_low_pc, 
DW_FORM_addr */ 5025 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5026 0, 0, /* end of abbrev */ 5027 0 /* no more abbrev */ 5028 }, 5029 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5030 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5031 }; 5032 5033 /* We only need a single jit entry; statically allocate it. */ 5034 static struct jit_code_entry one_entry; 5035 5036 uintptr_t buf = (uintptr_t)buf_ptr; 5037 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 5038 DebugFrameHeader *dfh; 5039 5040 img = g_malloc(img_size); 5041 *img = img_template; 5042 5043 img->phdr.p_vaddr = buf; 5044 img->phdr.p_paddr = buf; 5045 img->phdr.p_memsz = buf_size; 5046 5047 img->shdr[1].sh_name = find_string(img->str, ".text"); 5048 img->shdr[1].sh_addr = buf; 5049 img->shdr[1].sh_size = buf_size; 5050 5051 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 5052 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 5053 5054 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 5055 img->shdr[4].sh_size = debug_frame_size; 5056 5057 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 5058 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 5059 5060 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 5061 img->sym[1].st_value = buf; 5062 img->sym[1].st_size = buf_size; 5063 5064 img->di.cu_low_pc = buf; 5065 img->di.cu_high_pc = buf + buf_size; 5066 img->di.fn_low_pc = buf; 5067 img->di.fn_high_pc = buf + buf_size; 5068 5069 dfh = (DebugFrameHeader *)(img + 1); 5070 memcpy(dfh, debug_frame, debug_frame_size); 5071 dfh->fde.func_start = buf; 5072 dfh->fde.func_len = buf_size; 5073 5074 #ifdef DEBUG_JIT 5075 /* Enable this block to be able to debug the ELF image file creation. 5076 One can use readelf, objdump, or other inspection utilities. 
*/ 5077 { 5078 FILE *f = fopen("/tmp/qemu.jit", "w+b"); 5079 if (f) { 5080 if (fwrite(img, img_size, 1, f) != img_size) { 5081 /* Avoid stupid unused return value warning for fwrite. */ 5082 } 5083 fclose(f); 5084 } 5085 } 5086 #endif 5087 5088 one_entry.symfile_addr = img; 5089 one_entry.symfile_size = img_size; 5090 5091 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; 5092 __jit_debug_descriptor.relevant_entry = &one_entry; 5093 __jit_debug_descriptor.first_entry = &one_entry; 5094 __jit_debug_register_code(); 5095 } 5096 #else 5097 /* No support for the feature. Provide the entry point expected by exec.c, 5098 and implement the internal function we declared earlier. */ 5099 5100 static void tcg_register_jit_int(const void *buf, size_t size, 5101 const void *debug_frame, 5102 size_t debug_frame_size) 5103 { 5104 } 5105 5106 void tcg_register_jit(const void *buf, size_t buf_size) 5107 { 5108 } 5109 #endif /* ELF_HOST_MACHINE */ 5110 5111 #if !TCG_TARGET_MAYBE_vec 5112 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...) 5113 { 5114 g_assert_not_reached(); 5115 } 5116 #endif 5117