/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size;     /* size of one region */
    size_t stride;   /* .size + guard size */

    /* fields protected by the lock */
    size_t current;        /* current region index */
    size_t agg_size_full;  /* aggregate size of full regions */
};

static struct tcg_region_state region;
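/*
 * Illustration of the layout managed by the code below (a sketch with
 * hypothetical numbers, not a build-time contract): given, say, a 32 MB
 * code_gen_buffer, 4 KB pages and n = 4 regions, tcg_region_init() carves
 * the buffer roughly as
 *
 *   start         start_aligned                                       end
 *     |<-align pad->|<- region 0 ->|G|<- region 1 ->|G| ... |<- region 3 ->|G|
 *
 * where each 'G' is a guard page.  Regions are 'stride' bytes apart and
 * 'size' = stride - page_size.  A TCG thread owns one region at a time,
 * bumping its code_gen_ptr within it, and asks for the next region via
 * tcg_region_alloc() once it crosses code_gen_highwater.
 */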
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
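/*
 * Sketch of how the relocation helpers above are used by a backend; the
 * emit step and relocation type named here are hypothetical, only the
 * tcg_out_reloc()/tcg_out_label()/tcg_resolve_relocs() calls are real:
 *
 *     TCGLabel *l = gen_new_label();
 *     //  emit a branch whose target field is still zero, then record it:
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE_BRANCH, l, 0);
 *     ...
 *     tcg_out_label(s, l);    //  bind the label to the current code_ptr
 *
 * After the opcode loop, tcg_gen_code() calls tcg_resolve_relocs(), which
 * walks each label's reloc list and has the backend's patch_reloc()
 * rewrite every recorded instruction with the final label address.
 */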
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
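/*
 * Worked example of the X-macro scheme above (illustrative; whether a given
 * entry exists depends on the target's tcg-target-con-set.h).  A line such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * is expanded three times while compiling this file:
 *   1. by the first set of defines, into the enumerator c_o1_i2_r_r_ri;
 *   2. by the second set, into { .args_ct_str = { "r", "r", "ri" } } within
 *      constraint_sets[];
 *   3. by the final set below, into c_o1_i2_r_r_ri again, which is what the
 *      target's tcg_target_op_def() returns for an opcode with one register
 *      output and two inputs, the second of which may be a constant.
 * The enumerator value thus doubles as the index of the matching entry in
 * constraint_sets[].
 */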
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * In all lookups, one of the two keys has its .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}
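/*
 * Sketch of the two kinds of keys the tb_tc_cmp() comparator sees
 * (illustrative field values):
 *
 *     insert:  { .ptr = tb->tc.ptr, .size = tb->tc.size }   -- both nonzero
 *     lookup:  { .ptr = host_pc,    .size = 0 }
 *
 * Because a lookup key carries size 0, the comparator falls back to
 * ptr_cmp_tb_tc(), i.e. a containment test, so g_tree_lookup() returns the
 * TB whose [tc.ptr, tc.ptr + tc.size) range covers host_pc.  That is what
 * tcg_tb_lookup() below relies on.
 */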
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
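/*
 * Worked example of the region sizing performed by tcg_n_regions() and
 * tcg_region_init() below (hypothetical configuration): with MTTCG,
 * max_cpus = 8 and a 1 GiB code_gen_buffer, the first candidate of
 * 8 * 8 = 64 regions already yields 1 GiB / 64 = 16 MiB >= 2 MiB per region,
 * so 64 regions are used.  With only an 8 MiB buffer, even one region per
 * vCPU thread would be just 1 MiB, so the loop falls through and the
 * fallback of max_cpus = 8 regions is used instead.
 */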
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size.  If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG.  In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur.  Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     */
    for (i = 0; i < region.n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);

        /*
         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
         * rejects a permission change from RWX -> NONE.  Guard pages are
         * nice for bug detection but are not essential; ignore any failure.
         */
        (void)qemu_mprotect_none(end, page_size);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.
*/ 1135 helper_table = g_hash_table_new(NULL, NULL); 1136 1137 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { 1138 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, 1139 (gpointer)&all_helpers[i]); 1140 } 1141 1142 tcg_target_init(s); 1143 process_op_defs(s); 1144 1145 /* Reverse the order of the saved registers, assuming they're all at 1146 the start of tcg_target_reg_alloc_order. */ 1147 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) { 1148 int r = tcg_target_reg_alloc_order[n]; 1149 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) { 1150 break; 1151 } 1152 } 1153 for (i = 0; i < n; ++i) { 1154 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i]; 1155 } 1156 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) { 1157 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i]; 1158 } 1159 1160 alloc_tcg_plugin_context(s); 1161 1162 tcg_ctx = s; 1163 /* 1164 * In user-mode we simply share the init context among threads, since we 1165 * use a single region. See the documentation tcg_region_init() for the 1166 * reasoning behind this. 1167 * In softmmu we will have at most max_cpus TCG threads. 1168 */ 1169 #ifdef CONFIG_USER_ONLY 1170 tcg_ctxs = &tcg_ctx; 1171 n_tcg_ctxs = 1; 1172 #else 1173 MachineState *ms = MACHINE(qdev_get_machine()); 1174 unsigned int max_cpus = ms->smp.max_cpus; 1175 tcg_ctxs = g_new(TCGContext *, max_cpus); 1176 #endif 1177 1178 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); 1179 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env"); 1180 cpu_env = temp_tcgv_ptr(ts); 1181 } 1182 1183 /* 1184 * Allocate TBs right before their corresponding translated code, making 1185 * sure that TBs and code are on different cache lines. 1186 */ 1187 TranslationBlock *tcg_tb_alloc(TCGContext *s) 1188 { 1189 uintptr_t align = qemu_icache_linesize; 1190 TranslationBlock *tb; 1191 void *next; 1192 1193 retry: 1194 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); 1195 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); 1196 1197 if (unlikely(next > s->code_gen_highwater)) { 1198 if (tcg_region_alloc(s)) { 1199 return NULL; 1200 } 1201 goto retry; 1202 } 1203 qatomic_set(&s->code_gen_ptr, next); 1204 s->data_gen_ptr = NULL; 1205 return tb; 1206 } 1207 1208 void tcg_prologue_init(TCGContext *s) 1209 { 1210 size_t prologue_size, total_size; 1211 void *buf0, *buf1; 1212 1213 /* Put the prologue at the beginning of code_gen_buffer. */ 1214 buf0 = s->code_gen_buffer; 1215 total_size = s->code_gen_buffer_size; 1216 s->code_ptr = buf0; 1217 s->code_buf = buf0; 1218 s->data_gen_ptr = NULL; 1219 1220 /* 1221 * The region trees are not yet configured, but tcg_splitwx_to_rx 1222 * needs the bounds for an assert. 1223 */ 1224 region.start = buf0; 1225 region.end = buf0 + total_size; 1226 1227 #ifndef CONFIG_TCG_INTERPRETER 1228 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0); 1229 #endif 1230 1231 /* Compute a high-water mark, at which we voluntarily flush the buffer 1232 and start over. The size here is arbitrary, significantly larger 1233 than we expect the code generation for any one opcode to require. */ 1234 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER); 1235 1236 #ifdef TCG_TARGET_NEED_POOL_LABELS 1237 s->pool_labels = NULL; 1238 #endif 1239 1240 qemu_thread_jit_write(); 1241 /* Generate the prologue. */ 1242 tcg_target_qemu_prologue(s); 1243 1244 #ifdef TCG_TARGET_NEED_POOL_LABELS 1245 /* Allow the prologue to put e.g. guest_base into a pool entry. 
*/ 1246 { 1247 int result = tcg_out_pool_finalize(s); 1248 tcg_debug_assert(result == 0); 1249 } 1250 #endif 1251 1252 buf1 = s->code_ptr; 1253 #ifndef CONFIG_TCG_INTERPRETER 1254 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0, 1255 tcg_ptr_byte_diff(buf1, buf0)); 1256 #endif 1257 1258 /* Deduct the prologue from the buffer. */ 1259 prologue_size = tcg_current_code_size(s); 1260 s->code_gen_ptr = buf1; 1261 s->code_gen_buffer = buf1; 1262 s->code_buf = buf1; 1263 total_size -= prologue_size; 1264 s->code_gen_buffer_size = total_size; 1265 1266 tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size); 1267 1268 #ifdef DEBUG_DISAS 1269 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { 1270 FILE *logfile = qemu_log_lock(); 1271 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); 1272 if (s->data_gen_ptr) { 1273 size_t code_size = s->data_gen_ptr - buf0; 1274 size_t data_size = prologue_size - code_size; 1275 size_t i; 1276 1277 log_disas(buf0, code_size); 1278 1279 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) { 1280 if (sizeof(tcg_target_ulong) == 8) { 1281 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n", 1282 (uintptr_t)s->data_gen_ptr + i, 1283 *(uint64_t *)(s->data_gen_ptr + i)); 1284 } else { 1285 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n", 1286 (uintptr_t)s->data_gen_ptr + i, 1287 *(uint32_t *)(s->data_gen_ptr + i)); 1288 } 1289 } 1290 } else { 1291 log_disas(buf0, prologue_size); 1292 } 1293 qemu_log("\n"); 1294 qemu_log_flush(); 1295 qemu_log_unlock(logfile); 1296 } 1297 #endif 1298 1299 /* Assert that goto_ptr is implemented completely. */ 1300 if (TCG_TARGET_HAS_goto_ptr) { 1301 tcg_debug_assert(tcg_code_gen_epilogue != NULL); 1302 } 1303 } 1304 1305 void tcg_func_start(TCGContext *s) 1306 { 1307 tcg_pool_reset(s); 1308 s->nb_temps = s->nb_globals; 1309 1310 /* No temps have been previously allocated for size or locality. */ 1311 memset(s->free_temps, 0, sizeof(s->free_temps)); 1312 1313 /* No constant temps have been previously allocated. 
*/ 1314 for (int i = 0; i < TCG_TYPE_COUNT; ++i) { 1315 if (s->const_table[i]) { 1316 g_hash_table_remove_all(s->const_table[i]); 1317 } 1318 } 1319 1320 s->nb_ops = 0; 1321 s->nb_labels = 0; 1322 s->current_frame_offset = s->frame_start; 1323 1324 #ifdef CONFIG_DEBUG_TCG 1325 s->goto_tb_issue_mask = 0; 1326 #endif 1327 1328 QTAILQ_INIT(&s->ops); 1329 QTAILQ_INIT(&s->free_ops); 1330 QSIMPLEQ_INIT(&s->labels); 1331 } 1332 1333 static TCGTemp *tcg_temp_alloc(TCGContext *s) 1334 { 1335 int n = s->nb_temps++; 1336 1337 if (n >= TCG_MAX_TEMPS) { 1338 tcg_raise_tb_overflow(s); 1339 } 1340 return memset(&s->temps[n], 0, sizeof(TCGTemp)); 1341 } 1342 1343 static TCGTemp *tcg_global_alloc(TCGContext *s) 1344 { 1345 TCGTemp *ts; 1346 1347 tcg_debug_assert(s->nb_globals == s->nb_temps); 1348 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); 1349 s->nb_globals++; 1350 ts = tcg_temp_alloc(s); 1351 ts->kind = TEMP_GLOBAL; 1352 1353 return ts; 1354 } 1355 1356 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, 1357 TCGReg reg, const char *name) 1358 { 1359 TCGTemp *ts; 1360 1361 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) { 1362 tcg_abort(); 1363 } 1364 1365 ts = tcg_global_alloc(s); 1366 ts->base_type = type; 1367 ts->type = type; 1368 ts->kind = TEMP_FIXED; 1369 ts->reg = reg; 1370 ts->name = name; 1371 tcg_regset_set_reg(s->reserved_regs, reg); 1372 1373 return ts; 1374 } 1375 1376 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size) 1377 { 1378 s->frame_start = start; 1379 s->frame_end = start + size; 1380 s->frame_temp 1381 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame"); 1382 } 1383 1384 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, 1385 intptr_t offset, const char *name) 1386 { 1387 TCGContext *s = tcg_ctx; 1388 TCGTemp *base_ts = tcgv_ptr_temp(base); 1389 TCGTemp *ts = tcg_global_alloc(s); 1390 int indirect_reg = 0, bigendian = 0; 1391 #ifdef HOST_WORDS_BIGENDIAN 1392 bigendian = 1; 1393 #endif 1394 1395 switch (base_ts->kind) { 1396 case TEMP_FIXED: 1397 break; 1398 case TEMP_GLOBAL: 1399 /* We do not support double-indirect registers. */ 1400 tcg_debug_assert(!base_ts->indirect_reg); 1401 base_ts->indirect_base = 1; 1402 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64 1403 ? 
2 : 1); 1404 indirect_reg = 1; 1405 break; 1406 default: 1407 g_assert_not_reached(); 1408 } 1409 1410 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1411 TCGTemp *ts2 = tcg_global_alloc(s); 1412 char buf[64]; 1413 1414 ts->base_type = TCG_TYPE_I64; 1415 ts->type = TCG_TYPE_I32; 1416 ts->indirect_reg = indirect_reg; 1417 ts->mem_allocated = 1; 1418 ts->mem_base = base_ts; 1419 ts->mem_offset = offset + bigendian * 4; 1420 pstrcpy(buf, sizeof(buf), name); 1421 pstrcat(buf, sizeof(buf), "_0"); 1422 ts->name = strdup(buf); 1423 1424 tcg_debug_assert(ts2 == ts + 1); 1425 ts2->base_type = TCG_TYPE_I64; 1426 ts2->type = TCG_TYPE_I32; 1427 ts2->indirect_reg = indirect_reg; 1428 ts2->mem_allocated = 1; 1429 ts2->mem_base = base_ts; 1430 ts2->mem_offset = offset + (1 - bigendian) * 4; 1431 pstrcpy(buf, sizeof(buf), name); 1432 pstrcat(buf, sizeof(buf), "_1"); 1433 ts2->name = strdup(buf); 1434 } else { 1435 ts->base_type = type; 1436 ts->type = type; 1437 ts->indirect_reg = indirect_reg; 1438 ts->mem_allocated = 1; 1439 ts->mem_base = base_ts; 1440 ts->mem_offset = offset; 1441 ts->name = name; 1442 } 1443 return ts; 1444 } 1445 1446 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) 1447 { 1448 TCGContext *s = tcg_ctx; 1449 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL; 1450 TCGTemp *ts; 1451 int idx, k; 1452 1453 k = type + (temp_local ? TCG_TYPE_COUNT : 0); 1454 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); 1455 if (idx < TCG_MAX_TEMPS) { 1456 /* There is already an available temp with the right type. */ 1457 clear_bit(idx, s->free_temps[k].l); 1458 1459 ts = &s->temps[idx]; 1460 ts->temp_allocated = 1; 1461 tcg_debug_assert(ts->base_type == type); 1462 tcg_debug_assert(ts->kind == kind); 1463 } else { 1464 ts = tcg_temp_alloc(s); 1465 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1466 TCGTemp *ts2 = tcg_temp_alloc(s); 1467 1468 ts->base_type = type; 1469 ts->type = TCG_TYPE_I32; 1470 ts->temp_allocated = 1; 1471 ts->kind = kind; 1472 1473 tcg_debug_assert(ts2 == ts + 1); 1474 ts2->base_type = TCG_TYPE_I64; 1475 ts2->type = TCG_TYPE_I32; 1476 ts2->temp_allocated = 1; 1477 ts2->kind = kind; 1478 } else { 1479 ts->base_type = type; 1480 ts->type = type; 1481 ts->temp_allocated = 1; 1482 ts->kind = kind; 1483 } 1484 } 1485 1486 #if defined(CONFIG_DEBUG_TCG) 1487 s->temps_in_use++; 1488 #endif 1489 return ts; 1490 } 1491 1492 TCGv_vec tcg_temp_new_vec(TCGType type) 1493 { 1494 TCGTemp *t; 1495 1496 #ifdef CONFIG_DEBUG_TCG 1497 switch (type) { 1498 case TCG_TYPE_V64: 1499 assert(TCG_TARGET_HAS_v64); 1500 break; 1501 case TCG_TYPE_V128: 1502 assert(TCG_TARGET_HAS_v128); 1503 break; 1504 case TCG_TYPE_V256: 1505 assert(TCG_TARGET_HAS_v256); 1506 break; 1507 default: 1508 g_assert_not_reached(); 1509 } 1510 #endif 1511 1512 t = tcg_temp_new_internal(type, 0); 1513 return temp_tcgv_vec(t); 1514 } 1515 1516 /* Create a new temp of the same type as an existing temp. */ 1517 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) 1518 { 1519 TCGTemp *t = tcgv_vec_temp(match); 1520 1521 tcg_debug_assert(t->temp_allocated != 0); 1522 1523 t = tcg_temp_new_internal(t->base_type, 0); 1524 return temp_tcgv_vec(t); 1525 } 1526 1527 void tcg_temp_free_internal(TCGTemp *ts) 1528 { 1529 TCGContext *s = tcg_ctx; 1530 int k, idx; 1531 1532 /* In order to simplify users of tcg_constant_*, silently ignore free. 
*/ 1533 if (ts->kind == TEMP_CONST) { 1534 return; 1535 } 1536 1537 #if defined(CONFIG_DEBUG_TCG) 1538 s->temps_in_use--; 1539 if (s->temps_in_use < 0) { 1540 fprintf(stderr, "More temporaries freed than allocated!\n"); 1541 } 1542 #endif 1543 1544 tcg_debug_assert(ts->kind < TEMP_GLOBAL); 1545 tcg_debug_assert(ts->temp_allocated != 0); 1546 ts->temp_allocated = 0; 1547 1548 idx = temp_idx(ts); 1549 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT); 1550 set_bit(idx, s->free_temps[k].l); 1551 } 1552 1553 TCGTemp *tcg_constant_internal(TCGType type, int64_t val) 1554 { 1555 TCGContext *s = tcg_ctx; 1556 GHashTable *h = s->const_table[type]; 1557 TCGTemp *ts; 1558 1559 if (h == NULL) { 1560 h = g_hash_table_new(g_int64_hash, g_int64_equal); 1561 s->const_table[type] = h; 1562 } 1563 1564 ts = g_hash_table_lookup(h, &val); 1565 if (ts == NULL) { 1566 ts = tcg_temp_alloc(s); 1567 1568 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { 1569 TCGTemp *ts2 = tcg_temp_alloc(s); 1570 1571 ts->base_type = TCG_TYPE_I64; 1572 ts->type = TCG_TYPE_I32; 1573 ts->kind = TEMP_CONST; 1574 ts->temp_allocated = 1; 1575 /* 1576 * Retain the full value of the 64-bit constant in the low 1577 * part, so that the hash table works. Actual uses will 1578 * truncate the value to the low part. 1579 */ 1580 ts->val = val; 1581 1582 tcg_debug_assert(ts2 == ts + 1); 1583 ts2->base_type = TCG_TYPE_I64; 1584 ts2->type = TCG_TYPE_I32; 1585 ts2->kind = TEMP_CONST; 1586 ts2->temp_allocated = 1; 1587 ts2->val = val >> 32; 1588 } else { 1589 ts->base_type = type; 1590 ts->type = type; 1591 ts->kind = TEMP_CONST; 1592 ts->temp_allocated = 1; 1593 ts->val = val; 1594 } 1595 g_hash_table_insert(h, &ts->val, ts); 1596 } 1597 1598 return ts; 1599 } 1600 1601 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val) 1602 { 1603 val = dup_const(vece, val); 1604 return temp_tcgv_vec(tcg_constant_internal(type, val)); 1605 } 1606 1607 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val) 1608 { 1609 TCGTemp *t = tcgv_vec_temp(match); 1610 1611 tcg_debug_assert(t->temp_allocated != 0); 1612 return tcg_constant_vec(t->base_type, vece, val); 1613 } 1614 1615 TCGv_i32 tcg_const_i32(int32_t val) 1616 { 1617 TCGv_i32 t0; 1618 t0 = tcg_temp_new_i32(); 1619 tcg_gen_movi_i32(t0, val); 1620 return t0; 1621 } 1622 1623 TCGv_i64 tcg_const_i64(int64_t val) 1624 { 1625 TCGv_i64 t0; 1626 t0 = tcg_temp_new_i64(); 1627 tcg_gen_movi_i64(t0, val); 1628 return t0; 1629 } 1630 1631 TCGv_i32 tcg_const_local_i32(int32_t val) 1632 { 1633 TCGv_i32 t0; 1634 t0 = tcg_temp_local_new_i32(); 1635 tcg_gen_movi_i32(t0, val); 1636 return t0; 1637 } 1638 1639 TCGv_i64 tcg_const_local_i64(int64_t val) 1640 { 1641 TCGv_i64 t0; 1642 t0 = tcg_temp_local_new_i64(); 1643 tcg_gen_movi_i64(t0, val); 1644 return t0; 1645 } 1646 1647 #if defined(CONFIG_DEBUG_TCG) 1648 void tcg_clear_temp_count(void) 1649 { 1650 TCGContext *s = tcg_ctx; 1651 s->temps_in_use = 0; 1652 } 1653 1654 int tcg_check_temp_count(void) 1655 { 1656 TCGContext *s = tcg_ctx; 1657 if (s->temps_in_use) { 1658 /* Clear the count so that we don't give another 1659 * warning immediately next time around. 1660 */ 1661 s->temps_in_use = 0; 1662 return 1; 1663 } 1664 return 0; 1665 } 1666 #endif 1667 1668 /* Return true if OP may appear in the opcode stream. 1669 Test the runtime variable that controls each opcode. 
*/ 1670 bool tcg_op_supported(TCGOpcode op) 1671 { 1672 const bool have_vec 1673 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; 1674 1675 switch (op) { 1676 case INDEX_op_discard: 1677 case INDEX_op_set_label: 1678 case INDEX_op_call: 1679 case INDEX_op_br: 1680 case INDEX_op_mb: 1681 case INDEX_op_insn_start: 1682 case INDEX_op_exit_tb: 1683 case INDEX_op_goto_tb: 1684 case INDEX_op_qemu_ld_i32: 1685 case INDEX_op_qemu_st_i32: 1686 case INDEX_op_qemu_ld_i64: 1687 case INDEX_op_qemu_st_i64: 1688 return true; 1689 1690 case INDEX_op_qemu_st8_i32: 1691 return TCG_TARGET_HAS_qemu_st8_i32; 1692 1693 case INDEX_op_goto_ptr: 1694 return TCG_TARGET_HAS_goto_ptr; 1695 1696 case INDEX_op_mov_i32: 1697 case INDEX_op_setcond_i32: 1698 case INDEX_op_brcond_i32: 1699 case INDEX_op_ld8u_i32: 1700 case INDEX_op_ld8s_i32: 1701 case INDEX_op_ld16u_i32: 1702 case INDEX_op_ld16s_i32: 1703 case INDEX_op_ld_i32: 1704 case INDEX_op_st8_i32: 1705 case INDEX_op_st16_i32: 1706 case INDEX_op_st_i32: 1707 case INDEX_op_add_i32: 1708 case INDEX_op_sub_i32: 1709 case INDEX_op_mul_i32: 1710 case INDEX_op_and_i32: 1711 case INDEX_op_or_i32: 1712 case INDEX_op_xor_i32: 1713 case INDEX_op_shl_i32: 1714 case INDEX_op_shr_i32: 1715 case INDEX_op_sar_i32: 1716 return true; 1717 1718 case INDEX_op_movcond_i32: 1719 return TCG_TARGET_HAS_movcond_i32; 1720 case INDEX_op_div_i32: 1721 case INDEX_op_divu_i32: 1722 return TCG_TARGET_HAS_div_i32; 1723 case INDEX_op_rem_i32: 1724 case INDEX_op_remu_i32: 1725 return TCG_TARGET_HAS_rem_i32; 1726 case INDEX_op_div2_i32: 1727 case INDEX_op_divu2_i32: 1728 return TCG_TARGET_HAS_div2_i32; 1729 case INDEX_op_rotl_i32: 1730 case INDEX_op_rotr_i32: 1731 return TCG_TARGET_HAS_rot_i32; 1732 case INDEX_op_deposit_i32: 1733 return TCG_TARGET_HAS_deposit_i32; 1734 case INDEX_op_extract_i32: 1735 return TCG_TARGET_HAS_extract_i32; 1736 case INDEX_op_sextract_i32: 1737 return TCG_TARGET_HAS_sextract_i32; 1738 case INDEX_op_extract2_i32: 1739 return TCG_TARGET_HAS_extract2_i32; 1740 case INDEX_op_add2_i32: 1741 return TCG_TARGET_HAS_add2_i32; 1742 case INDEX_op_sub2_i32: 1743 return TCG_TARGET_HAS_sub2_i32; 1744 case INDEX_op_mulu2_i32: 1745 return TCG_TARGET_HAS_mulu2_i32; 1746 case INDEX_op_muls2_i32: 1747 return TCG_TARGET_HAS_muls2_i32; 1748 case INDEX_op_muluh_i32: 1749 return TCG_TARGET_HAS_muluh_i32; 1750 case INDEX_op_mulsh_i32: 1751 return TCG_TARGET_HAS_mulsh_i32; 1752 case INDEX_op_ext8s_i32: 1753 return TCG_TARGET_HAS_ext8s_i32; 1754 case INDEX_op_ext16s_i32: 1755 return TCG_TARGET_HAS_ext16s_i32; 1756 case INDEX_op_ext8u_i32: 1757 return TCG_TARGET_HAS_ext8u_i32; 1758 case INDEX_op_ext16u_i32: 1759 return TCG_TARGET_HAS_ext16u_i32; 1760 case INDEX_op_bswap16_i32: 1761 return TCG_TARGET_HAS_bswap16_i32; 1762 case INDEX_op_bswap32_i32: 1763 return TCG_TARGET_HAS_bswap32_i32; 1764 case INDEX_op_not_i32: 1765 return TCG_TARGET_HAS_not_i32; 1766 case INDEX_op_neg_i32: 1767 return TCG_TARGET_HAS_neg_i32; 1768 case INDEX_op_andc_i32: 1769 return TCG_TARGET_HAS_andc_i32; 1770 case INDEX_op_orc_i32: 1771 return TCG_TARGET_HAS_orc_i32; 1772 case INDEX_op_eqv_i32: 1773 return TCG_TARGET_HAS_eqv_i32; 1774 case INDEX_op_nand_i32: 1775 return TCG_TARGET_HAS_nand_i32; 1776 case INDEX_op_nor_i32: 1777 return TCG_TARGET_HAS_nor_i32; 1778 case INDEX_op_clz_i32: 1779 return TCG_TARGET_HAS_clz_i32; 1780 case INDEX_op_ctz_i32: 1781 return TCG_TARGET_HAS_ctz_i32; 1782 case INDEX_op_ctpop_i32: 1783 return TCG_TARGET_HAS_ctpop_i32; 1784 1785 case INDEX_op_brcond2_i32: 1786 case 
INDEX_op_setcond2_i32: 1787 return TCG_TARGET_REG_BITS == 32; 1788 1789 case INDEX_op_mov_i64: 1790 case INDEX_op_setcond_i64: 1791 case INDEX_op_brcond_i64: 1792 case INDEX_op_ld8u_i64: 1793 case INDEX_op_ld8s_i64: 1794 case INDEX_op_ld16u_i64: 1795 case INDEX_op_ld16s_i64: 1796 case INDEX_op_ld32u_i64: 1797 case INDEX_op_ld32s_i64: 1798 case INDEX_op_ld_i64: 1799 case INDEX_op_st8_i64: 1800 case INDEX_op_st16_i64: 1801 case INDEX_op_st32_i64: 1802 case INDEX_op_st_i64: 1803 case INDEX_op_add_i64: 1804 case INDEX_op_sub_i64: 1805 case INDEX_op_mul_i64: 1806 case INDEX_op_and_i64: 1807 case INDEX_op_or_i64: 1808 case INDEX_op_xor_i64: 1809 case INDEX_op_shl_i64: 1810 case INDEX_op_shr_i64: 1811 case INDEX_op_sar_i64: 1812 case INDEX_op_ext_i32_i64: 1813 case INDEX_op_extu_i32_i64: 1814 return TCG_TARGET_REG_BITS == 64; 1815 1816 case INDEX_op_movcond_i64: 1817 return TCG_TARGET_HAS_movcond_i64; 1818 case INDEX_op_div_i64: 1819 case INDEX_op_divu_i64: 1820 return TCG_TARGET_HAS_div_i64; 1821 case INDEX_op_rem_i64: 1822 case INDEX_op_remu_i64: 1823 return TCG_TARGET_HAS_rem_i64; 1824 case INDEX_op_div2_i64: 1825 case INDEX_op_divu2_i64: 1826 return TCG_TARGET_HAS_div2_i64; 1827 case INDEX_op_rotl_i64: 1828 case INDEX_op_rotr_i64: 1829 return TCG_TARGET_HAS_rot_i64; 1830 case INDEX_op_deposit_i64: 1831 return TCG_TARGET_HAS_deposit_i64; 1832 case INDEX_op_extract_i64: 1833 return TCG_TARGET_HAS_extract_i64; 1834 case INDEX_op_sextract_i64: 1835 return TCG_TARGET_HAS_sextract_i64; 1836 case INDEX_op_extract2_i64: 1837 return TCG_TARGET_HAS_extract2_i64; 1838 case INDEX_op_extrl_i64_i32: 1839 return TCG_TARGET_HAS_extrl_i64_i32; 1840 case INDEX_op_extrh_i64_i32: 1841 return TCG_TARGET_HAS_extrh_i64_i32; 1842 case INDEX_op_ext8s_i64: 1843 return TCG_TARGET_HAS_ext8s_i64; 1844 case INDEX_op_ext16s_i64: 1845 return TCG_TARGET_HAS_ext16s_i64; 1846 case INDEX_op_ext32s_i64: 1847 return TCG_TARGET_HAS_ext32s_i64; 1848 case INDEX_op_ext8u_i64: 1849 return TCG_TARGET_HAS_ext8u_i64; 1850 case INDEX_op_ext16u_i64: 1851 return TCG_TARGET_HAS_ext16u_i64; 1852 case INDEX_op_ext32u_i64: 1853 return TCG_TARGET_HAS_ext32u_i64; 1854 case INDEX_op_bswap16_i64: 1855 return TCG_TARGET_HAS_bswap16_i64; 1856 case INDEX_op_bswap32_i64: 1857 return TCG_TARGET_HAS_bswap32_i64; 1858 case INDEX_op_bswap64_i64: 1859 return TCG_TARGET_HAS_bswap64_i64; 1860 case INDEX_op_not_i64: 1861 return TCG_TARGET_HAS_not_i64; 1862 case INDEX_op_neg_i64: 1863 return TCG_TARGET_HAS_neg_i64; 1864 case INDEX_op_andc_i64: 1865 return TCG_TARGET_HAS_andc_i64; 1866 case INDEX_op_orc_i64: 1867 return TCG_TARGET_HAS_orc_i64; 1868 case INDEX_op_eqv_i64: 1869 return TCG_TARGET_HAS_eqv_i64; 1870 case INDEX_op_nand_i64: 1871 return TCG_TARGET_HAS_nand_i64; 1872 case INDEX_op_nor_i64: 1873 return TCG_TARGET_HAS_nor_i64; 1874 case INDEX_op_clz_i64: 1875 return TCG_TARGET_HAS_clz_i64; 1876 case INDEX_op_ctz_i64: 1877 return TCG_TARGET_HAS_ctz_i64; 1878 case INDEX_op_ctpop_i64: 1879 return TCG_TARGET_HAS_ctpop_i64; 1880 case INDEX_op_add2_i64: 1881 return TCG_TARGET_HAS_add2_i64; 1882 case INDEX_op_sub2_i64: 1883 return TCG_TARGET_HAS_sub2_i64; 1884 case INDEX_op_mulu2_i64: 1885 return TCG_TARGET_HAS_mulu2_i64; 1886 case INDEX_op_muls2_i64: 1887 return TCG_TARGET_HAS_muls2_i64; 1888 case INDEX_op_muluh_i64: 1889 return TCG_TARGET_HAS_muluh_i64; 1890 case INDEX_op_mulsh_i64: 1891 return TCG_TARGET_HAS_mulsh_i64; 1892 1893 case INDEX_op_mov_vec: 1894 case INDEX_op_dup_vec: 1895 case INDEX_op_dupm_vec: 1896 case INDEX_op_ld_vec: 1897 case 
INDEX_op_st_vec: 1898 case INDEX_op_add_vec: 1899 case INDEX_op_sub_vec: 1900 case INDEX_op_and_vec: 1901 case INDEX_op_or_vec: 1902 case INDEX_op_xor_vec: 1903 case INDEX_op_cmp_vec: 1904 return have_vec; 1905 case INDEX_op_dup2_vec: 1906 return have_vec && TCG_TARGET_REG_BITS == 32; 1907 case INDEX_op_not_vec: 1908 return have_vec && TCG_TARGET_HAS_not_vec; 1909 case INDEX_op_neg_vec: 1910 return have_vec && TCG_TARGET_HAS_neg_vec; 1911 case INDEX_op_abs_vec: 1912 return have_vec && TCG_TARGET_HAS_abs_vec; 1913 case INDEX_op_andc_vec: 1914 return have_vec && TCG_TARGET_HAS_andc_vec; 1915 case INDEX_op_orc_vec: 1916 return have_vec && TCG_TARGET_HAS_orc_vec; 1917 case INDEX_op_mul_vec: 1918 return have_vec && TCG_TARGET_HAS_mul_vec; 1919 case INDEX_op_shli_vec: 1920 case INDEX_op_shri_vec: 1921 case INDEX_op_sari_vec: 1922 return have_vec && TCG_TARGET_HAS_shi_vec; 1923 case INDEX_op_shls_vec: 1924 case INDEX_op_shrs_vec: 1925 case INDEX_op_sars_vec: 1926 return have_vec && TCG_TARGET_HAS_shs_vec; 1927 case INDEX_op_shlv_vec: 1928 case INDEX_op_shrv_vec: 1929 case INDEX_op_sarv_vec: 1930 return have_vec && TCG_TARGET_HAS_shv_vec; 1931 case INDEX_op_rotli_vec: 1932 return have_vec && TCG_TARGET_HAS_roti_vec; 1933 case INDEX_op_rotls_vec: 1934 return have_vec && TCG_TARGET_HAS_rots_vec; 1935 case INDEX_op_rotlv_vec: 1936 case INDEX_op_rotrv_vec: 1937 return have_vec && TCG_TARGET_HAS_rotv_vec; 1938 case INDEX_op_ssadd_vec: 1939 case INDEX_op_usadd_vec: 1940 case INDEX_op_sssub_vec: 1941 case INDEX_op_ussub_vec: 1942 return have_vec && TCG_TARGET_HAS_sat_vec; 1943 case INDEX_op_smin_vec: 1944 case INDEX_op_umin_vec: 1945 case INDEX_op_smax_vec: 1946 case INDEX_op_umax_vec: 1947 return have_vec && TCG_TARGET_HAS_minmax_vec; 1948 case INDEX_op_bitsel_vec: 1949 return have_vec && TCG_TARGET_HAS_bitsel_vec; 1950 case INDEX_op_cmpsel_vec: 1951 return have_vec && TCG_TARGET_HAS_cmpsel_vec; 1952 1953 default: 1954 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); 1955 return true; 1956 } 1957 } 1958 1959 /* Note: we convert the 64 bit args to 32 bit and do some alignment 1960 and endian swap. Maybe it would be better to do the alignment 1961 and endian swap in tcg_reg_alloc_call(). */ 1962 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) 1963 { 1964 int i, real_args, nb_rets, pi; 1965 unsigned sizemask, flags; 1966 TCGHelperInfo *info; 1967 TCGOp *op; 1968 1969 info = g_hash_table_lookup(helper_table, (gpointer)func); 1970 flags = info->flags; 1971 sizemask = info->sizemask; 1972 1973 #ifdef CONFIG_PLUGIN 1974 /* detect non-plugin helpers */ 1975 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) { 1976 tcg_ctx->plugin_insn->calls_helpers = true; 1977 } 1978 #endif 1979 1980 #if defined(__sparc__) && !defined(__arch64__) \ 1981 && !defined(CONFIG_TCG_INTERPRETER) 1982 /* We have 64-bit values in one register, but need to pass as two 1983 separate parameters. Split them. 
*/ 1984 int orig_sizemask = sizemask; 1985 int orig_nargs = nargs; 1986 TCGv_i64 retl, reth; 1987 TCGTemp *split_args[MAX_OPC_PARAM]; 1988 1989 retl = NULL; 1990 reth = NULL; 1991 if (sizemask != 0) { 1992 for (i = real_args = 0; i < nargs; ++i) { 1993 int is_64bit = sizemask & (1 << (i+1)*2); 1994 if (is_64bit) { 1995 TCGv_i64 orig = temp_tcgv_i64(args[i]); 1996 TCGv_i32 h = tcg_temp_new_i32(); 1997 TCGv_i32 l = tcg_temp_new_i32(); 1998 tcg_gen_extr_i64_i32(l, h, orig); 1999 split_args[real_args++] = tcgv_i32_temp(h); 2000 split_args[real_args++] = tcgv_i32_temp(l); 2001 } else { 2002 split_args[real_args++] = args[i]; 2003 } 2004 } 2005 nargs = real_args; 2006 args = split_args; 2007 sizemask = 0; 2008 } 2009 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 2010 for (i = 0; i < nargs; ++i) { 2011 int is_64bit = sizemask & (1 << (i+1)*2); 2012 int is_signed = sizemask & (2 << (i+1)*2); 2013 if (!is_64bit) { 2014 TCGv_i64 temp = tcg_temp_new_i64(); 2015 TCGv_i64 orig = temp_tcgv_i64(args[i]); 2016 if (is_signed) { 2017 tcg_gen_ext32s_i64(temp, orig); 2018 } else { 2019 tcg_gen_ext32u_i64(temp, orig); 2020 } 2021 args[i] = tcgv_i64_temp(temp); 2022 } 2023 } 2024 #endif /* TCG_TARGET_EXTEND_ARGS */ 2025 2026 op = tcg_emit_op(INDEX_op_call); 2027 2028 pi = 0; 2029 if (ret != NULL) { 2030 #if defined(__sparc__) && !defined(__arch64__) \ 2031 && !defined(CONFIG_TCG_INTERPRETER) 2032 if (orig_sizemask & 1) { 2033 /* The 32-bit ABI is going to return the 64-bit value in 2034 the %o0/%o1 register pair. Prepare for this by using 2035 two return temporaries, and reassemble below. */ 2036 retl = tcg_temp_new_i64(); 2037 reth = tcg_temp_new_i64(); 2038 op->args[pi++] = tcgv_i64_arg(reth); 2039 op->args[pi++] = tcgv_i64_arg(retl); 2040 nb_rets = 2; 2041 } else { 2042 op->args[pi++] = temp_arg(ret); 2043 nb_rets = 1; 2044 } 2045 #else 2046 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { 2047 #ifdef HOST_WORDS_BIGENDIAN 2048 op->args[pi++] = temp_arg(ret + 1); 2049 op->args[pi++] = temp_arg(ret); 2050 #else 2051 op->args[pi++] = temp_arg(ret); 2052 op->args[pi++] = temp_arg(ret + 1); 2053 #endif 2054 nb_rets = 2; 2055 } else { 2056 op->args[pi++] = temp_arg(ret); 2057 nb_rets = 1; 2058 } 2059 #endif 2060 } else { 2061 nb_rets = 0; 2062 } 2063 TCGOP_CALLO(op) = nb_rets; 2064 2065 real_args = 0; 2066 for (i = 0; i < nargs; i++) { 2067 int is_64bit = sizemask & (1 << (i+1)*2); 2068 if (TCG_TARGET_REG_BITS < 64 && is_64bit) { 2069 #ifdef TCG_TARGET_CALL_ALIGN_ARGS 2070 /* some targets want aligned 64 bit args */ 2071 if (real_args & 1) { 2072 op->args[pi++] = TCG_CALL_DUMMY_ARG; 2073 real_args++; 2074 } 2075 #endif 2076 /* If stack grows up, then we will be placing successive 2077 arguments at lower addresses, which means we need to 2078 reverse the order compared to how we would normally 2079 treat either big or little-endian. For those arguments 2080 that will wind up in registers, this still works for 2081 HPPA (the only current STACK_GROWSUP target) since the 2082 argument registers are *also* allocated in decreasing 2083 order. If another such target is added, this logic may 2084 have to get more complicated to differentiate between 2085 stack arguments and register arguments. 
*/ 2086 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) 2087 op->args[pi++] = temp_arg(args[i] + 1); 2088 op->args[pi++] = temp_arg(args[i]); 2089 #else 2090 op->args[pi++] = temp_arg(args[i]); 2091 op->args[pi++] = temp_arg(args[i] + 1); 2092 #endif 2093 real_args += 2; 2094 continue; 2095 } 2096 2097 op->args[pi++] = temp_arg(args[i]); 2098 real_args++; 2099 } 2100 op->args[pi++] = (uintptr_t)func; 2101 op->args[pi++] = flags; 2102 TCGOP_CALLI(op) = real_args; 2103 2104 /* Make sure the fields didn't overflow. */ 2105 tcg_debug_assert(TCGOP_CALLI(op) == real_args); 2106 tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); 2107 2108 #if defined(__sparc__) && !defined(__arch64__) \ 2109 && !defined(CONFIG_TCG_INTERPRETER) 2110 /* Free all of the parts we allocated above. */ 2111 for (i = real_args = 0; i < orig_nargs; ++i) { 2112 int is_64bit = orig_sizemask & (1 << (i+1)*2); 2113 if (is_64bit) { 2114 tcg_temp_free_internal(args[real_args++]); 2115 tcg_temp_free_internal(args[real_args++]); 2116 } else { 2117 real_args++; 2118 } 2119 } 2120 if (orig_sizemask & 1) { 2121 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. 2122 Note that describing these as TCGv_i64 eliminates an unnecessary 2123 zero-extension that tcg_gen_concat_i32_i64 would create. */ 2124 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); 2125 tcg_temp_free_i64(retl); 2126 tcg_temp_free_i64(reth); 2127 } 2128 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 2129 for (i = 0; i < nargs; ++i) { 2130 int is_64bit = sizemask & (1 << (i+1)*2); 2131 if (!is_64bit) { 2132 tcg_temp_free_internal(args[i]); 2133 } 2134 } 2135 #endif /* TCG_TARGET_EXTEND_ARGS */ 2136 } 2137 2138 static void tcg_reg_alloc_start(TCGContext *s) 2139 { 2140 int i, n; 2141 2142 for (i = 0, n = s->nb_temps; i < n; i++) { 2143 TCGTemp *ts = &s->temps[i]; 2144 TCGTempVal val = TEMP_VAL_MEM; 2145 2146 switch (ts->kind) { 2147 case TEMP_CONST: 2148 val = TEMP_VAL_CONST; 2149 break; 2150 case TEMP_FIXED: 2151 val = TEMP_VAL_REG; 2152 break; 2153 case TEMP_GLOBAL: 2154 break; 2155 case TEMP_NORMAL: 2156 val = TEMP_VAL_DEAD; 2157 /* fall through */ 2158 case TEMP_LOCAL: 2159 ts->mem_allocated = 0; 2160 break; 2161 default: 2162 g_assert_not_reached(); 2163 } 2164 ts->val_type = val; 2165 } 2166 2167 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp)); 2168 } 2169 2170 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size, 2171 TCGTemp *ts) 2172 { 2173 int idx = temp_idx(ts); 2174 2175 switch (ts->kind) { 2176 case TEMP_FIXED: 2177 case TEMP_GLOBAL: 2178 pstrcpy(buf, buf_size, ts->name); 2179 break; 2180 case TEMP_LOCAL: 2181 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); 2182 break; 2183 case TEMP_NORMAL: 2184 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); 2185 break; 2186 case TEMP_CONST: 2187 switch (ts->type) { 2188 case TCG_TYPE_I32: 2189 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val); 2190 break; 2191 #if TCG_TARGET_REG_BITS > 32 2192 case TCG_TYPE_I64: 2193 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val); 2194 break; 2195 #endif 2196 case TCG_TYPE_V64: 2197 case TCG_TYPE_V128: 2198 case TCG_TYPE_V256: 2199 snprintf(buf, buf_size, "v%d$0x%" PRIx64, 2200 64 << (ts->type - TCG_TYPE_V64), ts->val); 2201 break; 2202 default: 2203 g_assert_not_reached(); 2204 } 2205 break; 2206 } 2207 return buf; 2208 } 2209 2210 static char *tcg_get_arg_str(TCGContext *s, char *buf, 2211 int buf_size, TCGArg arg) 2212 { 2213 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); 2214 } 
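/* Illustrative note: with the naming above, a hypothetical dump line such as
   "add_i32 tmp2,loc0,$0x10" shows a normal temp, a local temp and a constant
   operand; fixed and global temps print under their registered names. */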
2215 2216 /* Find helper name. */ 2217 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) 2218 { 2219 const char *ret = NULL; 2220 if (helper_table) { 2221 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); 2222 if (info) { 2223 ret = info->name; 2224 } 2225 } 2226 return ret; 2227 } 2228 2229 static const char * const cond_name[] = 2230 { 2231 [TCG_COND_NEVER] = "never", 2232 [TCG_COND_ALWAYS] = "always", 2233 [TCG_COND_EQ] = "eq", 2234 [TCG_COND_NE] = "ne", 2235 [TCG_COND_LT] = "lt", 2236 [TCG_COND_GE] = "ge", 2237 [TCG_COND_LE] = "le", 2238 [TCG_COND_GT] = "gt", 2239 [TCG_COND_LTU] = "ltu", 2240 [TCG_COND_GEU] = "geu", 2241 [TCG_COND_LEU] = "leu", 2242 [TCG_COND_GTU] = "gtu" 2243 }; 2244 2245 static const char * const ldst_name[] = 2246 { 2247 [MO_UB] = "ub", 2248 [MO_SB] = "sb", 2249 [MO_LEUW] = "leuw", 2250 [MO_LESW] = "lesw", 2251 [MO_LEUL] = "leul", 2252 [MO_LESL] = "lesl", 2253 [MO_LEQ] = "leq", 2254 [MO_BEUW] = "beuw", 2255 [MO_BESW] = "besw", 2256 [MO_BEUL] = "beul", 2257 [MO_BESL] = "besl", 2258 [MO_BEQ] = "beq", 2259 }; 2260 2261 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { 2262 #ifdef TARGET_ALIGNED_ONLY 2263 [MO_UNALN >> MO_ASHIFT] = "un+", 2264 [MO_ALIGN >> MO_ASHIFT] = "", 2265 #else 2266 [MO_UNALN >> MO_ASHIFT] = "", 2267 [MO_ALIGN >> MO_ASHIFT] = "al+", 2268 #endif 2269 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+", 2270 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+", 2271 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+", 2272 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+", 2273 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+", 2274 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", 2275 }; 2276 2277 static inline bool tcg_regset_single(TCGRegSet d) 2278 { 2279 return (d & (d - 1)) == 0; 2280 } 2281 2282 static inline TCGReg tcg_regset_first(TCGRegSet d) 2283 { 2284 if (TCG_TARGET_NB_REGS <= 32) { 2285 return ctz32(d); 2286 } else { 2287 return ctz64(d); 2288 } 2289 } 2290 2291 static void tcg_dump_ops(TCGContext *s, bool have_prefs) 2292 { 2293 char buf[128]; 2294 TCGOp *op; 2295 2296 QTAILQ_FOREACH(op, &s->ops, link) { 2297 int i, k, nb_oargs, nb_iargs, nb_cargs; 2298 const TCGOpDef *def; 2299 TCGOpcode c; 2300 int col = 0; 2301 2302 c = op->opc; 2303 def = &tcg_op_defs[c]; 2304 2305 if (c == INDEX_op_insn_start) { 2306 nb_oargs = 0; 2307 col += qemu_log("\n ----"); 2308 2309 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 2310 target_ulong a; 2311 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 2312 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 2313 #else 2314 a = op->args[i]; 2315 #endif 2316 col += qemu_log(" " TARGET_FMT_lx, a); 2317 } 2318 } else if (c == INDEX_op_call) { 2319 /* variable number of arguments */ 2320 nb_oargs = TCGOP_CALLO(op); 2321 nb_iargs = TCGOP_CALLI(op); 2322 nb_cargs = def->nb_cargs; 2323 2324 /* function name, flags, out args */ 2325 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, 2326 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), 2327 op->args[nb_oargs + nb_iargs + 1], nb_oargs); 2328 for (i = 0; i < nb_oargs; i++) { 2329 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), 2330 op->args[i])); 2331 } 2332 for (i = 0; i < nb_iargs; i++) { 2333 TCGArg arg = op->args[nb_oargs + i]; 2334 const char *t = "<dummy>"; 2335 if (arg != TCG_CALL_DUMMY_ARG) { 2336 t = tcg_get_arg_str(s, buf, sizeof(buf), arg); 2337 } 2338 col += qemu_log(",%s", t); 2339 } 2340 } else { 2341 col += qemu_log(" %s ", def->name); 2342 2343 nb_oargs = def->nb_oargs; 2344 nb_iargs = def->nb_iargs; 2345 nb_cargs = def->nb_cargs; 2346 2347 if 
(def->flags & TCG_OPF_VECTOR) { 2348 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op), 2349 8 << TCGOP_VECE(op)); 2350 } 2351 2352 k = 0; 2353 for (i = 0; i < nb_oargs; i++) { 2354 if (k != 0) { 2355 col += qemu_log(","); 2356 } 2357 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2358 op->args[k++])); 2359 } 2360 for (i = 0; i < nb_iargs; i++) { 2361 if (k != 0) { 2362 col += qemu_log(","); 2363 } 2364 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf), 2365 op->args[k++])); 2366 } 2367 switch (c) { 2368 case INDEX_op_brcond_i32: 2369 case INDEX_op_setcond_i32: 2370 case INDEX_op_movcond_i32: 2371 case INDEX_op_brcond2_i32: 2372 case INDEX_op_setcond2_i32: 2373 case INDEX_op_brcond_i64: 2374 case INDEX_op_setcond_i64: 2375 case INDEX_op_movcond_i64: 2376 case INDEX_op_cmp_vec: 2377 case INDEX_op_cmpsel_vec: 2378 if (op->args[k] < ARRAY_SIZE(cond_name) 2379 && cond_name[op->args[k]]) { 2380 col += qemu_log(",%s", cond_name[op->args[k++]]); 2381 } else { 2382 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]); 2383 } 2384 i = 1; 2385 break; 2386 case INDEX_op_qemu_ld_i32: 2387 case INDEX_op_qemu_st_i32: 2388 case INDEX_op_qemu_st8_i32: 2389 case INDEX_op_qemu_ld_i64: 2390 case INDEX_op_qemu_st_i64: 2391 { 2392 TCGMemOpIdx oi = op->args[k++]; 2393 MemOp op = get_memop(oi); 2394 unsigned ix = get_mmuidx(oi); 2395 2396 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 2397 col += qemu_log(",$0x%x,%u", op, ix); 2398 } else { 2399 const char *s_al, *s_op; 2400 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 2401 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 2402 col += qemu_log(",%s%s,%u", s_al, s_op, ix); 2403 } 2404 i = 1; 2405 } 2406 break; 2407 default: 2408 i = 0; 2409 break; 2410 } 2411 switch (c) { 2412 case INDEX_op_set_label: 2413 case INDEX_op_br: 2414 case INDEX_op_brcond_i32: 2415 case INDEX_op_brcond_i64: 2416 case INDEX_op_brcond2_i32: 2417 col += qemu_log("%s$L%d", k ? "," : "", 2418 arg_label(op->args[k])->id); 2419 i++, k++; 2420 break; 2421 default: 2422 break; 2423 } 2424 for (; i < nb_cargs; i++, k++) { 2425 col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); 2426 } 2427 } 2428 2429 if (have_prefs || op->life) { 2430 2431 QemuLogFile *logfile; 2432 2433 rcu_read_lock(); 2434 logfile = qatomic_rcu_read(&qemu_logfile); 2435 if (logfile) { 2436 for (; col < 40; ++col) { 2437 putc(' ', logfile->fd); 2438 } 2439 } 2440 rcu_read_unlock(); 2441 } 2442 2443 if (op->life) { 2444 unsigned life = op->life; 2445 2446 if (life & (SYNC_ARG * 3)) { 2447 qemu_log(" sync:"); 2448 for (i = 0; i < 2; ++i) { 2449 if (life & (SYNC_ARG << i)) { 2450 qemu_log(" %d", i); 2451 } 2452 } 2453 } 2454 life /= DEAD_ARG; 2455 if (life) { 2456 qemu_log(" dead:"); 2457 for (i = 0; life; ++i, life >>= 1) { 2458 if (life & 1) { 2459 qemu_log(" %d", i); 2460 } 2461 } 2462 } 2463 } 2464 2465 if (have_prefs) { 2466 for (i = 0; i < nb_oargs; ++i) { 2467 TCGRegSet set = op->output_pref[i]; 2468 2469 if (i == 0) { 2470 qemu_log(" pref="); 2471 } else { 2472 qemu_log(","); 2473 } 2474 if (set == 0) { 2475 qemu_log("none"); 2476 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 2477 qemu_log("all"); 2478 #ifdef CONFIG_DEBUG_TCG 2479 } else if (tcg_regset_single(set)) { 2480 TCGReg reg = tcg_regset_first(set); 2481 qemu_log("%s", tcg_target_reg_names[reg]); 2482 #endif 2483 } else if (TCG_TARGET_NB_REGS <= 32) { 2484 qemu_log("%#x", (uint32_t)set); 2485 } else { 2486 qemu_log("%#" PRIx64, (uint64_t)set); 2487 } 2488 } 2489 } 2490 2491 qemu_log("\n"); 2492 } 2493 } 2494 2495 /* we give more priority to constraints with less registers */ 2496 static int get_constraint_priority(const TCGOpDef *def, int k) 2497 { 2498 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 2499 int n; 2500 2501 if (arg_ct->oalias) { 2502 /* an alias is equivalent to a single register */ 2503 n = 1; 2504 } else { 2505 n = ctpop64(arg_ct->regs); 2506 } 2507 return TCG_TARGET_NB_REGS - n + 1; 2508 } 2509 2510 /* sort from highest priority to lowest */ 2511 static void sort_constraints(TCGOpDef *def, int start, int n) 2512 { 2513 int i, j; 2514 TCGArgConstraint *a = def->args_ct; 2515 2516 for (i = 0; i < n; i++) { 2517 a[start + i].sort_index = start + i; 2518 } 2519 if (n <= 1) { 2520 return; 2521 } 2522 for (i = 0; i < n - 1; i++) { 2523 for (j = i + 1; j < n; j++) { 2524 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2525 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2526 if (p1 < p2) { 2527 int tmp = a[start + i].sort_index; 2528 a[start + i].sort_index = a[start + j].sort_index; 2529 a[start + j].sort_index = tmp; 2530 } 2531 } 2532 } 2533 } 2534 2535 static void process_op_defs(TCGContext *s) 2536 { 2537 TCGOpcode op; 2538 2539 for (op = 0; op < NB_OPS; op++) { 2540 TCGOpDef *def = &tcg_op_defs[op]; 2541 const TCGTargetOpDef *tdefs; 2542 int i, nb_args; 2543 2544 if (def->flags & TCG_OPF_NOT_PRESENT) { 2545 continue; 2546 } 2547 2548 nb_args = def->nb_iargs + def->nb_oargs; 2549 if (nb_args == 0) { 2550 continue; 2551 } 2552 2553 /* 2554 * Macro magic should make it impossible, but double-check that 2555 * the array index is in range. Since the signness of an enum 2556 * is implementation defined, force the result to unsigned. 2557 */ 2558 unsigned con_set = tcg_target_op_def(op); 2559 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2560 tdefs = &constraint_sets[con_set]; 2561 2562 for (i = 0; i < nb_args; i++) { 2563 const char *ct_str = tdefs->args_ct_str[i]; 2564 /* Incomplete TCGTargetOpDef entry. */ 2565 tcg_debug_assert(ct_str != NULL); 2566 2567 while (*ct_str != '\0') { 2568 switch(*ct_str) { 2569 case '0' ... 
'9': 2570 { 2571 int oarg = *ct_str - '0'; 2572 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2573 tcg_debug_assert(oarg < def->nb_oargs); 2574 tcg_debug_assert(def->args_ct[oarg].regs != 0); 2575 def->args_ct[i] = def->args_ct[oarg]; 2576 /* The output sets oalias. */ 2577 def->args_ct[oarg].oalias = true; 2578 def->args_ct[oarg].alias_index = i; 2579 /* The input sets ialias. */ 2580 def->args_ct[i].ialias = true; 2581 def->args_ct[i].alias_index = oarg; 2582 } 2583 ct_str++; 2584 break; 2585 case '&': 2586 def->args_ct[i].newreg = true; 2587 ct_str++; 2588 break; 2589 case 'i': 2590 def->args_ct[i].ct |= TCG_CT_CONST; 2591 ct_str++; 2592 break; 2593 2594 /* Include all of the target-specific constraints. */ 2595 2596 #undef CONST 2597 #define CONST(CASE, MASK) \ 2598 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break; 2599 #define REGS(CASE, MASK) \ 2600 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break; 2601 2602 #include "tcg-target-con-str.h" 2603 2604 #undef REGS 2605 #undef CONST 2606 default: 2607 /* Typo in TCGTargetOpDef constraint. */ 2608 g_assert_not_reached(); 2609 } 2610 } 2611 } 2612 2613 /* TCGTargetOpDef entry with too much information? */ 2614 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2615 2616 /* sort the constraints (XXX: this is just an heuristic) */ 2617 sort_constraints(def, 0, def->nb_oargs); 2618 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2619 } 2620 } 2621 2622 void tcg_op_remove(TCGContext *s, TCGOp *op) 2623 { 2624 TCGLabel *label; 2625 2626 switch (op->opc) { 2627 case INDEX_op_br: 2628 label = arg_label(op->args[0]); 2629 label->refs--; 2630 break; 2631 case INDEX_op_brcond_i32: 2632 case INDEX_op_brcond_i64: 2633 label = arg_label(op->args[3]); 2634 label->refs--; 2635 break; 2636 case INDEX_op_brcond2_i32: 2637 label = arg_label(op->args[5]); 2638 label->refs--; 2639 break; 2640 default: 2641 break; 2642 } 2643 2644 QTAILQ_REMOVE(&s->ops, op, link); 2645 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2646 s->nb_ops--; 2647 2648 #ifdef CONFIG_PROFILER 2649 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2650 #endif 2651 } 2652 2653 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2654 { 2655 TCGContext *s = tcg_ctx; 2656 TCGOp *op; 2657 2658 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2659 op = tcg_malloc(sizeof(TCGOp)); 2660 } else { 2661 op = QTAILQ_FIRST(&s->free_ops); 2662 QTAILQ_REMOVE(&s->free_ops, op, link); 2663 } 2664 memset(op, 0, offsetof(TCGOp, link)); 2665 op->opc = opc; 2666 s->nb_ops++; 2667 2668 return op; 2669 } 2670 2671 TCGOp *tcg_emit_op(TCGOpcode opc) 2672 { 2673 TCGOp *op = tcg_op_alloc(opc); 2674 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2675 return op; 2676 } 2677 2678 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2679 { 2680 TCGOp *new_op = tcg_op_alloc(opc); 2681 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2682 return new_op; 2683 } 2684 2685 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2686 { 2687 TCGOp *new_op = tcg_op_alloc(opc); 2688 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2689 return new_op; 2690 } 2691 2692 /* Reachable analysis : remove unreachable code. 
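   One forward walk over the op list: after an unconditional branch, exit_tb
   or goto_ptr (or a noreturn helper call) everything is dead until the next
   label that still has references; a label whose single remaining reference
   is the immediately preceding br lets both the label and the branch go.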
*/ 2693 static void reachable_code_pass(TCGContext *s) 2694 { 2695 TCGOp *op, *op_next; 2696 bool dead = false; 2697 2698 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2699 bool remove = dead; 2700 TCGLabel *label; 2701 int call_flags; 2702 2703 switch (op->opc) { 2704 case INDEX_op_set_label: 2705 label = arg_label(op->args[0]); 2706 if (label->refs == 0) { 2707 /* 2708 * While there is an occasional backward branch, virtually 2709 * all branches generated by the translators are forward. 2710 * Which means that generally we will have already removed 2711 * all references to the label that will be, and there is 2712 * little to be gained by iterating. 2713 */ 2714 remove = true; 2715 } else { 2716 /* Once we see a label, insns become live again. */ 2717 dead = false; 2718 remove = false; 2719 2720 /* 2721 * Optimization can fold conditional branches to unconditional. 2722 * If we find a label with one reference which is preceded by 2723 * an unconditional branch to it, remove both. This needed to 2724 * wait until the dead code in between them was removed. 2725 */ 2726 if (label->refs == 1) { 2727 TCGOp *op_prev = QTAILQ_PREV(op, link); 2728 if (op_prev->opc == INDEX_op_br && 2729 label == arg_label(op_prev->args[0])) { 2730 tcg_op_remove(s, op_prev); 2731 remove = true; 2732 } 2733 } 2734 } 2735 break; 2736 2737 case INDEX_op_br: 2738 case INDEX_op_exit_tb: 2739 case INDEX_op_goto_ptr: 2740 /* Unconditional branches; everything following is dead. */ 2741 dead = true; 2742 break; 2743 2744 case INDEX_op_call: 2745 /* Notice noreturn helper calls, raising exceptions. */ 2746 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; 2747 if (call_flags & TCG_CALL_NO_RETURN) { 2748 dead = true; 2749 } 2750 break; 2751 2752 case INDEX_op_insn_start: 2753 /* Never remove -- we need to keep these for unwind. */ 2754 remove = false; 2755 break; 2756 2757 default: 2758 break; 2759 } 2760 2761 if (remove) { 2762 tcg_op_remove(s, op); 2763 } 2764 } 2765 } 2766 2767 #define TS_DEAD 1 2768 #define TS_MEM 2 2769 2770 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2771 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2772 2773 /* For liveness_pass_1, the register preferences for a given temp. */ 2774 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2775 { 2776 return ts->state_ptr; 2777 } 2778 2779 /* For liveness_pass_1, reset the preferences for a given temp to the 2780 * maximal regset for its type. 2781 */ 2782 static inline void la_reset_pref(TCGTemp *ts) 2783 { 2784 *la_temp_pref(ts) 2785 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2786 } 2787 2788 /* liveness analysis: end of function: all temps are dead, and globals 2789 should be in memory. */ 2790 static void la_func_end(TCGContext *s, int ng, int nt) 2791 { 2792 int i; 2793 2794 for (i = 0; i < ng; ++i) { 2795 s->temps[i].state = TS_DEAD | TS_MEM; 2796 la_reset_pref(&s->temps[i]); 2797 } 2798 for (i = ng; i < nt; ++i) { 2799 s->temps[i].state = TS_DEAD; 2800 la_reset_pref(&s->temps[i]); 2801 } 2802 } 2803 2804 /* liveness analysis: end of basic block: all temps are dead, globals 2805 and local temps should be in memory. 
*/ 2806 static void la_bb_end(TCGContext *s, int ng, int nt) 2807 { 2808 int i; 2809 2810 for (i = 0; i < nt; ++i) { 2811 TCGTemp *ts = &s->temps[i]; 2812 int state; 2813 2814 switch (ts->kind) { 2815 case TEMP_FIXED: 2816 case TEMP_GLOBAL: 2817 case TEMP_LOCAL: 2818 state = TS_DEAD | TS_MEM; 2819 break; 2820 case TEMP_NORMAL: 2821 case TEMP_CONST: 2822 state = TS_DEAD; 2823 break; 2824 default: 2825 g_assert_not_reached(); 2826 } 2827 ts->state = state; 2828 la_reset_pref(ts); 2829 } 2830 } 2831 2832 /* liveness analysis: sync globals back to memory. */ 2833 static void la_global_sync(TCGContext *s, int ng) 2834 { 2835 int i; 2836 2837 for (i = 0; i < ng; ++i) { 2838 int state = s->temps[i].state; 2839 s->temps[i].state = state | TS_MEM; 2840 if (state == TS_DEAD) { 2841 /* If the global was previously dead, reset prefs. */ 2842 la_reset_pref(&s->temps[i]); 2843 } 2844 } 2845 } 2846 2847 /* 2848 * liveness analysis: conditional branch: all temps are dead, 2849 * globals and local temps should be synced. 2850 */ 2851 static void la_bb_sync(TCGContext *s, int ng, int nt) 2852 { 2853 la_global_sync(s, ng); 2854 2855 for (int i = ng; i < nt; ++i) { 2856 TCGTemp *ts = &s->temps[i]; 2857 int state; 2858 2859 switch (ts->kind) { 2860 case TEMP_LOCAL: 2861 state = ts->state; 2862 ts->state = state | TS_MEM; 2863 if (state != TS_DEAD) { 2864 continue; 2865 } 2866 break; 2867 case TEMP_NORMAL: 2868 s->temps[i].state = TS_DEAD; 2869 break; 2870 case TEMP_CONST: 2871 continue; 2872 default: 2873 g_assert_not_reached(); 2874 } 2875 la_reset_pref(&s->temps[i]); 2876 } 2877 } 2878 2879 /* liveness analysis: sync globals back to memory and kill. */ 2880 static void la_global_kill(TCGContext *s, int ng) 2881 { 2882 int i; 2883 2884 for (i = 0; i < ng; i++) { 2885 s->temps[i].state = TS_DEAD | TS_MEM; 2886 la_reset_pref(&s->temps[i]); 2887 } 2888 } 2889 2890 /* liveness analysis: note live globals crossing calls. */ 2891 static void la_cross_call(TCGContext *s, int nt) 2892 { 2893 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2894 int i; 2895 2896 for (i = 0; i < nt; i++) { 2897 TCGTemp *ts = &s->temps[i]; 2898 if (!(ts->state & TS_DEAD)) { 2899 TCGRegSet *pset = la_temp_pref(ts); 2900 TCGRegSet set = *pset; 2901 2902 set &= mask; 2903 /* If the combination is not possible, restart. */ 2904 if (set == 0) { 2905 set = tcg_target_available_regs[ts->type] & mask; 2906 } 2907 *pset = set; 2908 } 2909 } 2910 } 2911 2912 /* Liveness analysis : update the opc_arg_life array to tell if a 2913 given input arguments is dead. Instructions updating dead 2914 temporaries are removed. */ 2915 static void liveness_pass_1(TCGContext *s) 2916 { 2917 int nb_globals = s->nb_globals; 2918 int nb_temps = s->nb_temps; 2919 TCGOp *op, *op_prev; 2920 TCGRegSet *prefs; 2921 int i; 2922 2923 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2924 for (i = 0; i < nb_temps; ++i) { 2925 s->temps[i].state_ptr = prefs + i; 2926 } 2927 2928 /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ 2929 la_func_end(s, nb_globals, nb_temps); 2930 2931 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2932 int nb_iargs, nb_oargs; 2933 TCGOpcode opc_new, opc_new2; 2934 bool have_opc_new2; 2935 TCGLifeData arg_life = 0; 2936 TCGTemp *ts; 2937 TCGOpcode opc = op->opc; 2938 const TCGOpDef *def = &tcg_op_defs[opc]; 2939 2940 switch (opc) { 2941 case INDEX_op_call: 2942 { 2943 int call_flags; 2944 int nb_call_regs; 2945 2946 nb_oargs = TCGOP_CALLO(op); 2947 nb_iargs = TCGOP_CALLI(op); 2948 call_flags = op->args[nb_oargs + nb_iargs + 1]; 2949 2950 /* pure functions can be removed if their result is unused */ 2951 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2952 for (i = 0; i < nb_oargs; i++) { 2953 ts = arg_temp(op->args[i]); 2954 if (ts->state != TS_DEAD) { 2955 goto do_not_remove_call; 2956 } 2957 } 2958 goto do_remove; 2959 } 2960 do_not_remove_call: 2961 2962 /* Output args are dead. */ 2963 for (i = 0; i < nb_oargs; i++) { 2964 ts = arg_temp(op->args[i]); 2965 if (ts->state & TS_DEAD) { 2966 arg_life |= DEAD_ARG << i; 2967 } 2968 if (ts->state & TS_MEM) { 2969 arg_life |= SYNC_ARG << i; 2970 } 2971 ts->state = TS_DEAD; 2972 la_reset_pref(ts); 2973 2974 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2975 op->output_pref[i] = 0; 2976 } 2977 2978 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2979 TCG_CALL_NO_READ_GLOBALS))) { 2980 la_global_kill(s, nb_globals); 2981 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2982 la_global_sync(s, nb_globals); 2983 } 2984 2985 /* Record arguments that die in this helper. */ 2986 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2987 ts = arg_temp(op->args[i]); 2988 if (ts && ts->state & TS_DEAD) { 2989 arg_life |= DEAD_ARG << i; 2990 } 2991 } 2992 2993 /* For all live registers, remove call-clobbered prefs. */ 2994 la_cross_call(s, nb_temps); 2995 2996 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2997 2998 /* Input arguments are live for preceding opcodes. */ 2999 for (i = 0; i < nb_iargs; i++) { 3000 ts = arg_temp(op->args[i + nb_oargs]); 3001 if (ts && ts->state & TS_DEAD) { 3002 /* For those arguments that die, and will be allocated 3003 * in registers, clear the register set for that arg, 3004 * to be filled in below. For args that will be on 3005 * the stack, reset to any available reg. 3006 */ 3007 *la_temp_pref(ts) 3008 = (i < nb_call_regs ? 0 : 3009 tcg_target_available_regs[ts->type]); 3010 ts->state &= ~TS_DEAD; 3011 } 3012 } 3013 3014 /* For each input argument, add its input register to prefs. 3015 If a temp is used once, this produces a single set bit. */ 3016 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 3017 ts = arg_temp(op->args[i + nb_oargs]); 3018 if (ts) { 3019 tcg_regset_set_reg(*la_temp_pref(ts), 3020 tcg_target_call_iarg_regs[i]); 3021 } 3022 } 3023 } 3024 break; 3025 case INDEX_op_insn_start: 3026 break; 3027 case INDEX_op_discard: 3028 /* mark the temporary as dead */ 3029 ts = arg_temp(op->args[0]); 3030 ts->state = TS_DEAD; 3031 la_reset_pref(ts); 3032 break; 3033 3034 case INDEX_op_add2_i32: 3035 opc_new = INDEX_op_add_i32; 3036 goto do_addsub2; 3037 case INDEX_op_sub2_i32: 3038 opc_new = INDEX_op_sub_i32; 3039 goto do_addsub2; 3040 case INDEX_op_add2_i64: 3041 opc_new = INDEX_op_add_i64; 3042 goto do_addsub2; 3043 case INDEX_op_sub2_i64: 3044 opc_new = INDEX_op_sub_i64; 3045 do_addsub2: 3046 nb_iargs = 4; 3047 nb_oargs = 2; 3048 /* Test if the high part of the operation is dead, but not 3049 the low part. The result can be optimized to a simple 3050 add or sub. 
This happens often for x86_64 guest when the 3051 cpu mode is set to 32 bit. */ 3052 if (arg_temp(op->args[1])->state == TS_DEAD) { 3053 if (arg_temp(op->args[0])->state == TS_DEAD) { 3054 goto do_remove; 3055 } 3056 /* Replace the opcode and adjust the args in place, 3057 leaving 3 unused args at the end. */ 3058 op->opc = opc = opc_new; 3059 op->args[1] = op->args[2]; 3060 op->args[2] = op->args[4]; 3061 /* Fall through and mark the single-word operation live. */ 3062 nb_iargs = 2; 3063 nb_oargs = 1; 3064 } 3065 goto do_not_remove; 3066 3067 case INDEX_op_mulu2_i32: 3068 opc_new = INDEX_op_mul_i32; 3069 opc_new2 = INDEX_op_muluh_i32; 3070 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 3071 goto do_mul2; 3072 case INDEX_op_muls2_i32: 3073 opc_new = INDEX_op_mul_i32; 3074 opc_new2 = INDEX_op_mulsh_i32; 3075 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 3076 goto do_mul2; 3077 case INDEX_op_mulu2_i64: 3078 opc_new = INDEX_op_mul_i64; 3079 opc_new2 = INDEX_op_muluh_i64; 3080 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 3081 goto do_mul2; 3082 case INDEX_op_muls2_i64: 3083 opc_new = INDEX_op_mul_i64; 3084 opc_new2 = INDEX_op_mulsh_i64; 3085 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 3086 goto do_mul2; 3087 do_mul2: 3088 nb_iargs = 2; 3089 nb_oargs = 2; 3090 if (arg_temp(op->args[1])->state == TS_DEAD) { 3091 if (arg_temp(op->args[0])->state == TS_DEAD) { 3092 /* Both parts of the operation are dead. */ 3093 goto do_remove; 3094 } 3095 /* The high part of the operation is dead; generate the low. */ 3096 op->opc = opc = opc_new; 3097 op->args[1] = op->args[2]; 3098 op->args[2] = op->args[3]; 3099 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 3100 /* The low part of the operation is dead; generate the high. */ 3101 op->opc = opc = opc_new2; 3102 op->args[0] = op->args[1]; 3103 op->args[1] = op->args[2]; 3104 op->args[2] = op->args[3]; 3105 } else { 3106 goto do_not_remove; 3107 } 3108 /* Mark the single-word operation live. */ 3109 nb_oargs = 1; 3110 goto do_not_remove; 3111 3112 default: 3113 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 3114 nb_iargs = def->nb_iargs; 3115 nb_oargs = def->nb_oargs; 3116 3117 /* Test if the operation can be removed because all 3118 its outputs are dead. We assume that nb_oargs == 0 3119 implies side effects */ 3120 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 3121 for (i = 0; i < nb_oargs; i++) { 3122 if (arg_temp(op->args[i])->state != TS_DEAD) { 3123 goto do_not_remove; 3124 } 3125 } 3126 goto do_remove; 3127 } 3128 goto do_not_remove; 3129 3130 do_remove: 3131 tcg_op_remove(s, op); 3132 break; 3133 3134 do_not_remove: 3135 for (i = 0; i < nb_oargs; i++) { 3136 ts = arg_temp(op->args[i]); 3137 3138 /* Remember the preference of the uses that followed. */ 3139 op->output_pref[i] = *la_temp_pref(ts); 3140 3141 /* Output args are dead. */ 3142 if (ts->state & TS_DEAD) { 3143 arg_life |= DEAD_ARG << i; 3144 } 3145 if (ts->state & TS_MEM) { 3146 arg_life |= SYNC_ARG << i; 3147 } 3148 ts->state = TS_DEAD; 3149 la_reset_pref(ts); 3150 } 3151 3152 /* If end of basic block, update. 
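   In order of precedence: BB_EXIT ends the function, COND_BRANCH syncs
   globals and local temps, BB_END kills everything for the block, and a
   plain SIDE_EFFECTS op syncs globals (also dropping call-clobbered
   preferences when CALL_CLOBBER is set).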
*/ 3153 if (def->flags & TCG_OPF_BB_EXIT) { 3154 la_func_end(s, nb_globals, nb_temps); 3155 } else if (def->flags & TCG_OPF_COND_BRANCH) { 3156 la_bb_sync(s, nb_globals, nb_temps); 3157 } else if (def->flags & TCG_OPF_BB_END) { 3158 la_bb_end(s, nb_globals, nb_temps); 3159 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3160 la_global_sync(s, nb_globals); 3161 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3162 la_cross_call(s, nb_temps); 3163 } 3164 } 3165 3166 /* Record arguments that die in this opcode. */ 3167 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3168 ts = arg_temp(op->args[i]); 3169 if (ts->state & TS_DEAD) { 3170 arg_life |= DEAD_ARG << i; 3171 } 3172 } 3173 3174 /* Input arguments are live for preceding opcodes. */ 3175 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3176 ts = arg_temp(op->args[i]); 3177 if (ts->state & TS_DEAD) { 3178 /* For operands that were dead, initially allow 3179 all regs for the type. */ 3180 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 3181 ts->state &= ~TS_DEAD; 3182 } 3183 } 3184 3185 /* Incorporate constraints for this operand. */ 3186 switch (opc) { 3187 case INDEX_op_mov_i32: 3188 case INDEX_op_mov_i64: 3189 /* Note that these are TCG_OPF_NOT_PRESENT and do not 3190 have proper constraints. That said, special case 3191 moves to propagate preferences backward. */ 3192 if (IS_DEAD_ARG(1)) { 3193 *la_temp_pref(arg_temp(op->args[0])) 3194 = *la_temp_pref(arg_temp(op->args[1])); 3195 } 3196 break; 3197 3198 default: 3199 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3200 const TCGArgConstraint *ct = &def->args_ct[i]; 3201 TCGRegSet set, *pset; 3202 3203 ts = arg_temp(op->args[i]); 3204 pset = la_temp_pref(ts); 3205 set = *pset; 3206 3207 set &= ct->regs; 3208 if (ct->ialias) { 3209 set &= op->output_pref[ct->alias_index]; 3210 } 3211 /* If the combination is not possible, restart. */ 3212 if (set == 0) { 3213 set = ct->regs; 3214 } 3215 *pset = set; 3216 } 3217 break; 3218 } 3219 break; 3220 } 3221 op->life = arg_life; 3222 } 3223 } 3224 3225 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3226 static bool liveness_pass_2(TCGContext *s) 3227 { 3228 int nb_globals = s->nb_globals; 3229 int nb_temps, i; 3230 bool changes = false; 3231 TCGOp *op, *op_next; 3232 3233 /* Create a temporary for each indirect global. */ 3234 for (i = 0; i < nb_globals; ++i) { 3235 TCGTemp *its = &s->temps[i]; 3236 if (its->indirect_reg) { 3237 TCGTemp *dts = tcg_temp_alloc(s); 3238 dts->type = its->type; 3239 dts->base_type = its->base_type; 3240 its->state_ptr = dts; 3241 } else { 3242 its->state_ptr = NULL; 3243 } 3244 /* All globals begin dead. */ 3245 its->state = TS_DEAD; 3246 } 3247 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3248 TCGTemp *its = &s->temps[i]; 3249 its->state_ptr = NULL; 3250 its->state = TS_DEAD; 3251 } 3252 3253 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3254 TCGOpcode opc = op->opc; 3255 const TCGOpDef *def = &tcg_op_defs[opc]; 3256 TCGLifeData arg_life = op->life; 3257 int nb_iargs, nb_oargs, call_flags; 3258 TCGTemp *arg_ts, *dir_ts; 3259 3260 if (opc == INDEX_op_call) { 3261 nb_oargs = TCGOP_CALLO(op); 3262 nb_iargs = TCGOP_CALLI(op); 3263 call_flags = op->args[nb_oargs + nb_iargs + 1]; 3264 } else { 3265 nb_iargs = def->nb_iargs; 3266 nb_oargs = def->nb_oargs; 3267 3268 /* Set flags similar to how calls require. 
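   i.e. treat a conditional branch or other side-effecting op as one that
   may read but not write globals, a BB end as one that may do both, and
   anything else as touching no globals at all.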
*/ 3269 if (def->flags & TCG_OPF_COND_BRANCH) { 3270 /* Like reading globals: sync_globals */ 3271 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3272 } else if (def->flags & TCG_OPF_BB_END) { 3273 /* Like writing globals: save_globals */ 3274 call_flags = 0; 3275 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3276 /* Like reading globals: sync_globals */ 3277 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3278 } else { 3279 /* No effect on globals. */ 3280 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3281 TCG_CALL_NO_WRITE_GLOBALS); 3282 } 3283 } 3284 3285 /* Make sure that input arguments are available. */ 3286 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3287 arg_ts = arg_temp(op->args[i]); 3288 if (arg_ts) { 3289 dir_ts = arg_ts->state_ptr; 3290 if (dir_ts && arg_ts->state == TS_DEAD) { 3291 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3292 ? INDEX_op_ld_i32 3293 : INDEX_op_ld_i64); 3294 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 3295 3296 lop->args[0] = temp_arg(dir_ts); 3297 lop->args[1] = temp_arg(arg_ts->mem_base); 3298 lop->args[2] = arg_ts->mem_offset; 3299 3300 /* Loaded, but synced with memory. */ 3301 arg_ts->state = TS_MEM; 3302 } 3303 } 3304 } 3305 3306 /* Perform input replacement, and mark inputs that became dead. 3307 No action is required except keeping temp_state up to date 3308 so that we reload when needed. */ 3309 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3310 arg_ts = arg_temp(op->args[i]); 3311 if (arg_ts) { 3312 dir_ts = arg_ts->state_ptr; 3313 if (dir_ts) { 3314 op->args[i] = temp_arg(dir_ts); 3315 changes = true; 3316 if (IS_DEAD_ARG(i)) { 3317 arg_ts->state = TS_DEAD; 3318 } 3319 } 3320 } 3321 } 3322 3323 /* Liveness analysis should ensure that the following are 3324 all correct, for call sites and basic block end points. */ 3325 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3326 /* Nothing to do */ 3327 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3328 for (i = 0; i < nb_globals; ++i) { 3329 /* Liveness should see that globals are synced back, 3330 that is, either TS_DEAD or TS_MEM. */ 3331 arg_ts = &s->temps[i]; 3332 tcg_debug_assert(arg_ts->state_ptr == 0 3333 || arg_ts->state != 0); 3334 } 3335 } else { 3336 for (i = 0; i < nb_globals; ++i) { 3337 /* Liveness should see that globals are saved back, 3338 that is, TS_DEAD, waiting to be reloaded. */ 3339 arg_ts = &s->temps[i]; 3340 tcg_debug_assert(arg_ts->state_ptr == 0 3341 || arg_ts->state == TS_DEAD); 3342 } 3343 } 3344 3345 /* Outputs become available. */ 3346 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3347 arg_ts = arg_temp(op->args[0]); 3348 dir_ts = arg_ts->state_ptr; 3349 if (dir_ts) { 3350 op->args[0] = temp_arg(dir_ts); 3351 changes = true; 3352 3353 /* The output is now live and modified. */ 3354 arg_ts->state = 0; 3355 3356 if (NEED_SYNC_ARG(0)) { 3357 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3358 ? 
INDEX_op_st_i32 3359 : INDEX_op_st_i64); 3360 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 3361 TCGTemp *out_ts = dir_ts; 3362 3363 if (IS_DEAD_ARG(0)) { 3364 out_ts = arg_temp(op->args[1]); 3365 arg_ts->state = TS_DEAD; 3366 tcg_op_remove(s, op); 3367 } else { 3368 arg_ts->state = TS_MEM; 3369 } 3370 3371 sop->args[0] = temp_arg(out_ts); 3372 sop->args[1] = temp_arg(arg_ts->mem_base); 3373 sop->args[2] = arg_ts->mem_offset; 3374 } else { 3375 tcg_debug_assert(!IS_DEAD_ARG(0)); 3376 } 3377 } 3378 } else { 3379 for (i = 0; i < nb_oargs; i++) { 3380 arg_ts = arg_temp(op->args[i]); 3381 dir_ts = arg_ts->state_ptr; 3382 if (!dir_ts) { 3383 continue; 3384 } 3385 op->args[i] = temp_arg(dir_ts); 3386 changes = true; 3387 3388 /* The output is now live and modified. */ 3389 arg_ts->state = 0; 3390 3391 /* Sync outputs upon their last write. */ 3392 if (NEED_SYNC_ARG(i)) { 3393 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3394 ? INDEX_op_st_i32 3395 : INDEX_op_st_i64); 3396 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 3397 3398 sop->args[0] = temp_arg(dir_ts); 3399 sop->args[1] = temp_arg(arg_ts->mem_base); 3400 sop->args[2] = arg_ts->mem_offset; 3401 3402 arg_ts->state = TS_MEM; 3403 } 3404 /* Drop outputs that are dead. */ 3405 if (IS_DEAD_ARG(i)) { 3406 arg_ts->state = TS_DEAD; 3407 } 3408 } 3409 } 3410 } 3411 3412 return changes; 3413 } 3414 3415 #ifdef CONFIG_DEBUG_TCG 3416 static void dump_regs(TCGContext *s) 3417 { 3418 TCGTemp *ts; 3419 int i; 3420 char buf[64]; 3421 3422 for(i = 0; i < s->nb_temps; i++) { 3423 ts = &s->temps[i]; 3424 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3425 switch(ts->val_type) { 3426 case TEMP_VAL_REG: 3427 printf("%s", tcg_target_reg_names[ts->reg]); 3428 break; 3429 case TEMP_VAL_MEM: 3430 printf("%d(%s)", (int)ts->mem_offset, 3431 tcg_target_reg_names[ts->mem_base->reg]); 3432 break; 3433 case TEMP_VAL_CONST: 3434 printf("$0x%" PRIx64, ts->val); 3435 break; 3436 case TEMP_VAL_DEAD: 3437 printf("D"); 3438 break; 3439 default: 3440 printf("???"); 3441 break; 3442 } 3443 printf("\n"); 3444 } 3445 3446 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 3447 if (s->reg_to_temp[i] != NULL) { 3448 printf("%s: %s\n", 3449 tcg_target_reg_names[i], 3450 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 3451 } 3452 } 3453 } 3454 3455 static void check_regs(TCGContext *s) 3456 { 3457 int reg; 3458 int k; 3459 TCGTemp *ts; 3460 char buf[64]; 3461 3462 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 3463 ts = s->reg_to_temp[reg]; 3464 if (ts != NULL) { 3465 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 3466 printf("Inconsistency for register %s:\n", 3467 tcg_target_reg_names[reg]); 3468 goto fail; 3469 } 3470 } 3471 } 3472 for (k = 0; k < s->nb_temps; k++) { 3473 ts = &s->temps[k]; 3474 if (ts->val_type == TEMP_VAL_REG 3475 && ts->kind != TEMP_FIXED 3476 && s->reg_to_temp[ts->reg] != ts) { 3477 printf("Inconsistency for temp %s:\n", 3478 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 3479 fail: 3480 printf("reg state:\n"); 3481 dump_regs(s); 3482 tcg_abort(); 3483 } 3484 } 3485 } 3486 #endif 3487 3488 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3489 { 3490 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) 3491 /* Sparc64 stack is accessed with offset of 2047 */ 3492 s->current_frame_offset = (s->current_frame_offset + 3493 (tcg_target_long)sizeof(tcg_target_long) - 1) & 3494 ~(sizeof(tcg_target_long) - 1); 3495 #endif 3496 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > 3497 s->frame_end) { 
3498 tcg_abort(); 3499 } 3500 ts->mem_offset = s->current_frame_offset; 3501 ts->mem_base = s->frame_temp; 3502 ts->mem_allocated = 1; 3503 s->current_frame_offset += sizeof(tcg_target_long); 3504 } 3505 3506 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3507 3508 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3509 mark it free; otherwise mark it dead. */ 3510 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3511 { 3512 TCGTempVal new_type; 3513 3514 switch (ts->kind) { 3515 case TEMP_FIXED: 3516 return; 3517 case TEMP_GLOBAL: 3518 case TEMP_LOCAL: 3519 new_type = TEMP_VAL_MEM; 3520 break; 3521 case TEMP_NORMAL: 3522 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3523 break; 3524 case TEMP_CONST: 3525 new_type = TEMP_VAL_CONST; 3526 break; 3527 default: 3528 g_assert_not_reached(); 3529 } 3530 if (ts->val_type == TEMP_VAL_REG) { 3531 s->reg_to_temp[ts->reg] = NULL; 3532 } 3533 ts->val_type = new_type; 3534 } 3535 3536 /* Mark a temporary as dead. */ 3537 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3538 { 3539 temp_free_or_dead(s, ts, 1); 3540 } 3541 3542 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3543 registers needs to be allocated to store a constant. If 'free_or_dead' 3544 is non-zero, subsequently release the temporary; if it is positive, the 3545 temp is dead; if it is negative, the temp is free. */ 3546 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3547 TCGRegSet preferred_regs, int free_or_dead) 3548 { 3549 if (!temp_readonly(ts) && !ts->mem_coherent) { 3550 if (!ts->mem_allocated) { 3551 temp_allocate_frame(s, ts); 3552 } 3553 switch (ts->val_type) { 3554 case TEMP_VAL_CONST: 3555 /* If we're going to free the temp immediately, then we won't 3556 require it later in a register, so attempt to store the 3557 constant to memory directly. */ 3558 if (free_or_dead 3559 && tcg_out_sti(s, ts->type, ts->val, 3560 ts->mem_base->reg, ts->mem_offset)) { 3561 break; 3562 } 3563 temp_load(s, ts, tcg_target_available_regs[ts->type], 3564 allocated_regs, preferred_regs); 3565 /* fallthrough */ 3566 3567 case TEMP_VAL_REG: 3568 tcg_out_st(s, ts->type, ts->reg, 3569 ts->mem_base->reg, ts->mem_offset); 3570 break; 3571 3572 case TEMP_VAL_MEM: 3573 break; 3574 3575 case TEMP_VAL_DEAD: 3576 default: 3577 tcg_abort(); 3578 } 3579 ts->mem_coherent = 1; 3580 } 3581 if (free_or_dead) { 3582 temp_free_or_dead(s, ts, free_or_dead); 3583 } 3584 } 3585 3586 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3587 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3588 { 3589 TCGTemp *ts = s->reg_to_temp[reg]; 3590 if (ts != NULL) { 3591 temp_sync(s, ts, allocated_regs, 0, -1); 3592 } 3593 } 3594 3595 /** 3596 * tcg_reg_alloc: 3597 * @required_regs: Set of registers in which we must allocate. 3598 * @allocated_regs: Set of registers which must be avoided. 3599 * @preferred_regs: Set of registers we should prefer. 3600 * @rev: True if we search the registers in "indirect" order. 3601 * 3602 * The allocated register must be in @required_regs & ~@allocated_regs, 3603 * but if we can put it in @preferred_regs we may save a move later. 
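 *
 * In outline: both candidate sets (preferences first) are searched for a
 * register that is currently free, following the target's allocation order;
 * only if none is free is a register spilled and reused, again preferring
 * the preferred set.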
3604 */ 3605 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3606 TCGRegSet allocated_regs, 3607 TCGRegSet preferred_regs, bool rev) 3608 { 3609 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3610 TCGRegSet reg_ct[2]; 3611 const int *order; 3612 3613 reg_ct[1] = required_regs & ~allocated_regs; 3614 tcg_debug_assert(reg_ct[1] != 0); 3615 reg_ct[0] = reg_ct[1] & preferred_regs; 3616 3617 /* Skip the preferred_regs option if it cannot be satisfied, 3618 or if the preference made no difference. */ 3619 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3620 3621 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3622 3623 /* Try free registers, preferences first. */ 3624 for (j = f; j < 2; j++) { 3625 TCGRegSet set = reg_ct[j]; 3626 3627 if (tcg_regset_single(set)) { 3628 /* One register in the set. */ 3629 TCGReg reg = tcg_regset_first(set); 3630 if (s->reg_to_temp[reg] == NULL) { 3631 return reg; 3632 } 3633 } else { 3634 for (i = 0; i < n; i++) { 3635 TCGReg reg = order[i]; 3636 if (s->reg_to_temp[reg] == NULL && 3637 tcg_regset_test_reg(set, reg)) { 3638 return reg; 3639 } 3640 } 3641 } 3642 } 3643 3644 /* We must spill something. */ 3645 for (j = f; j < 2; j++) { 3646 TCGRegSet set = reg_ct[j]; 3647 3648 if (tcg_regset_single(set)) { 3649 /* One register in the set. */ 3650 TCGReg reg = tcg_regset_first(set); 3651 tcg_reg_free(s, reg, allocated_regs); 3652 return reg; 3653 } else { 3654 for (i = 0; i < n; i++) { 3655 TCGReg reg = order[i]; 3656 if (tcg_regset_test_reg(set, reg)) { 3657 tcg_reg_free(s, reg, allocated_regs); 3658 return reg; 3659 } 3660 } 3661 } 3662 } 3663 3664 tcg_abort(); 3665 } 3666 3667 /* Make sure the temporary is in a register. If needed, allocate the register 3668 from DESIRED while avoiding ALLOCATED. */ 3669 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3670 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3671 { 3672 TCGReg reg; 3673 3674 switch (ts->val_type) { 3675 case TEMP_VAL_REG: 3676 return; 3677 case TEMP_VAL_CONST: 3678 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3679 preferred_regs, ts->indirect_base); 3680 if (ts->type <= TCG_TYPE_I64) { 3681 tcg_out_movi(s, ts->type, reg, ts->val); 3682 } else { 3683 uint64_t val = ts->val; 3684 MemOp vece = MO_64; 3685 3686 /* 3687 * Find the minimal vector element that matches the constant. 3688 * The targets will, in general, have to do this search anyway, 3689 * do this generically. 3690 */ 3691 if (val == dup_const(MO_8, val)) { 3692 vece = MO_8; 3693 } else if (val == dup_const(MO_16, val)) { 3694 vece = MO_16; 3695 } else if (val == dup_const(MO_32, val)) { 3696 vece = MO_32; 3697 } 3698 3699 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 3700 } 3701 ts->mem_coherent = 0; 3702 break; 3703 case TEMP_VAL_MEM: 3704 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3705 preferred_regs, ts->indirect_base); 3706 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3707 ts->mem_coherent = 1; 3708 break; 3709 case TEMP_VAL_DEAD: 3710 default: 3711 tcg_abort(); 3712 } 3713 ts->reg = reg; 3714 ts->val_type = TEMP_VAL_REG; 3715 s->reg_to_temp[reg] = ts; 3716 } 3717 3718 /* Save a temporary to memory. 'allocated_regs' is used in case a 3719 temporary registers needs to be allocated to store a constant. */ 3720 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3721 { 3722 /* The liveness analysis already ensures that globals are back 3723 in memory. Keep an tcg_debug_assert for safety. 
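   (temp_save() is reached both from save_globals() and for TEMP_LOCAL temps
   at basic-block ends; liveness has already arranged for those values to be
   synced, so only the check below remains.)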
*/ 3724 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 3725 } 3726 3727 /* save globals to their canonical location and assume they can be 3728 modified be the following code. 'allocated_regs' is used in case a 3729 temporary registers needs to be allocated to store a constant. */ 3730 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3731 { 3732 int i, n; 3733 3734 for (i = 0, n = s->nb_globals; i < n; i++) { 3735 temp_save(s, &s->temps[i], allocated_regs); 3736 } 3737 } 3738 3739 /* sync globals to their canonical location and assume they can be 3740 read by the following code. 'allocated_regs' is used in case a 3741 temporary registers needs to be allocated to store a constant. */ 3742 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3743 { 3744 int i, n; 3745 3746 for (i = 0, n = s->nb_globals; i < n; i++) { 3747 TCGTemp *ts = &s->temps[i]; 3748 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3749 || ts->kind == TEMP_FIXED 3750 || ts->mem_coherent); 3751 } 3752 } 3753 3754 /* at the end of a basic block, we assume all temporaries are dead and 3755 all globals are stored at their canonical location. */ 3756 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3757 { 3758 int i; 3759 3760 for (i = s->nb_globals; i < s->nb_temps; i++) { 3761 TCGTemp *ts = &s->temps[i]; 3762 3763 switch (ts->kind) { 3764 case TEMP_LOCAL: 3765 temp_save(s, ts, allocated_regs); 3766 break; 3767 case TEMP_NORMAL: 3768 /* The liveness analysis already ensures that temps are dead. 3769 Keep an tcg_debug_assert for safety. */ 3770 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3771 break; 3772 case TEMP_CONST: 3773 /* Similarly, we should have freed any allocated register. */ 3774 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3775 break; 3776 default: 3777 g_assert_not_reached(); 3778 } 3779 } 3780 3781 save_globals(s, allocated_regs); 3782 } 3783 3784 /* 3785 * At a conditional branch, we assume all temporaries are dead and 3786 * all globals and local temps are synced to their location. 3787 */ 3788 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3789 { 3790 sync_globals(s, allocated_regs); 3791 3792 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3793 TCGTemp *ts = &s->temps[i]; 3794 /* 3795 * The liveness analysis already ensures that temps are dead. 3796 * Keep tcg_debug_asserts for safety. 3797 */ 3798 switch (ts->kind) { 3799 case TEMP_LOCAL: 3800 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3801 break; 3802 case TEMP_NORMAL: 3803 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3804 break; 3805 case TEMP_CONST: 3806 break; 3807 default: 3808 g_assert_not_reached(); 3809 } 3810 } 3811 } 3812 3813 /* 3814 * Specialized code generation for INDEX_op_mov_* with a constant. 3815 */ 3816 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3817 tcg_target_ulong val, TCGLifeData arg_life, 3818 TCGRegSet preferred_regs) 3819 { 3820 /* ENV should not be modified. */ 3821 tcg_debug_assert(!temp_readonly(ots)); 3822 3823 /* The movi is not explicitly generated here. 
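   The constant is only recorded in the temp here; it is materialized later
   by temp_sync() or temp_load() (via tcg_out_sti()/tcg_out_movi()) if the
   value actually needs to reach memory or a register.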
*/ 3824 if (ots->val_type == TEMP_VAL_REG) { 3825 s->reg_to_temp[ots->reg] = NULL; 3826 } 3827 ots->val_type = TEMP_VAL_CONST; 3828 ots->val = val; 3829 ots->mem_coherent = 0; 3830 if (NEED_SYNC_ARG(0)) { 3831 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3832 } else if (IS_DEAD_ARG(0)) { 3833 temp_dead(s, ots); 3834 } 3835 } 3836 3837 /* 3838 * Specialized code generation for INDEX_op_mov_*. 3839 */ 3840 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3841 { 3842 const TCGLifeData arg_life = op->life; 3843 TCGRegSet allocated_regs, preferred_regs; 3844 TCGTemp *ts, *ots; 3845 TCGType otype, itype; 3846 3847 allocated_regs = s->reserved_regs; 3848 preferred_regs = op->output_pref[0]; 3849 ots = arg_temp(op->args[0]); 3850 ts = arg_temp(op->args[1]); 3851 3852 /* ENV should not be modified. */ 3853 tcg_debug_assert(!temp_readonly(ots)); 3854 3855 /* Note that otype != itype for no-op truncation. */ 3856 otype = ots->type; 3857 itype = ts->type; 3858 3859 if (ts->val_type == TEMP_VAL_CONST) { 3860 /* propagate constant or generate sti */ 3861 tcg_target_ulong val = ts->val; 3862 if (IS_DEAD_ARG(1)) { 3863 temp_dead(s, ts); 3864 } 3865 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3866 return; 3867 } 3868 3869 /* If the source value is in memory we're going to be forced 3870 to have it in a register in order to perform the copy. Copy 3871 the SOURCE value into its own register first, that way we 3872 don't have to reload SOURCE the next time it is used. */ 3873 if (ts->val_type == TEMP_VAL_MEM) { 3874 temp_load(s, ts, tcg_target_available_regs[itype], 3875 allocated_regs, preferred_regs); 3876 } 3877 3878 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3879 if (IS_DEAD_ARG(0)) { 3880 /* mov to a non-saved dead register makes no sense (even with 3881 liveness analysis disabled). */ 3882 tcg_debug_assert(NEED_SYNC_ARG(0)); 3883 if (!ots->mem_allocated) { 3884 temp_allocate_frame(s, ots); 3885 } 3886 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3887 if (IS_DEAD_ARG(1)) { 3888 temp_dead(s, ts); 3889 } 3890 temp_dead(s, ots); 3891 } else { 3892 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 3893 /* the mov can be suppressed */ 3894 if (ots->val_type == TEMP_VAL_REG) { 3895 s->reg_to_temp[ots->reg] = NULL; 3896 } 3897 ots->reg = ts->reg; 3898 temp_dead(s, ts); 3899 } else { 3900 if (ots->val_type != TEMP_VAL_REG) { 3901 /* When allocating a new register, make sure to not spill the 3902 input one. */ 3903 tcg_regset_set_reg(allocated_regs, ts->reg); 3904 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3905 allocated_regs, preferred_regs, 3906 ots->indirect_base); 3907 } 3908 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3909 /* 3910 * Cross register class move not supported. 3911 * Store the source register into the destination slot 3912 * and leave the destination temp as TEMP_VAL_MEM. 3913 */ 3914 assert(!temp_readonly(ots)); 3915 if (!ts->mem_allocated) { 3916 temp_allocate_frame(s, ots); 3917 } 3918 tcg_out_st(s, ts->type, ts->reg, 3919 ots->mem_base->reg, ots->mem_offset); 3920 ots->mem_coherent = 1; 3921 temp_free_or_dead(s, ots, -1); 3922 return; 3923 } 3924 } 3925 ots->val_type = TEMP_VAL_REG; 3926 ots->mem_coherent = 0; 3927 s->reg_to_temp[ots->reg] = ots; 3928 if (NEED_SYNC_ARG(0)) { 3929 temp_sync(s, ots, allocated_regs, 0, 0); 3930 } 3931 } 3932 } 3933 3934 /* 3935 * Specialized code generation for INDEX_op_dup_vec. 
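 *
 * In outline: a constant input degenerates to the movi path above; a
 * register input is broadcast directly when the dup constraint allows it,
 * otherwise moved or spilled so the dup can be done from memory; a memory
 * input prefers tcg_out_dupm_vec() and falls back to load-then-dup.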
3936 */ 3937 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3938 { 3939 const TCGLifeData arg_life = op->life; 3940 TCGRegSet dup_out_regs, dup_in_regs; 3941 TCGTemp *its, *ots; 3942 TCGType itype, vtype; 3943 intptr_t endian_fixup; 3944 unsigned vece; 3945 bool ok; 3946 3947 ots = arg_temp(op->args[0]); 3948 its = arg_temp(op->args[1]); 3949 3950 /* ENV should not be modified. */ 3951 tcg_debug_assert(!temp_readonly(ots)); 3952 3953 itype = its->type; 3954 vece = TCGOP_VECE(op); 3955 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3956 3957 if (its->val_type == TEMP_VAL_CONST) { 3958 /* Propagate constant via movi -> dupi. */ 3959 tcg_target_ulong val = its->val; 3960 if (IS_DEAD_ARG(1)) { 3961 temp_dead(s, its); 3962 } 3963 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3964 return; 3965 } 3966 3967 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3968 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3969 3970 /* Allocate the output register now. */ 3971 if (ots->val_type != TEMP_VAL_REG) { 3972 TCGRegSet allocated_regs = s->reserved_regs; 3973 3974 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3975 /* Make sure to not spill the input register. */ 3976 tcg_regset_set_reg(allocated_regs, its->reg); 3977 } 3978 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3979 op->output_pref[0], ots->indirect_base); 3980 ots->val_type = TEMP_VAL_REG; 3981 ots->mem_coherent = 0; 3982 s->reg_to_temp[ots->reg] = ots; 3983 } 3984 3985 switch (its->val_type) { 3986 case TEMP_VAL_REG: 3987 /* 3988 * The dup constriaints must be broad, covering all possible VECE. 3989 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 3990 * to fail, indicating that extra moves are required for that case. 3991 */ 3992 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 3993 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 3994 goto done; 3995 } 3996 /* Try again from memory or a vector input register. */ 3997 } 3998 if (!its->mem_coherent) { 3999 /* 4000 * The input register is not synced, and so an extra store 4001 * would be required to use memory. Attempt an integer-vector 4002 * register move first. We do not have a TCGRegSet for this. 4003 */ 4004 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 4005 break; 4006 } 4007 /* Sync the temp back to its slot and load from there. */ 4008 temp_sync(s, its, s->reserved_regs, 0, 0); 4009 } 4010 /* fall through */ 4011 4012 case TEMP_VAL_MEM: 4013 #ifdef HOST_WORDS_BIGENDIAN 4014 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; 4015 endian_fixup -= 1 << vece; 4016 #else 4017 endian_fixup = 0; 4018 #endif 4019 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 4020 its->mem_offset + endian_fixup)) { 4021 goto done; 4022 } 4023 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 4024 break; 4025 4026 default: 4027 g_assert_not_reached(); 4028 } 4029 4030 /* We now have a vector input register, so dup must succeed. 
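   (The scalar was placed in ots->reg itself by the paths above, so this is
   an in-place broadcast, which the backend is required to handle.)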
*/ 4031 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 4032 tcg_debug_assert(ok); 4033 4034 done: 4035 if (IS_DEAD_ARG(1)) { 4036 temp_dead(s, its); 4037 } 4038 if (NEED_SYNC_ARG(0)) { 4039 temp_sync(s, ots, s->reserved_regs, 0, 0); 4040 } 4041 if (IS_DEAD_ARG(0)) { 4042 temp_dead(s, ots); 4043 } 4044 } 4045 4046 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 4047 { 4048 const TCGLifeData arg_life = op->life; 4049 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 4050 TCGRegSet i_allocated_regs; 4051 TCGRegSet o_allocated_regs; 4052 int i, k, nb_iargs, nb_oargs; 4053 TCGReg reg; 4054 TCGArg arg; 4055 const TCGArgConstraint *arg_ct; 4056 TCGTemp *ts; 4057 TCGArg new_args[TCG_MAX_OP_ARGS]; 4058 int const_args[TCG_MAX_OP_ARGS]; 4059 4060 nb_oargs = def->nb_oargs; 4061 nb_iargs = def->nb_iargs; 4062 4063 /* copy constants */ 4064 memcpy(new_args + nb_oargs + nb_iargs, 4065 op->args + nb_oargs + nb_iargs, 4066 sizeof(TCGArg) * def->nb_cargs); 4067 4068 i_allocated_regs = s->reserved_regs; 4069 o_allocated_regs = s->reserved_regs; 4070 4071 /* satisfy input constraints */ 4072 for (k = 0; k < nb_iargs; k++) { 4073 TCGRegSet i_preferred_regs, o_preferred_regs; 4074 4075 i = def->args_ct[nb_oargs + k].sort_index; 4076 arg = op->args[i]; 4077 arg_ct = &def->args_ct[i]; 4078 ts = arg_temp(arg); 4079 4080 if (ts->val_type == TEMP_VAL_CONST 4081 && tcg_target_const_match(ts->val, ts->type, arg_ct)) { 4082 /* constant is OK for instruction */ 4083 const_args[i] = 1; 4084 new_args[i] = ts->val; 4085 continue; 4086 } 4087 4088 i_preferred_regs = o_preferred_regs = 0; 4089 if (arg_ct->ialias) { 4090 o_preferred_regs = op->output_pref[arg_ct->alias_index]; 4091 4092 /* 4093 * If the input is readonly, then it cannot also be an 4094 * output and aliased to itself. If the input is not 4095 * dead after the instruction, we must allocate a new 4096 * register and move it. 4097 */ 4098 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 4099 goto allocate_in_reg; 4100 } 4101 4102 /* 4103 * Check if the current register has already been allocated 4104 * for another input aliased to an output. 4105 */ 4106 if (ts->val_type == TEMP_VAL_REG) { 4107 reg = ts->reg; 4108 for (int k2 = 0; k2 < k; k2++) { 4109 int i2 = def->args_ct[nb_oargs + k2].sort_index; 4110 if (def->args_ct[i2].ialias && reg == new_args[i2]) { 4111 goto allocate_in_reg; 4112 } 4113 } 4114 } 4115 i_preferred_regs = o_preferred_regs; 4116 } 4117 4118 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); 4119 reg = ts->reg; 4120 4121 if (!tcg_regset_test_reg(arg_ct->regs, reg)) { 4122 allocate_in_reg: 4123 /* 4124 * Allocate a new register matching the constraint 4125 * and move the temporary register into it. 4126 */ 4127 temp_load(s, ts, tcg_target_available_regs[ts->type], 4128 i_allocated_regs, 0); 4129 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, 4130 o_preferred_regs, ts->indirect_base); 4131 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4132 /* 4133 * Cross register class move not supported. Sync the 4134 * temp back to its slot and load from there. 
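 * The value takes a round trip through its backing slot: temp_sync()
 * stores the current register to ts->mem_base/ts->mem_offset, and the
 * tcg_out_ld() below reloads it directly into the constrained register,
 * which may belong to a different class (e.g. vector vs integer).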
4135 */ 4136 temp_sync(s, ts, i_allocated_regs, 0, 0); 4137 tcg_out_ld(s, ts->type, reg, 4138 ts->mem_base->reg, ts->mem_offset); 4139 } 4140 } 4141 new_args[i] = reg; 4142 const_args[i] = 0; 4143 tcg_regset_set_reg(i_allocated_regs, reg); 4144 } 4145 4146 /* mark dead temporaries and free the associated registers */ 4147 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 4148 if (IS_DEAD_ARG(i)) { 4149 temp_dead(s, arg_temp(op->args[i])); 4150 } 4151 } 4152 4153 if (def->flags & TCG_OPF_COND_BRANCH) { 4154 tcg_reg_alloc_cbranch(s, i_allocated_regs); 4155 } else if (def->flags & TCG_OPF_BB_END) { 4156 tcg_reg_alloc_bb_end(s, i_allocated_regs); 4157 } else { 4158 if (def->flags & TCG_OPF_CALL_CLOBBER) { 4159 /* XXX: permit generic clobber register list ? */ 4160 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4161 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4162 tcg_reg_free(s, i, i_allocated_regs); 4163 } 4164 } 4165 } 4166 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 4167 /* sync globals if the op has side effects and might trigger 4168 an exception. */ 4169 sync_globals(s, i_allocated_regs); 4170 } 4171 4172 /* satisfy the output constraints */ 4173 for(k = 0; k < nb_oargs; k++) { 4174 i = def->args_ct[k].sort_index; 4175 arg = op->args[i]; 4176 arg_ct = &def->args_ct[i]; 4177 ts = arg_temp(arg); 4178 4179 /* ENV should not be modified. */ 4180 tcg_debug_assert(!temp_readonly(ts)); 4181 4182 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 4183 reg = new_args[arg_ct->alias_index]; 4184 } else if (arg_ct->newreg) { 4185 reg = tcg_reg_alloc(s, arg_ct->regs, 4186 i_allocated_regs | o_allocated_regs, 4187 op->output_pref[k], ts->indirect_base); 4188 } else { 4189 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 4190 op->output_pref[k], ts->indirect_base); 4191 } 4192 tcg_regset_set_reg(o_allocated_regs, reg); 4193 if (ts->val_type == TEMP_VAL_REG) { 4194 s->reg_to_temp[ts->reg] = NULL; 4195 } 4196 ts->val_type = TEMP_VAL_REG; 4197 ts->reg = reg; 4198 /* 4199 * Temp value is modified, so the value kept in memory is 4200 * potentially not the same. 4201 */ 4202 ts->mem_coherent = 0; 4203 s->reg_to_temp[reg] = ts; 4204 new_args[i] = reg; 4205 } 4206 } 4207 4208 /* emit instruction */ 4209 if (def->flags & TCG_OPF_VECTOR) { 4210 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 4211 new_args, const_args); 4212 } else { 4213 tcg_out_op(s, op->opc, new_args, const_args); 4214 } 4215 4216 /* move the outputs in the correct register if needed */ 4217 for(i = 0; i < nb_oargs; i++) { 4218 ts = arg_temp(op->args[i]); 4219 4220 /* ENV should not be modified. */ 4221 tcg_debug_assert(!temp_readonly(ts)); 4222 4223 if (NEED_SYNC_ARG(i)) { 4224 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 4225 } else if (IS_DEAD_ARG(i)) { 4226 temp_dead(s, ts); 4227 } 4228 } 4229 } 4230 4231 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 4232 { 4233 const TCGLifeData arg_life = op->life; 4234 TCGTemp *ots, *itsl, *itsh; 4235 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 4236 4237 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 4238 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 4239 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 4240 4241 ots = arg_temp(op->args[0]); 4242 itsl = arg_temp(op->args[1]); 4243 itsh = arg_temp(op->args[2]); 4244 4245 /* ENV should not be modified. */ 4246 tcg_debug_assert(!temp_readonly(ots)); 4247 4248 /* Allocate the output register now. 
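   The output is chosen before the inputs are consumed so that any input
   half still live in a register can be excluded from the allocation
   below; otherwise the output could steal, and force a spill of, one of
   the two 32-bit halves it is about to read.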
*/ 4249 if (ots->val_type != TEMP_VAL_REG) { 4250 TCGRegSet allocated_regs = s->reserved_regs; 4251 TCGRegSet dup_out_regs = 4252 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 4253 4254 /* Make sure to not spill the input registers. */ 4255 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 4256 tcg_regset_set_reg(allocated_regs, itsl->reg); 4257 } 4258 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 4259 tcg_regset_set_reg(allocated_regs, itsh->reg); 4260 } 4261 4262 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 4263 op->output_pref[0], ots->indirect_base); 4264 ots->val_type = TEMP_VAL_REG; 4265 ots->mem_coherent = 0; 4266 s->reg_to_temp[ots->reg] = ots; 4267 } 4268 4269 /* Promote dup2 of immediates to dupi_vec. */ 4270 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 4271 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 4272 MemOp vece = MO_64; 4273 4274 if (val == dup_const(MO_8, val)) { 4275 vece = MO_8; 4276 } else if (val == dup_const(MO_16, val)) { 4277 vece = MO_16; 4278 } else if (val == dup_const(MO_32, val)) { 4279 vece = MO_32; 4280 } 4281 4282 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 4283 goto done; 4284 } 4285 4286 /* If the two inputs form one 64-bit value, try dupm_vec. */ 4287 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { 4288 if (!itsl->mem_coherent) { 4289 temp_sync(s, itsl, s->reserved_regs, 0, 0); 4290 } 4291 if (!itsh->mem_coherent) { 4292 temp_sync(s, itsh, s->reserved_regs, 0, 0); 4293 } 4294 #ifdef HOST_WORDS_BIGENDIAN 4295 TCGTemp *its = itsh; 4296 #else 4297 TCGTemp *its = itsl; 4298 #endif 4299 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 4300 its->mem_base->reg, its->mem_offset)) { 4301 goto done; 4302 } 4303 } 4304 4305 /* Fall back to generic expansion. 
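   Returning false tells the caller (the INDEX_op_dup2_vec case in
   tcg_gen_code) to fall through to tcg_reg_alloc_op, which then satisfies
   the dup2_vec constraints like any other op and emits it via
   tcg_out_vec_op.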
*/ 4306 return false; 4307 4308 done: 4309 if (IS_DEAD_ARG(1)) { 4310 temp_dead(s, itsl); 4311 } 4312 if (IS_DEAD_ARG(2)) { 4313 temp_dead(s, itsh); 4314 } 4315 if (NEED_SYNC_ARG(0)) { 4316 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 4317 } else if (IS_DEAD_ARG(0)) { 4318 temp_dead(s, ots); 4319 } 4320 return true; 4321 } 4322 4323 #ifdef TCG_TARGET_STACK_GROWSUP 4324 #define STACK_DIR(x) (-(x)) 4325 #else 4326 #define STACK_DIR(x) (x) 4327 #endif 4328 4329 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 4330 { 4331 const int nb_oargs = TCGOP_CALLO(op); 4332 const int nb_iargs = TCGOP_CALLI(op); 4333 const TCGLifeData arg_life = op->life; 4334 int flags, nb_regs, i; 4335 TCGReg reg; 4336 TCGArg arg; 4337 TCGTemp *ts; 4338 intptr_t stack_offset; 4339 size_t call_stack_size; 4340 tcg_insn_unit *func_addr; 4341 int allocate_args; 4342 TCGRegSet allocated_regs; 4343 4344 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; 4345 flags = op->args[nb_oargs + nb_iargs + 1]; 4346 4347 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 4348 if (nb_regs > nb_iargs) { 4349 nb_regs = nb_iargs; 4350 } 4351 4352 /* assign stack slots first */ 4353 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 4354 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 4355 ~(TCG_TARGET_STACK_ALIGN - 1); 4356 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 4357 if (allocate_args) { 4358 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 4359 preallocate call stack */ 4360 tcg_abort(); 4361 } 4362 4363 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 4364 for (i = nb_regs; i < nb_iargs; i++) { 4365 arg = op->args[nb_oargs + i]; 4366 #ifdef TCG_TARGET_STACK_GROWSUP 4367 stack_offset -= sizeof(tcg_target_long); 4368 #endif 4369 if (arg != TCG_CALL_DUMMY_ARG) { 4370 ts = arg_temp(arg); 4371 temp_load(s, ts, tcg_target_available_regs[ts->type], 4372 s->reserved_regs, 0); 4373 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 4374 } 4375 #ifndef TCG_TARGET_STACK_GROWSUP 4376 stack_offset += sizeof(tcg_target_long); 4377 #endif 4378 } 4379 4380 /* assign input registers */ 4381 allocated_regs = s->reserved_regs; 4382 for (i = 0; i < nb_regs; i++) { 4383 arg = op->args[nb_oargs + i]; 4384 if (arg != TCG_CALL_DUMMY_ARG) { 4385 ts = arg_temp(arg); 4386 reg = tcg_target_call_iarg_regs[i]; 4387 4388 if (ts->val_type == TEMP_VAL_REG) { 4389 if (ts->reg != reg) { 4390 tcg_reg_free(s, reg, allocated_regs); 4391 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 4392 /* 4393 * Cross register class move not supported. Sync the 4394 * temp back to its slot and load from there. 
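 * Unlike the generic path in tcg_reg_alloc_op, the destination here is
 * a fixed ABI argument register, so there is no alternative register
 * class to choose from; the only fallback is to spill the value to its
 * slot with temp_sync() and reload it into the call register with
 * tcg_out_ld().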
4395 */ 4396 temp_sync(s, ts, allocated_regs, 0, 0); 4397 tcg_out_ld(s, ts->type, reg, 4398 ts->mem_base->reg, ts->mem_offset); 4399 } 4400 } 4401 } else { 4402 TCGRegSet arg_set = 0; 4403 4404 tcg_reg_free(s, reg, allocated_regs); 4405 tcg_regset_set_reg(arg_set, reg); 4406 temp_load(s, ts, arg_set, allocated_regs, 0); 4407 } 4408 4409 tcg_regset_set_reg(allocated_regs, reg); 4410 } 4411 } 4412 4413 /* mark dead temporaries and free the associated registers */ 4414 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 4415 if (IS_DEAD_ARG(i)) { 4416 temp_dead(s, arg_temp(op->args[i])); 4417 } 4418 } 4419 4420 /* clobber call registers */ 4421 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 4422 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 4423 tcg_reg_free(s, i, allocated_regs); 4424 } 4425 } 4426 4427 /* Save globals if they might be written by the helper, sync them if 4428 they might be read. */ 4429 if (flags & TCG_CALL_NO_READ_GLOBALS) { 4430 /* Nothing to do */ 4431 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 4432 sync_globals(s, allocated_regs); 4433 } else { 4434 save_globals(s, allocated_regs); 4435 } 4436 4437 tcg_out_call(s, func_addr); 4438 4439 /* assign output registers and emit moves if needed */ 4440 for(i = 0; i < nb_oargs; i++) { 4441 arg = op->args[i]; 4442 ts = arg_temp(arg); 4443 4444 /* ENV should not be modified. */ 4445 tcg_debug_assert(!temp_readonly(ts)); 4446 4447 reg = tcg_target_call_oarg_regs[i]; 4448 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 4449 if (ts->val_type == TEMP_VAL_REG) { 4450 s->reg_to_temp[ts->reg] = NULL; 4451 } 4452 ts->val_type = TEMP_VAL_REG; 4453 ts->reg = reg; 4454 ts->mem_coherent = 0; 4455 s->reg_to_temp[reg] = ts; 4456 if (NEED_SYNC_ARG(i)) { 4457 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); 4458 } else if (IS_DEAD_ARG(i)) { 4459 temp_dead(s, ts); 4460 } 4461 } 4462 } 4463 4464 #ifdef CONFIG_PROFILER 4465 4466 /* avoid copy/paste errors */ 4467 #define PROF_ADD(to, from, field) \ 4468 do { \ 4469 (to)->field += qatomic_read(&((from)->field)); \ 4470 } while (0) 4471 4472 #define PROF_MAX(to, from, field) \ 4473 do { \ 4474 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 4475 if (val__ > (to)->field) { \ 4476 (to)->field = val__; \ 4477 } \ 4478 } while (0) 4479 4480 /* Pass in a zero'ed @prof */ 4481 static inline 4482 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 4483 { 4484 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 4485 unsigned int i; 4486 4487 for (i = 0; i < n_ctxs; i++) { 4488 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4489 const TCGProfile *orig = &s->prof; 4490 4491 if (counters) { 4492 PROF_ADD(prof, orig, cpu_exec_time); 4493 PROF_ADD(prof, orig, tb_count1); 4494 PROF_ADD(prof, orig, tb_count); 4495 PROF_ADD(prof, orig, op_count); 4496 PROF_MAX(prof, orig, op_count_max); 4497 PROF_ADD(prof, orig, temp_count); 4498 PROF_MAX(prof, orig, temp_count_max); 4499 PROF_ADD(prof, orig, del_op_count); 4500 PROF_ADD(prof, orig, code_in_len); 4501 PROF_ADD(prof, orig, code_out_len); 4502 PROF_ADD(prof, orig, search_out_len); 4503 PROF_ADD(prof, orig, interm_time); 4504 PROF_ADD(prof, orig, code_time); 4505 PROF_ADD(prof, orig, la_time); 4506 PROF_ADD(prof, orig, opt_time); 4507 PROF_ADD(prof, orig, restore_count); 4508 PROF_ADD(prof, orig, restore_time); 4509 } 4510 if (table) { 4511 int i; 4512 4513 for (i = 0; i < NB_OPS; i++) { 4514 PROF_ADD(prof, orig, table_op_count[i]); 4515 } 4516 } 4517 } 4518 } 4519 4520 #undef PROF_ADD 4521 #undef PROF_MAX 4522 4523 static 
void tcg_profile_snapshot_counters(TCGProfile *prof) 4524 { 4525 tcg_profile_snapshot(prof, true, false); 4526 } 4527 4528 static void tcg_profile_snapshot_table(TCGProfile *prof) 4529 { 4530 tcg_profile_snapshot(prof, false, true); 4531 } 4532 4533 void tcg_dump_op_count(void) 4534 { 4535 TCGProfile prof = {}; 4536 int i; 4537 4538 tcg_profile_snapshot_table(&prof); 4539 for (i = 0; i < NB_OPS; i++) { 4540 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, 4541 prof.table_op_count[i]); 4542 } 4543 } 4544 4545 int64_t tcg_cpu_exec_time(void) 4546 { 4547 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs); 4548 unsigned int i; 4549 int64_t ret = 0; 4550 4551 for (i = 0; i < n_ctxs; i++) { 4552 const TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4553 const TCGProfile *prof = &s->prof; 4554 4555 ret += qatomic_read(&prof->cpu_exec_time); 4556 } 4557 return ret; 4558 } 4559 #else 4560 void tcg_dump_op_count(void) 4561 { 4562 qemu_printf("[TCG profiler not compiled]\n"); 4563 } 4564 4565 int64_t tcg_cpu_exec_time(void) 4566 { 4567 error_report("%s: TCG profiler not compiled", __func__); 4568 exit(EXIT_FAILURE); 4569 } 4570 #endif 4571 4572 4573 int tcg_gen_code(TCGContext *s, TranslationBlock *tb) 4574 { 4575 #ifdef CONFIG_PROFILER 4576 TCGProfile *prof = &s->prof; 4577 #endif 4578 int i, num_insns; 4579 TCGOp *op; 4580 4581 #ifdef CONFIG_PROFILER 4582 { 4583 int n = 0; 4584 4585 QTAILQ_FOREACH(op, &s->ops, link) { 4586 n++; 4587 } 4588 qatomic_set(&prof->op_count, prof->op_count + n); 4589 if (n > prof->op_count_max) { 4590 qatomic_set(&prof->op_count_max, n); 4591 } 4592 4593 n = s->nb_temps; 4594 qatomic_set(&prof->temp_count, prof->temp_count + n); 4595 if (n > prof->temp_count_max) { 4596 qatomic_set(&prof->temp_count_max, n); 4597 } 4598 } 4599 #endif 4600 4601 #ifdef DEBUG_DISAS 4602 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) 4603 && qemu_log_in_addr_range(tb->pc))) { 4604 FILE *logfile = qemu_log_lock(); 4605 qemu_log("OP:\n"); 4606 tcg_dump_ops(s, false); 4607 qemu_log("\n"); 4608 qemu_log_unlock(logfile); 4609 } 4610 #endif 4611 4612 #ifdef CONFIG_DEBUG_TCG 4613 /* Ensure all labels referenced have been emitted. */ 4614 { 4615 TCGLabel *l; 4616 bool error = false; 4617 4618 QSIMPLEQ_FOREACH(l, &s->labels, next) { 4619 if (unlikely(!l->present) && l->refs) { 4620 qemu_log_mask(CPU_LOG_TB_OP, 4621 "$L%d referenced but not present.\n", l->id); 4622 error = true; 4623 } 4624 } 4625 assert(!error); 4626 } 4627 #endif 4628 4629 #ifdef CONFIG_PROFILER 4630 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); 4631 #endif 4632 4633 #ifdef USE_TCG_OPTIMIZATIONS 4634 tcg_optimize(s); 4635 #endif 4636 4637 #ifdef CONFIG_PROFILER 4638 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); 4639 qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); 4640 #endif 4641 4642 reachable_code_pass(s); 4643 liveness_pass_1(s); 4644 4645 if (s->nb_indirects > 0) { 4646 #ifdef DEBUG_DISAS 4647 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) 4648 && qemu_log_in_addr_range(tb->pc))) { 4649 FILE *logfile = qemu_log_lock(); 4650 qemu_log("OP before indirect lowering:\n"); 4651 tcg_dump_ops(s, false); 4652 qemu_log("\n"); 4653 qemu_log_unlock(logfile); 4654 } 4655 #endif 4656 /* Replace indirect temps with direct temps. */ 4657 if (liveness_pass_2(s)) { 4658 /* If changes were made, re-run liveness. 
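   liveness_pass_2 rewrites the op stream, replacing uses of indirect
   temps with direct temps and inserting the loads/stores that keep them
   in sync, so the life data computed by the first pass no longer matches;
   running liveness_pass_1 again recomputes the dead/sync flags for the
   rewritten ops.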
*/ 4659 liveness_pass_1(s); 4660 } 4661 } 4662 4663 #ifdef CONFIG_PROFILER 4664 qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); 4665 #endif 4666 4667 #ifdef DEBUG_DISAS 4668 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) 4669 && qemu_log_in_addr_range(tb->pc))) { 4670 FILE *logfile = qemu_log_lock(); 4671 qemu_log("OP after optimization and liveness analysis:\n"); 4672 tcg_dump_ops(s, true); 4673 qemu_log("\n"); 4674 qemu_log_unlock(logfile); 4675 } 4676 #endif 4677 4678 tcg_reg_alloc_start(s); 4679 4680 /* 4681 * Reset the buffer pointers when restarting after overflow. 4682 * TODO: Move this into translate-all.c with the rest of the 4683 * buffer management. Having only this done here is confusing. 4684 */ 4685 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); 4686 s->code_ptr = s->code_buf; 4687 4688 #ifdef TCG_TARGET_NEED_LDST_LABELS 4689 QSIMPLEQ_INIT(&s->ldst_labels); 4690 #endif 4691 #ifdef TCG_TARGET_NEED_POOL_LABELS 4692 s->pool_labels = NULL; 4693 #endif 4694 4695 num_insns = -1; 4696 QTAILQ_FOREACH(op, &s->ops, link) { 4697 TCGOpcode opc = op->opc; 4698 4699 #ifdef CONFIG_PROFILER 4700 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); 4701 #endif 4702 4703 switch (opc) { 4704 case INDEX_op_mov_i32: 4705 case INDEX_op_mov_i64: 4706 case INDEX_op_mov_vec: 4707 tcg_reg_alloc_mov(s, op); 4708 break; 4709 case INDEX_op_dup_vec: 4710 tcg_reg_alloc_dup(s, op); 4711 break; 4712 case INDEX_op_insn_start: 4713 if (num_insns >= 0) { 4714 size_t off = tcg_current_code_size(s); 4715 s->gen_insn_end_off[num_insns] = off; 4716 /* Assert that we do not overflow our stored offset. */ 4717 assert(s->gen_insn_end_off[num_insns] == off); 4718 } 4719 num_insns++; 4720 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { 4721 target_ulong a; 4722 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS 4723 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); 4724 #else 4725 a = op->args[i]; 4726 #endif 4727 s->gen_insn_data[num_insns][i] = a; 4728 } 4729 break; 4730 case INDEX_op_discard: 4731 temp_dead(s, arg_temp(op->args[0])); 4732 break; 4733 case INDEX_op_set_label: 4734 tcg_reg_alloc_bb_end(s, s->reserved_regs); 4735 tcg_out_label(s, arg_label(op->args[0])); 4736 break; 4737 case INDEX_op_call: 4738 tcg_reg_alloc_call(s, op); 4739 break; 4740 case INDEX_op_dup2_vec: 4741 if (tcg_reg_alloc_dup2(s, op)) { 4742 break; 4743 } 4744 /* fall through */ 4745 default: 4746 /* Sanity check that we've not introduced any unhandled opcodes. */ 4747 tcg_debug_assert(tcg_op_supported(opc)); 4748 /* Note: in order to speed up the code, it would be much 4749 faster to have specialized register allocator functions for 4750 some common argument patterns */ 4751 tcg_reg_alloc_op(s, op); 4752 break; 4753 } 4754 #ifdef CONFIG_DEBUG_TCG 4755 check_regs(s); 4756 #endif 4757 /* Test for (pending) buffer overflow. The assumption is that any 4758 one operation beginning below the high water mark cannot overrun 4759 the buffer completely. Thus we can test for overflow after 4760 generating code without having to check during generation. */ 4761 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { 4762 return -1; 4763 } 4764 /* Test for TB overflow, as seen by gen_insn_end_off. 
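   The gen_insn_end_off[] entries are 16-bit, so a single TB must not grow
   past UINT16_MAX bytes of host code.  A caller is expected to handle the
   two failure codes roughly as follows (illustrative sketch only; the
   real handling lives in the translator loop):

       ret = tcg_gen_code(s, tb);
       if (ret == -1) {
           // code_gen_buffer (region) overflow: flush or switch buffers, retry
       } else if (ret == -2) {
           // generated code too large for one TB: retranslate with fewer guest insns
       }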
*/ 4765 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { 4766 return -2; 4767 } 4768 } 4769 tcg_debug_assert(num_insns >= 0); 4770 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); 4771 4772 /* Generate TB finalization at the end of block */ 4773 #ifdef TCG_TARGET_NEED_LDST_LABELS 4774 i = tcg_out_ldst_finalize(s); 4775 if (i < 0) { 4776 return i; 4777 } 4778 #endif 4779 #ifdef TCG_TARGET_NEED_POOL_LABELS 4780 i = tcg_out_pool_finalize(s); 4781 if (i < 0) { 4782 return i; 4783 } 4784 #endif 4785 if (!tcg_resolve_relocs(s)) { 4786 return -2; 4787 } 4788 4789 #ifndef CONFIG_TCG_INTERPRETER 4790 /* flush instruction cache */ 4791 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), 4792 (uintptr_t)s->code_buf, 4793 tcg_ptr_byte_diff(s->code_ptr, s->code_buf)); 4794 #endif 4795 4796 return tcg_current_code_size(s); 4797 } 4798 4799 #ifdef CONFIG_PROFILER 4800 void tcg_dump_info(void) 4801 { 4802 TCGProfile prof = {}; 4803 const TCGProfile *s; 4804 int64_t tb_count; 4805 int64_t tb_div_count; 4806 int64_t tot; 4807 4808 tcg_profile_snapshot_counters(&prof); 4809 s = &prof; 4810 tb_count = s->tb_count; 4811 tb_div_count = tb_count ? tb_count : 1; 4812 tot = s->interm_time + s->code_time; 4813 4814 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", 4815 tot, tot / 2.4e9); 4816 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64 4817 " %0.1f%%)\n", 4818 tb_count, s->tb_count1 - tb_count, 4819 (double)(s->tb_count1 - s->tb_count) 4820 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); 4821 qemu_printf("avg ops/TB %0.1f max=%d\n", 4822 (double)s->op_count / tb_div_count, s->op_count_max); 4823 qemu_printf("deleted ops/TB %0.2f\n", 4824 (double)s->del_op_count / tb_div_count); 4825 qemu_printf("avg temps/TB %0.2f max=%d\n", 4826 (double)s->temp_count / tb_div_count, s->temp_count_max); 4827 qemu_printf("avg host code/TB %0.1f\n", 4828 (double)s->code_out_len / tb_div_count); 4829 qemu_printf("avg search data/TB %0.1f\n", 4830 (double)s->search_out_len / tb_div_count); 4831 4832 qemu_printf("cycles/op %0.1f\n", 4833 s->op_count ? (double)tot / s->op_count : 0); 4834 qemu_printf("cycles/in byte %0.1f\n", 4835 s->code_in_len ? (double)tot / s->code_in_len : 0); 4836 qemu_printf("cycles/out byte %0.1f\n", 4837 s->code_out_len ? (double)tot / s->code_out_len : 0); 4838 qemu_printf("cycles/search byte %0.1f\n", 4839 s->search_out_len ? (double)tot / s->search_out_len : 0); 4840 if (tot == 0) { 4841 tot = 1; 4842 } 4843 qemu_printf(" gen_interm time %0.1f%%\n", 4844 (double)s->interm_time / tot * 100.0); 4845 qemu_printf(" gen_code time %0.1f%%\n", 4846 (double)s->code_time / tot * 100.0); 4847 qemu_printf("optim./code time %0.1f%%\n", 4848 (double)s->opt_time / (s->code_time ? s->code_time : 1) 4849 * 100.0); 4850 qemu_printf("liveness/code time %0.1f%%\n", 4851 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); 4852 qemu_printf("cpu_restore count %" PRId64 "\n", 4853 s->restore_count); 4854 qemu_printf(" avg cycles %0.1f\n", 4855 s->restore_count ? (double)s->restore_time / s->restore_count : 0); 4856 } 4857 #else 4858 void tcg_dump_info(void) 4859 { 4860 qemu_printf("[TCG profiler not compiled]\n"); 4861 } 4862 #endif 4863 4864 #ifdef ELF_HOST_MACHINE 4865 /* In order to use this feature, the backend needs to do three things: 4866 4867 (1) Define ELF_HOST_MACHINE to indicate both what value to 4868 put into the ELF image and to indicate support for the feature. 4869 4870 (2) Define tcg_register_jit. 
This should create a buffer containing 4871 the contents of a .debug_frame section that describes the post- 4872 prologue unwind info for the tcg machine. 4873 4874 (3) Call tcg_register_jit_int, with the constructed .debug_frame. 4875 */ 4876 4877 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */ 4878 typedef enum { 4879 JIT_NOACTION = 0, 4880 JIT_REGISTER_FN, 4881 JIT_UNREGISTER_FN 4882 } jit_actions_t; 4883 4884 struct jit_code_entry { 4885 struct jit_code_entry *next_entry; 4886 struct jit_code_entry *prev_entry; 4887 const void *symfile_addr; 4888 uint64_t symfile_size; 4889 }; 4890 4891 struct jit_descriptor { 4892 uint32_t version; 4893 uint32_t action_flag; 4894 struct jit_code_entry *relevant_entry; 4895 struct jit_code_entry *first_entry; 4896 }; 4897 4898 void __jit_debug_register_code(void) __attribute__((noinline)); 4899 void __jit_debug_register_code(void) 4900 { 4901 asm(""); 4902 } 4903 4904 /* Must statically initialize the version, because GDB may check 4905 the version before we can set it. */ 4906 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; 4907 4908 /* End GDB interface. */ 4909 4910 static int find_string(const char *strtab, const char *str) 4911 { 4912 const char *p = strtab + 1; 4913 4914 while (1) { 4915 if (strcmp(p, str) == 0) { 4916 return p - strtab; 4917 } 4918 p += strlen(p) + 1; 4919 } 4920 } 4921 4922 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size, 4923 const void *debug_frame, 4924 size_t debug_frame_size) 4925 { 4926 struct __attribute__((packed)) DebugInfo { 4927 uint32_t len; 4928 uint16_t version; 4929 uint32_t abbrev; 4930 uint8_t ptr_size; 4931 uint8_t cu_die; 4932 uint16_t cu_lang; 4933 uintptr_t cu_low_pc; 4934 uintptr_t cu_high_pc; 4935 uint8_t fn_die; 4936 char fn_name[16]; 4937 uintptr_t fn_low_pc; 4938 uintptr_t fn_high_pc; 4939 uint8_t cu_eoc; 4940 }; 4941 4942 struct ElfImage { 4943 ElfW(Ehdr) ehdr; 4944 ElfW(Phdr) phdr; 4945 ElfW(Shdr) shdr[7]; 4946 ElfW(Sym) sym[2]; 4947 struct DebugInfo di; 4948 uint8_t da[24]; 4949 char str[80]; 4950 }; 4951 4952 struct ElfImage *img; 4953 4954 static const struct ElfImage img_template = { 4955 .ehdr = { 4956 .e_ident[EI_MAG0] = ELFMAG0, 4957 .e_ident[EI_MAG1] = ELFMAG1, 4958 .e_ident[EI_MAG2] = ELFMAG2, 4959 .e_ident[EI_MAG3] = ELFMAG3, 4960 .e_ident[EI_CLASS] = ELF_CLASS, 4961 .e_ident[EI_DATA] = ELF_DATA, 4962 .e_ident[EI_VERSION] = EV_CURRENT, 4963 .e_type = ET_EXEC, 4964 .e_machine = ELF_HOST_MACHINE, 4965 .e_version = EV_CURRENT, 4966 .e_phoff = offsetof(struct ElfImage, phdr), 4967 .e_shoff = offsetof(struct ElfImage, shdr), 4968 .e_ehsize = sizeof(ElfW(Shdr)), 4969 .e_phentsize = sizeof(ElfW(Phdr)), 4970 .e_phnum = 1, 4971 .e_shentsize = sizeof(ElfW(Shdr)), 4972 .e_shnum = ARRAY_SIZE(img->shdr), 4973 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, 4974 #ifdef ELF_HOST_FLAGS 4975 .e_flags = ELF_HOST_FLAGS, 4976 #endif 4977 #ifdef ELF_OSABI 4978 .e_ident[EI_OSABI] = ELF_OSABI, 4979 #endif 4980 }, 4981 .phdr = { 4982 .p_type = PT_LOAD, 4983 .p_flags = PF_X, 4984 }, 4985 .shdr = { 4986 [0] = { .sh_type = SHT_NULL }, 4987 /* Trick: The contents of code_gen_buffer are not present in 4988 this fake ELF file; that got allocated elsewhere. Therefore 4989 we mark .text as SHT_NOBITS (similar to .bss) so that readers 4990 will not look for contents. We can record any address. 
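   As with .bss, a SHT_NOBITS section occupies no space in the image
   itself; GDB is expected to take only symbols, addresses and unwind
   data from this fake ELF file, while the instructions are read from
   the live code_gen_buffer it already knows about.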
*/ 4991 [1] = { /* .text */ 4992 .sh_type = SHT_NOBITS, 4993 .sh_flags = SHF_EXECINSTR | SHF_ALLOC, 4994 }, 4995 [2] = { /* .debug_info */ 4996 .sh_type = SHT_PROGBITS, 4997 .sh_offset = offsetof(struct ElfImage, di), 4998 .sh_size = sizeof(struct DebugInfo), 4999 }, 5000 [3] = { /* .debug_abbrev */ 5001 .sh_type = SHT_PROGBITS, 5002 .sh_offset = offsetof(struct ElfImage, da), 5003 .sh_size = sizeof(img->da), 5004 }, 5005 [4] = { /* .debug_frame */ 5006 .sh_type = SHT_PROGBITS, 5007 .sh_offset = sizeof(struct ElfImage), 5008 }, 5009 [5] = { /* .symtab */ 5010 .sh_type = SHT_SYMTAB, 5011 .sh_offset = offsetof(struct ElfImage, sym), 5012 .sh_size = sizeof(img->sym), 5013 .sh_info = 1, 5014 .sh_link = ARRAY_SIZE(img->shdr) - 1, 5015 .sh_entsize = sizeof(ElfW(Sym)), 5016 }, 5017 [6] = { /* .strtab */ 5018 .sh_type = SHT_STRTAB, 5019 .sh_offset = offsetof(struct ElfImage, str), 5020 .sh_size = sizeof(img->str), 5021 } 5022 }, 5023 .sym = { 5024 [1] = { /* code_gen_buffer */ 5025 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), 5026 .st_shndx = 1, 5027 } 5028 }, 5029 .di = { 5030 .len = sizeof(struct DebugInfo) - 4, 5031 .version = 2, 5032 .ptr_size = sizeof(void *), 5033 .cu_die = 1, 5034 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */ 5035 .fn_die = 2, 5036 .fn_name = "code_gen_buffer" 5037 }, 5038 .da = { 5039 1, /* abbrev number (the cu) */ 5040 0x11, 1, /* DW_TAG_compile_unit, has children */ 5041 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */ 5042 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5043 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5044 0, 0, /* end of abbrev */ 5045 2, /* abbrev number (the fn) */ 5046 0x2e, 0, /* DW_TAG_subprogram, no children */ 5047 0x3, 0x8, /* DW_AT_name, DW_FORM_string */ 5048 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */ 5049 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */ 5050 0, 0, /* end of abbrev */ 5051 0 /* no more abbrev */ 5052 }, 5053 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" 5054 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", 5055 }; 5056 5057 /* We only need a single jit entry; statically allocate it. */ 5058 static struct jit_code_entry one_entry; 5059 5060 uintptr_t buf = (uintptr_t)buf_ptr; 5061 size_t img_size = sizeof(struct ElfImage) + debug_frame_size; 5062 DebugFrameHeader *dfh; 5063 5064 img = g_malloc(img_size); 5065 *img = img_template; 5066 5067 img->phdr.p_vaddr = buf; 5068 img->phdr.p_paddr = buf; 5069 img->phdr.p_memsz = buf_size; 5070 5071 img->shdr[1].sh_name = find_string(img->str, ".text"); 5072 img->shdr[1].sh_addr = buf; 5073 img->shdr[1].sh_size = buf_size; 5074 5075 img->shdr[2].sh_name = find_string(img->str, ".debug_info"); 5076 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); 5077 5078 img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); 5079 img->shdr[4].sh_size = debug_frame_size; 5080 5081 img->shdr[5].sh_name = find_string(img->str, ".symtab"); 5082 img->shdr[6].sh_name = find_string(img->str, ".strtab"); 5083 5084 img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); 5085 img->sym[1].st_value = buf; 5086 img->sym[1].st_size = buf_size; 5087 5088 img->di.cu_low_pc = buf; 5089 img->di.cu_high_pc = buf + buf_size; 5090 img->di.fn_low_pc = buf; 5091 img->di.fn_high_pc = buf + buf_size; 5092 5093 dfh = (DebugFrameHeader *)(img + 1); 5094 memcpy(dfh, debug_frame, debug_frame_size); 5095 dfh->fde.func_start = buf; 5096 dfh->fde.func_len = buf_size; 5097 5098 #ifdef DEBUG_JIT 5099 /* Enable this block to be able to debug the ELF image file creation. 
5100 One can use readelf, objdump, or other inspection utilities. */
5101 {
5102 FILE *f = fopen("/tmp/qemu.jit", "w+b");
5103 if (f) {
5104 if (fwrite(img, img_size, 1, f) != 1) {
5105 /* Avoid stupid unused return value warning for fwrite. */
5106 }
5107 fclose(f);
5108 }
5109 }
5110 #endif
5111
5112 one_entry.symfile_addr = img;
5113 one_entry.symfile_size = img_size;
5114
5115 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5116 __jit_debug_descriptor.relevant_entry = &one_entry;
5117 __jit_debug_descriptor.first_entry = &one_entry;
5118 __jit_debug_register_code();
5119 }
5120 #else
5121 /* No support for the feature. Provide the entry point expected by exec.c,
5122 and implement the internal function we declared earlier. */
5123
5124 static void tcg_register_jit_int(const void *buf, size_t size,
5125 const void *debug_frame,
5126 size_t debug_frame_size)
5127 {
5128 }
5129
5130 void tcg_register_jit(const void *buf, size_t buf_size)
5131 {
5132 }
5133 #endif /* ELF_HOST_MACHINE */
5134
5135 #if !TCG_TARGET_MAYBE_vec
5136 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5137 {
5138 g_assert_not_reached();
5139 }
5140 #endif
5141