/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
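/*
 * The DebugFrame structures above mirror the DWARF .debug_frame CIE
 * and FDE record layouts; tcg_register_jit_int() packages them into a
 * minimal in-memory ELF image for the GDB JIT interface, so debuggers
 * can unwind through generated code.
 */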
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
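/*
 * The tcg_outN/tcg_patchN helpers all follow the same pattern: when
 * the value being emitted is exactly one instruction unit wide, a
 * direct store through code_ptr suffices; otherwise the value spans
 * several smaller units and is emitted with memcpy, advancing
 * code_ptr by N / TCG_TARGET_INSN_UNIT_SIZE units.  For example, on a
 * host with 1-byte insn units, tcg_out32() copies four units.
 */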
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
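/*
 * As an illustration of the token pasting above: a constraint-set
 * entry such as C_O1_I2(r, r, ri) expands via C_PFX3 to the single
 * identifier c_o1_i2_r_r_ri, which is used first as an enumerator,
 * then as an array initializer, and finally as a return value from
 * tcg_target_op_def(); see the three inclusions of
 * "tcg-target-con-set.h" below.
 */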
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
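/*
 * The same header is thus included three times with different macro
 * definitions (a classic X-macro arrangement): once to generate the
 * enumerators, once to generate the parallel array of string
 * constraints, and once more below so that tcg_target_op_def() can
 * name its result.  Keeping all three views in one header guarantees
 * the enum and the array stay in sync.
 */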
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;
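    /*
     * The struct copy above duplicated the temps array by value, but
     * any mem_base fields still point at entries of tcg_init_ctx.temps.
     * Translate each such pointer into the equivalent entry of the new
     * context's own array, preserving the index.
     */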
    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    }
    g_assert_not_reached();
}

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

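        /*
         * The typemask packs one 3-bit type code per parameter, with
         * the return type in bits [2:0] and argument N in bits
         * [3N+5:3N+3].  Shifting out the return type and rounding the
         * highest set bit position up to a multiple of 3 therefore
         * yields the argument count; e.g. with two arguments the last
         * non-zero field ends within bit positions 3..5 of the shifted
         * mask, giving nargs = DIV_ROUND_UP(6, 3) = 2.
         */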
        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    default:
        g_assert_not_reached();
    }
    assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
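    /*
     * Argument slots are numbered across registers and then stack:
     * slots below max_reg_slots correspond to entries of
     * tcg_target_call_iarg_regs[], the rest to stack words.  On a
     * 32-bit host an i64 argument takes two consecutive slots
     * (layout_arg_normal_n with n = 2), and ABIs declared
     * TCG_CALL_ARG_EVEN first round arg_slot up so that such a pair
     * starts in an even slot.
     */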
    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
    assert(cum.ref_slot <= max_stk_slots);
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);
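    /*
     * As an example of the loop below: if tcg_target_reg_alloc_order
     * begins with three call-saved registers {s0, s1, s2} followed by
     * call-clobbered ones, n ends up as 3 and the resulting indirect
     * order is {s2, s1, s0, ...}, i.e. the call-saved prefix reversed
     * and the remainder left in place.
     */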
    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);
#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));
    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
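/*
 * Freed temps are cached in the free_temps bitmaps, indexed by base
 * type plus TCG_TYPE_COUNT for local temps; e.g. a freed TCG_TYPE_I32
 * temp is recorded in set TCG_TYPE_I32, while a freed i32 local goes
 * in set TCG_TYPE_I32 + TCG_TYPE_COUNT.  The lookup below and the
 * release in tcg_temp_free_internal() use the same index computation.
 */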
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_NORMAL:
    case TEMP_LOCAL:
        break;
    default:
        g_assert_not_reached();
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}
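/*
 * Constants are interned: tcg_constant_internal() keeps one TCGTemp
 * per (type, value) pair in s->const_table[type], keyed by value, so
 * repeated requests for the same constant return the same temp.  The
 * tables are emptied again in tcg_func_start(), and freeing a
 * TEMP_CONST is deliberately a no-op (see above).
 */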
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
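/*
 * Note the contrast with tcg_constant_*: the tcg_const_* helpers above
 * allocate a fresh, mutable temp and emit a movi into it, so each call
 * costs an op and the result may be overwritten, whereas interned
 * constants are shared and must not be written to.
 */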
#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
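/*
 * Emit a call op.  The op's argument array is laid out as
 * [outputs..., inputs..., func, info]: first the (up to two) return
 * halves, then one entry per input location from info->in[], and
 * finally the raw function pointer and its TCGHelperInfo.
 */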
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + 1);
        tcg_debug_assert(ret->temp_subindex == 0);
        op->args[pi++] = temp_arg(ret);
        op->args[pi++] = temp_arg(ret + 1);
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                TCGv_i64 temp = tcg_temp_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}
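/*
 * The tables below provide the printable names that tcg_dump_ops()
 * uses for condition codes, memory operations, alignments and bswap
 * flags.  A dumped op line consists of the op name, its output, input
 * and constant arguments, then optional liveness annotations
 * (sync/dead) and, when requested, each output's register-allocation
 * preference.
 */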
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}

/* we give more priority to constraints with less registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct = &def->args_ct[k];
    int n = ctpop64(arg_ct->regs);

    /*
     * Sort constraints of a single register first, which includes output
     * aliases (which must exactly match the input already allocated).
     */
    if (n == 1 || arg_ct->oalias) {
        return INT_MAX;
    }

    /*
     * Sort register pairs next, first then second immediately after.
     * Arbitrarily sort multiple pairs by the index of the first reg;
     * there shouldn't be many pairs.
     */
    switch (arg_ct->pair) {
    case 1:
    case 3:
        return (k + 1) * 2;
    case 2:
        return (arg_ct->pair_index + 1) * 2 - 1;
    }

    /* Finally, sort by decreasing register count. */
    assert(n > 1);
    return -n;
}

/* sort from highest priority to lowest */
static void sort_constraints(TCGOpDef *def, int start, int n)
{
    int i, j;
    TCGArgConstraint *a = def->args_ct;

    for (i = 0; i < n; i++) {
        a[start + i].sort_index = start + i;
    }
    if (n <= 1) {
        return;
    }
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            int p1 = get_constraint_priority(def, a[start + i].sort_index);
            int p2 = get_constraint_priority(def, a[start + j].sort_index);
            if (p1 < p2) {
                int tmp = a[start + i].sort_index;
                a[start + i].sort_index = a[start + j].sort_index;
                a[start + j].sort_index = tmp;
            }
        }
    }
}

static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias. */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
2299 * 2300 * Case 1b is handled by setting the pair_index of the input to 2301 * itself, simply so it doesn't point to an unrelated argument. 2302 * Since we don't encounter the "second" during the input allocation 2303 * phase, nothing happens with the second half of the input pair. 2304 * 2305 * Case 2 is handled by setting the second input to pair=3, the 2306 * first output to pair=3, and the pair_index'es to match. 2307 */ 2308 if (saw_alias_pair) { 2309 for (i = def->nb_oargs; i < nb_args; i++) { 2310 /* 2311 * Since [0-9pm] must be alone in the constraint string, 2312 * the only way they can both be set is if the pair comes 2313 * from the output alias. 2314 */ 2315 if (!def->args_ct[i].ialias) { 2316 continue; 2317 } 2318 switch (def->args_ct[i].pair) { 2319 case 0: 2320 break; 2321 case 1: 2322 o = def->args_ct[i].alias_index; 2323 o2 = def->args_ct[o].pair_index; 2324 tcg_debug_assert(def->args_ct[o].pair == 1); 2325 tcg_debug_assert(def->args_ct[o2].pair == 2); 2326 if (def->args_ct[o2].oalias) { 2327 /* Case 1a */ 2328 i2 = def->args_ct[o2].alias_index; 2329 tcg_debug_assert(def->args_ct[i2].pair == 2); 2330 def->args_ct[i2].pair_index = i; 2331 def->args_ct[i].pair_index = i2; 2332 } else { 2333 /* Case 1b */ 2334 def->args_ct[i].pair_index = i; 2335 } 2336 break; 2337 case 2: 2338 o = def->args_ct[i].alias_index; 2339 o2 = def->args_ct[o].pair_index; 2340 tcg_debug_assert(def->args_ct[o].pair == 2); 2341 tcg_debug_assert(def->args_ct[o2].pair == 1); 2342 if (def->args_ct[o2].oalias) { 2343 /* Case 1a */ 2344 i2 = def->args_ct[o2].alias_index; 2345 tcg_debug_assert(def->args_ct[i2].pair == 1); 2346 def->args_ct[i2].pair_index = i; 2347 def->args_ct[i].pair_index = i2; 2348 } else { 2349 /* Case 2 */ 2350 def->args_ct[i].pair = 3; 2351 def->args_ct[o2].pair = 3; 2352 def->args_ct[i].pair_index = o2; 2353 def->args_ct[o2].pair_index = i; 2354 } 2355 break; 2356 default: 2357 g_assert_not_reached(); 2358 } 2359 } 2360 } 2361 2362 /* sort the constraints (XXX: this is just an heuristic) */ 2363 sort_constraints(def, 0, def->nb_oargs); 2364 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2365 } 2366 } 2367 2368 void tcg_op_remove(TCGContext *s, TCGOp *op) 2369 { 2370 TCGLabel *label; 2371 2372 switch (op->opc) { 2373 case INDEX_op_br: 2374 label = arg_label(op->args[0]); 2375 label->refs--; 2376 break; 2377 case INDEX_op_brcond_i32: 2378 case INDEX_op_brcond_i64: 2379 label = arg_label(op->args[3]); 2380 label->refs--; 2381 break; 2382 case INDEX_op_brcond2_i32: 2383 label = arg_label(op->args[5]); 2384 label->refs--; 2385 break; 2386 default: 2387 break; 2388 } 2389 2390 QTAILQ_REMOVE(&s->ops, op, link); 2391 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2392 s->nb_ops--; 2393 2394 #ifdef CONFIG_PROFILER 2395 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2396 #endif 2397 } 2398 2399 void tcg_remove_ops_after(TCGOp *op) 2400 { 2401 TCGContext *s = tcg_ctx; 2402 2403 while (true) { 2404 TCGOp *last = tcg_last_op(); 2405 if (last == op) { 2406 return; 2407 } 2408 tcg_op_remove(s, last); 2409 } 2410 } 2411 2412 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) 2413 { 2414 TCGContext *s = tcg_ctx; 2415 TCGOp *op = NULL; 2416 2417 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { 2418 QTAILQ_FOREACH(op, &s->free_ops, link) { 2419 if (nargs <= op->nargs) { 2420 QTAILQ_REMOVE(&s->free_ops, op, link); 2421 nargs = op->nargs; 2422 goto found; 2423 } 2424 } 2425 } 2426 2427 /* Most opcodes have 3 or 4 operands: reduce fragmentation. 
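   Rounding up to four means an op released to free_ops can later be
   recycled for any request of up to four arguments, since the search
   above accepts any free op whose nargs covers the requested count.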
*/ 2428 nargs = MAX(4, nargs); 2429 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); 2430 2431 found: 2432 memset(op, 0, offsetof(TCGOp, link)); 2433 op->opc = opc; 2434 op->nargs = nargs; 2435 2436 /* Check for bitfield overflow. */ 2437 tcg_debug_assert(op->nargs == nargs); 2438 2439 s->nb_ops++; 2440 return op; 2441 } 2442 2443 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) 2444 { 2445 TCGOp *op = tcg_op_alloc(opc, nargs); 2446 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2447 return op; 2448 } 2449 2450 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, 2451 TCGOpcode opc, unsigned nargs) 2452 { 2453 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2454 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2455 return new_op; 2456 } 2457 2458 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, 2459 TCGOpcode opc, unsigned nargs) 2460 { 2461 TCGOp *new_op = tcg_op_alloc(opc, nargs); 2462 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2463 return new_op; 2464 } 2465 2466 /* Reachable analysis : remove unreachable code. */ 2467 static void reachable_code_pass(TCGContext *s) 2468 { 2469 TCGOp *op, *op_next; 2470 bool dead = false; 2471 2472 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2473 bool remove = dead; 2474 TCGLabel *label; 2475 2476 switch (op->opc) { 2477 case INDEX_op_set_label: 2478 label = arg_label(op->args[0]); 2479 if (label->refs == 0) { 2480 /* 2481 * While there is an occasional backward branch, virtually 2482 * all branches generated by the translators are forward. 2483 * Which means that generally we will have already removed 2484 * all references to the label that will be, and there is 2485 * little to be gained by iterating. 2486 */ 2487 remove = true; 2488 } else { 2489 /* Once we see a label, insns become live again. */ 2490 dead = false; 2491 remove = false; 2492 2493 /* 2494 * Optimization can fold conditional branches to unconditional. 2495 * If we find a label with one reference which is preceded by 2496 * an unconditional branch to it, remove both. This needed to 2497 * wait until the dead code in between them was removed. 2498 */ 2499 if (label->refs == 1) { 2500 TCGOp *op_prev = QTAILQ_PREV(op, link); 2501 if (op_prev->opc == INDEX_op_br && 2502 label == arg_label(op_prev->args[0])) { 2503 tcg_op_remove(s, op_prev); 2504 remove = true; 2505 } 2506 } 2507 } 2508 break; 2509 2510 case INDEX_op_br: 2511 case INDEX_op_exit_tb: 2512 case INDEX_op_goto_ptr: 2513 /* Unconditional branches; everything following is dead. */ 2514 dead = true; 2515 break; 2516 2517 case INDEX_op_call: 2518 /* Notice noreturn helper calls, raising exceptions. */ 2519 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2520 dead = true; 2521 } 2522 break; 2523 2524 case INDEX_op_insn_start: 2525 /* Never remove -- we need to keep these for unwind. */ 2526 remove = false; 2527 break; 2528 2529 default: 2530 break; 2531 } 2532 2533 if (remove) { 2534 tcg_op_remove(s, op); 2535 } 2536 } 2537 } 2538 2539 #define TS_DEAD 1 2540 #define TS_MEM 2 2541 2542 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2543 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2544 2545 /* For liveness_pass_1, the register preferences for a given temp. */ 2546 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2547 { 2548 return ts->state_ptr; 2549 } 2550 2551 /* For liveness_pass_1, reset the preferences for a given temp to the 2552 * maximal regset for its type. 2553 */ 2554 static inline void la_reset_pref(TCGTemp *ts) 2555 { 2556 *la_temp_pref(ts) 2557 = (ts->state == TS_DEAD ? 
0 : tcg_target_available_regs[ts->type]); 2558 } 2559 2560 /* liveness analysis: end of function: all temps are dead, and globals 2561 should be in memory. */ 2562 static void la_func_end(TCGContext *s, int ng, int nt) 2563 { 2564 int i; 2565 2566 for (i = 0; i < ng; ++i) { 2567 s->temps[i].state = TS_DEAD | TS_MEM; 2568 la_reset_pref(&s->temps[i]); 2569 } 2570 for (i = ng; i < nt; ++i) { 2571 s->temps[i].state = TS_DEAD; 2572 la_reset_pref(&s->temps[i]); 2573 } 2574 } 2575 2576 /* liveness analysis: end of basic block: all temps are dead, globals 2577 and local temps should be in memory. */ 2578 static void la_bb_end(TCGContext *s, int ng, int nt) 2579 { 2580 int i; 2581 2582 for (i = 0; i < nt; ++i) { 2583 TCGTemp *ts = &s->temps[i]; 2584 int state; 2585 2586 switch (ts->kind) { 2587 case TEMP_FIXED: 2588 case TEMP_GLOBAL: 2589 case TEMP_LOCAL: 2590 state = TS_DEAD | TS_MEM; 2591 break; 2592 case TEMP_NORMAL: 2593 case TEMP_EBB: 2594 case TEMP_CONST: 2595 state = TS_DEAD; 2596 break; 2597 default: 2598 g_assert_not_reached(); 2599 } 2600 ts->state = state; 2601 la_reset_pref(ts); 2602 } 2603 } 2604 2605 /* liveness analysis: sync globals back to memory. */ 2606 static void la_global_sync(TCGContext *s, int ng) 2607 { 2608 int i; 2609 2610 for (i = 0; i < ng; ++i) { 2611 int state = s->temps[i].state; 2612 s->temps[i].state = state | TS_MEM; 2613 if (state == TS_DEAD) { 2614 /* If the global was previously dead, reset prefs. */ 2615 la_reset_pref(&s->temps[i]); 2616 } 2617 } 2618 } 2619 2620 /* 2621 * liveness analysis: conditional branch: all temps are dead unless 2622 * explicitly live-across-conditional-branch, globals and local temps 2623 * should be synced. 2624 */ 2625 static void la_bb_sync(TCGContext *s, int ng, int nt) 2626 { 2627 la_global_sync(s, ng); 2628 2629 for (int i = ng; i < nt; ++i) { 2630 TCGTemp *ts = &s->temps[i]; 2631 int state; 2632 2633 switch (ts->kind) { 2634 case TEMP_LOCAL: 2635 state = ts->state; 2636 ts->state = state | TS_MEM; 2637 if (state != TS_DEAD) { 2638 continue; 2639 } 2640 break; 2641 case TEMP_NORMAL: 2642 s->temps[i].state = TS_DEAD; 2643 break; 2644 case TEMP_EBB: 2645 case TEMP_CONST: 2646 continue; 2647 default: 2648 g_assert_not_reached(); 2649 } 2650 la_reset_pref(&s->temps[i]); 2651 } 2652 } 2653 2654 /* liveness analysis: sync globals back to memory and kill. */ 2655 static void la_global_kill(TCGContext *s, int ng) 2656 { 2657 int i; 2658 2659 for (i = 0; i < ng; i++) { 2660 s->temps[i].state = TS_DEAD | TS_MEM; 2661 la_reset_pref(&s->temps[i]); 2662 } 2663 } 2664 2665 /* liveness analysis: note live globals crossing calls. */ 2666 static void la_cross_call(TCGContext *s, int nt) 2667 { 2668 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2669 int i; 2670 2671 for (i = 0; i < nt; i++) { 2672 TCGTemp *ts = &s->temps[i]; 2673 if (!(ts->state & TS_DEAD)) { 2674 TCGRegSet *pset = la_temp_pref(ts); 2675 TCGRegSet set = *pset; 2676 2677 set &= mask; 2678 /* If the combination is not possible, restart. */ 2679 if (set == 0) { 2680 set = tcg_target_available_regs[ts->type] & mask; 2681 } 2682 *pset = set; 2683 } 2684 } 2685 } 2686 2687 /* Liveness analysis : update the opc_arg_life array to tell if a 2688 given input arguments is dead. Instructions updating dead 2689 temporaries are removed. 
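   The life data is a per-argument bitmask: for argument index n,
   (DEAD_ARG << n) marks the temp as dying at this op and
   (SYNC_ARG << n) requests a sync back to its memory slot, matching
   the IS_DEAD_ARG and NEED_SYNC_ARG macros above.  Sketch of a
   typical consumer (illustrative only):

       TCGLifeData arg_life = op->life;
       if (IS_DEAD_ARG(1)) {
           temp_dead(s, arg_temp(op->args[1]));
       }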
*/ 2690 static void liveness_pass_1(TCGContext *s) 2691 { 2692 int nb_globals = s->nb_globals; 2693 int nb_temps = s->nb_temps; 2694 TCGOp *op, *op_prev; 2695 TCGRegSet *prefs; 2696 int i; 2697 2698 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2699 for (i = 0; i < nb_temps; ++i) { 2700 s->temps[i].state_ptr = prefs + i; 2701 } 2702 2703 /* ??? Should be redundant with the exit_tb that ends the TB. */ 2704 la_func_end(s, nb_globals, nb_temps); 2705 2706 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2707 int nb_iargs, nb_oargs; 2708 TCGOpcode opc_new, opc_new2; 2709 bool have_opc_new2; 2710 TCGLifeData arg_life = 0; 2711 TCGTemp *ts; 2712 TCGOpcode opc = op->opc; 2713 const TCGOpDef *def = &tcg_op_defs[opc]; 2714 2715 switch (opc) { 2716 case INDEX_op_call: 2717 { 2718 const TCGHelperInfo *info = tcg_call_info(op); 2719 int call_flags = tcg_call_flags(op); 2720 2721 nb_oargs = TCGOP_CALLO(op); 2722 nb_iargs = TCGOP_CALLI(op); 2723 2724 /* pure functions can be removed if their result is unused */ 2725 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2726 for (i = 0; i < nb_oargs; i++) { 2727 ts = arg_temp(op->args[i]); 2728 if (ts->state != TS_DEAD) { 2729 goto do_not_remove_call; 2730 } 2731 } 2732 goto do_remove; 2733 } 2734 do_not_remove_call: 2735 2736 /* Output args are dead. */ 2737 for (i = 0; i < nb_oargs; i++) { 2738 ts = arg_temp(op->args[i]); 2739 if (ts->state & TS_DEAD) { 2740 arg_life |= DEAD_ARG << i; 2741 } 2742 if (ts->state & TS_MEM) { 2743 arg_life |= SYNC_ARG << i; 2744 } 2745 ts->state = TS_DEAD; 2746 la_reset_pref(ts); 2747 } 2748 2749 /* Not used -- it will be tcg_target_call_oarg_reg(). */ 2750 memset(op->output_pref, 0, sizeof(op->output_pref)); 2751 2752 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2753 TCG_CALL_NO_READ_GLOBALS))) { 2754 la_global_kill(s, nb_globals); 2755 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2756 la_global_sync(s, nb_globals); 2757 } 2758 2759 /* Record arguments that die in this helper. */ 2760 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2761 ts = arg_temp(op->args[i]); 2762 if (ts->state & TS_DEAD) { 2763 arg_life |= DEAD_ARG << i; 2764 } 2765 } 2766 2767 /* For all live registers, remove call-clobbered prefs. */ 2768 la_cross_call(s, nb_temps); 2769 2770 /* 2771 * Input arguments are live for preceding opcodes. 2772 * 2773 * For those arguments that die, and will be allocated in 2774 * registers, clear the register set for that arg, to be 2775 * filled in below. For args that will be on the stack, 2776 * reset to any available reg. Process arguments in reverse 2777 * order so that if a temp is used more than once, the stack 2778 * reset to max happens before the register reset to 0. 2779 */ 2780 for (i = nb_iargs - 1; i >= 0; i--) { 2781 const TCGCallArgumentLoc *loc = &info->in[i]; 2782 ts = arg_temp(op->args[nb_oargs + i]); 2783 2784 if (ts->state & TS_DEAD) { 2785 switch (loc->kind) { 2786 case TCG_CALL_ARG_NORMAL: 2787 case TCG_CALL_ARG_EXTEND_U: 2788 case TCG_CALL_ARG_EXTEND_S: 2789 if (REG_P(loc)) { 2790 *la_temp_pref(ts) = 0; 2791 break; 2792 } 2793 /* fall through */ 2794 default: 2795 *la_temp_pref(ts) = 2796 tcg_target_available_regs[ts->type]; 2797 break; 2798 } 2799 ts->state &= ~TS_DEAD; 2800 } 2801 } 2802 2803 /* 2804 * For each input argument, add its input register to prefs. 2805 * If a temp is used once, this produces a single set bit; 2806 * if a temp is used multiple times, this produces a set. 
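             *
             * Illustrative example (hypothetical host): if the first two
             * integer argument registers are R0 and R1 and one temp is
             * passed in both slots, its preference set accumulates
             * {R0, R1}, letting the allocator satisfy either slot
             * without an extra move.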
2807 */ 2808 for (i = 0; i < nb_iargs; i++) { 2809 const TCGCallArgumentLoc *loc = &info->in[i]; 2810 ts = arg_temp(op->args[nb_oargs + i]); 2811 2812 switch (loc->kind) { 2813 case TCG_CALL_ARG_NORMAL: 2814 case TCG_CALL_ARG_EXTEND_U: 2815 case TCG_CALL_ARG_EXTEND_S: 2816 if (REG_P(loc)) { 2817 tcg_regset_set_reg(*la_temp_pref(ts), 2818 tcg_target_call_iarg_regs[loc->arg_slot]); 2819 } 2820 break; 2821 default: 2822 break; 2823 } 2824 } 2825 } 2826 break; 2827 case INDEX_op_insn_start: 2828 break; 2829 case INDEX_op_discard: 2830 /* mark the temporary as dead */ 2831 ts = arg_temp(op->args[0]); 2832 ts->state = TS_DEAD; 2833 la_reset_pref(ts); 2834 break; 2835 2836 case INDEX_op_add2_i32: 2837 opc_new = INDEX_op_add_i32; 2838 goto do_addsub2; 2839 case INDEX_op_sub2_i32: 2840 opc_new = INDEX_op_sub_i32; 2841 goto do_addsub2; 2842 case INDEX_op_add2_i64: 2843 opc_new = INDEX_op_add_i64; 2844 goto do_addsub2; 2845 case INDEX_op_sub2_i64: 2846 opc_new = INDEX_op_sub_i64; 2847 do_addsub2: 2848 nb_iargs = 4; 2849 nb_oargs = 2; 2850 /* Test if the high part of the operation is dead, but not 2851 the low part. The result can be optimized to a simple 2852 add or sub. This happens often for x86_64 guest when the 2853 cpu mode is set to 32 bit. */ 2854 if (arg_temp(op->args[1])->state == TS_DEAD) { 2855 if (arg_temp(op->args[0])->state == TS_DEAD) { 2856 goto do_remove; 2857 } 2858 /* Replace the opcode and adjust the args in place, 2859 leaving 3 unused args at the end. */ 2860 op->opc = opc = opc_new; 2861 op->args[1] = op->args[2]; 2862 op->args[2] = op->args[4]; 2863 /* Fall through and mark the single-word operation live. */ 2864 nb_iargs = 2; 2865 nb_oargs = 1; 2866 } 2867 goto do_not_remove; 2868 2869 case INDEX_op_mulu2_i32: 2870 opc_new = INDEX_op_mul_i32; 2871 opc_new2 = INDEX_op_muluh_i32; 2872 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2873 goto do_mul2; 2874 case INDEX_op_muls2_i32: 2875 opc_new = INDEX_op_mul_i32; 2876 opc_new2 = INDEX_op_mulsh_i32; 2877 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2878 goto do_mul2; 2879 case INDEX_op_mulu2_i64: 2880 opc_new = INDEX_op_mul_i64; 2881 opc_new2 = INDEX_op_muluh_i64; 2882 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2883 goto do_mul2; 2884 case INDEX_op_muls2_i64: 2885 opc_new = INDEX_op_mul_i64; 2886 opc_new2 = INDEX_op_mulsh_i64; 2887 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2888 goto do_mul2; 2889 do_mul2: 2890 nb_iargs = 2; 2891 nb_oargs = 2; 2892 if (arg_temp(op->args[1])->state == TS_DEAD) { 2893 if (arg_temp(op->args[0])->state == TS_DEAD) { 2894 /* Both parts of the operation are dead. */ 2895 goto do_remove; 2896 } 2897 /* The high part of the operation is dead; generate the low. */ 2898 op->opc = opc = opc_new; 2899 op->args[1] = op->args[2]; 2900 op->args[2] = op->args[3]; 2901 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2902 /* The low part of the operation is dead; generate the high. */ 2903 op->opc = opc = opc_new2; 2904 op->args[0] = op->args[1]; 2905 op->args[1] = op->args[2]; 2906 op->args[2] = op->args[3]; 2907 } else { 2908 goto do_not_remove; 2909 } 2910 /* Mark the single-word operation live. */ 2911 nb_oargs = 1; 2912 goto do_not_remove; 2913 2914 default: 2915 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2916 nb_iargs = def->nb_iargs; 2917 nb_oargs = def->nb_oargs; 2918 2919 /* Test if the operation can be removed because all 2920 its outputs are dead. 
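               (For example, an arithmetic op whose only output temp is
               already TS_DEAD computes a value nobody reads and is
               deleted via do_remove below.)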
We assume that nb_oargs == 0 2921 implies side effects */ 2922 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2923 for (i = 0; i < nb_oargs; i++) { 2924 if (arg_temp(op->args[i])->state != TS_DEAD) { 2925 goto do_not_remove; 2926 } 2927 } 2928 goto do_remove; 2929 } 2930 goto do_not_remove; 2931 2932 do_remove: 2933 tcg_op_remove(s, op); 2934 break; 2935 2936 do_not_remove: 2937 for (i = 0; i < nb_oargs; i++) { 2938 ts = arg_temp(op->args[i]); 2939 2940 /* Remember the preference of the uses that followed. */ 2941 if (i < ARRAY_SIZE(op->output_pref)) { 2942 op->output_pref[i] = *la_temp_pref(ts); 2943 } 2944 2945 /* Output args are dead. */ 2946 if (ts->state & TS_DEAD) { 2947 arg_life |= DEAD_ARG << i; 2948 } 2949 if (ts->state & TS_MEM) { 2950 arg_life |= SYNC_ARG << i; 2951 } 2952 ts->state = TS_DEAD; 2953 la_reset_pref(ts); 2954 } 2955 2956 /* If end of basic block, update. */ 2957 if (def->flags & TCG_OPF_BB_EXIT) { 2958 la_func_end(s, nb_globals, nb_temps); 2959 } else if (def->flags & TCG_OPF_COND_BRANCH) { 2960 la_bb_sync(s, nb_globals, nb_temps); 2961 } else if (def->flags & TCG_OPF_BB_END) { 2962 la_bb_end(s, nb_globals, nb_temps); 2963 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2964 la_global_sync(s, nb_globals); 2965 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2966 la_cross_call(s, nb_temps); 2967 } 2968 } 2969 2970 /* Record arguments that die in this opcode. */ 2971 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2972 ts = arg_temp(op->args[i]); 2973 if (ts->state & TS_DEAD) { 2974 arg_life |= DEAD_ARG << i; 2975 } 2976 } 2977 2978 /* Input arguments are live for preceding opcodes. */ 2979 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2980 ts = arg_temp(op->args[i]); 2981 if (ts->state & TS_DEAD) { 2982 /* For operands that were dead, initially allow 2983 all regs for the type. */ 2984 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2985 ts->state &= ~TS_DEAD; 2986 } 2987 } 2988 2989 /* Incorporate constraints for this operand. */ 2990 switch (opc) { 2991 case INDEX_op_mov_i32: 2992 case INDEX_op_mov_i64: 2993 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2994 have proper constraints. That said, special case 2995 moves to propagate preferences backward. */ 2996 if (IS_DEAD_ARG(1)) { 2997 *la_temp_pref(arg_temp(op->args[0])) 2998 = *la_temp_pref(arg_temp(op->args[1])); 2999 } 3000 break; 3001 3002 default: 3003 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3004 const TCGArgConstraint *ct = &def->args_ct[i]; 3005 TCGRegSet set, *pset; 3006 3007 ts = arg_temp(op->args[i]); 3008 pset = la_temp_pref(ts); 3009 set = *pset; 3010 3011 set &= ct->regs; 3012 if (ct->ialias) { 3013 set &= output_pref(op, ct->alias_index); 3014 } 3015 /* If the combination is not possible, restart. */ 3016 if (set == 0) { 3017 set = ct->regs; 3018 } 3019 *pset = set; 3020 } 3021 break; 3022 } 3023 break; 3024 } 3025 op->life = arg_life; 3026 } 3027 } 3028 3029 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 3030 static bool liveness_pass_2(TCGContext *s) 3031 { 3032 int nb_globals = s->nb_globals; 3033 int nb_temps, i; 3034 bool changes = false; 3035 TCGOp *op, *op_next; 3036 3037 /* Create a temporary for each indirect global. 
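       An indirect global is one whose base pointer is itself a TCG
       global rather than a fixed host register; the shadow temp
       created below (kind TEMP_EBB) carries its value between the
       loads and stores that this pass inserts.  E.g. (illustrative),
       a use of indirect global G becomes:

           ld_i32  tmp, base, $ofs(G)      -- reload before the use
           op      ...  tmp  ...
           st_i32  tmp, base, $ofs(G)      -- sync after the last write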
*/ 3038 for (i = 0; i < nb_globals; ++i) { 3039 TCGTemp *its = &s->temps[i]; 3040 if (its->indirect_reg) { 3041 TCGTemp *dts = tcg_temp_alloc(s); 3042 dts->type = its->type; 3043 dts->base_type = its->base_type; 3044 dts->kind = TEMP_EBB; 3045 its->state_ptr = dts; 3046 } else { 3047 its->state_ptr = NULL; 3048 } 3049 /* All globals begin dead. */ 3050 its->state = TS_DEAD; 3051 } 3052 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 3053 TCGTemp *its = &s->temps[i]; 3054 its->state_ptr = NULL; 3055 its->state = TS_DEAD; 3056 } 3057 3058 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 3059 TCGOpcode opc = op->opc; 3060 const TCGOpDef *def = &tcg_op_defs[opc]; 3061 TCGLifeData arg_life = op->life; 3062 int nb_iargs, nb_oargs, call_flags; 3063 TCGTemp *arg_ts, *dir_ts; 3064 3065 if (opc == INDEX_op_call) { 3066 nb_oargs = TCGOP_CALLO(op); 3067 nb_iargs = TCGOP_CALLI(op); 3068 call_flags = tcg_call_flags(op); 3069 } else { 3070 nb_iargs = def->nb_iargs; 3071 nb_oargs = def->nb_oargs; 3072 3073 /* Set flags similar to how calls require. */ 3074 if (def->flags & TCG_OPF_COND_BRANCH) { 3075 /* Like reading globals: sync_globals */ 3076 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3077 } else if (def->flags & TCG_OPF_BB_END) { 3078 /* Like writing globals: save_globals */ 3079 call_flags = 0; 3080 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3081 /* Like reading globals: sync_globals */ 3082 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 3083 } else { 3084 /* No effect on globals. */ 3085 call_flags = (TCG_CALL_NO_READ_GLOBALS | 3086 TCG_CALL_NO_WRITE_GLOBALS); 3087 } 3088 } 3089 3090 /* Make sure that input arguments are available. */ 3091 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3092 arg_ts = arg_temp(op->args[i]); 3093 dir_ts = arg_ts->state_ptr; 3094 if (dir_ts && arg_ts->state == TS_DEAD) { 3095 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 3096 ? INDEX_op_ld_i32 3097 : INDEX_op_ld_i64); 3098 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); 3099 3100 lop->args[0] = temp_arg(dir_ts); 3101 lop->args[1] = temp_arg(arg_ts->mem_base); 3102 lop->args[2] = arg_ts->mem_offset; 3103 3104 /* Loaded, but synced with memory. */ 3105 arg_ts->state = TS_MEM; 3106 } 3107 } 3108 3109 /* Perform input replacement, and mark inputs that became dead. 3110 No action is required except keeping temp_state up to date 3111 so that we reload when needed. */ 3112 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3113 arg_ts = arg_temp(op->args[i]); 3114 dir_ts = arg_ts->state_ptr; 3115 if (dir_ts) { 3116 op->args[i] = temp_arg(dir_ts); 3117 changes = true; 3118 if (IS_DEAD_ARG(i)) { 3119 arg_ts->state = TS_DEAD; 3120 } 3121 } 3122 } 3123 3124 /* Liveness analysis should ensure that the following are 3125 all correct, for call sites and basic block end points. */ 3126 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 3127 /* Nothing to do */ 3128 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 3129 for (i = 0; i < nb_globals; ++i) { 3130 /* Liveness should see that globals are synced back, 3131 that is, either TS_DEAD or TS_MEM. */ 3132 arg_ts = &s->temps[i]; 3133 tcg_debug_assert(arg_ts->state_ptr == 0 3134 || arg_ts->state != 0); 3135 } 3136 } else { 3137 for (i = 0; i < nb_globals; ++i) { 3138 /* Liveness should see that globals are saved back, 3139 that is, TS_DEAD, waiting to be reloaded. */ 3140 arg_ts = &s->temps[i]; 3141 tcg_debug_assert(arg_ts->state_ptr == 0 3142 || arg_ts->state == TS_DEAD); 3143 } 3144 } 3145 3146 /* Outputs become available. 
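       For a plain mov whose destination must be synced and is
       otherwise dead, the mov itself is removed and replaced with a
       direct store of the source, e.g. (illustrative):

           mov_i32 G, t0    becomes    st_i32 t0, base, $ofs(G)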
*/ 3147 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 3148 arg_ts = arg_temp(op->args[0]); 3149 dir_ts = arg_ts->state_ptr; 3150 if (dir_ts) { 3151 op->args[0] = temp_arg(dir_ts); 3152 changes = true; 3153 3154 /* The output is now live and modified. */ 3155 arg_ts->state = 0; 3156 3157 if (NEED_SYNC_ARG(0)) { 3158 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3159 ? INDEX_op_st_i32 3160 : INDEX_op_st_i64); 3161 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3162 TCGTemp *out_ts = dir_ts; 3163 3164 if (IS_DEAD_ARG(0)) { 3165 out_ts = arg_temp(op->args[1]); 3166 arg_ts->state = TS_DEAD; 3167 tcg_op_remove(s, op); 3168 } else { 3169 arg_ts->state = TS_MEM; 3170 } 3171 3172 sop->args[0] = temp_arg(out_ts); 3173 sop->args[1] = temp_arg(arg_ts->mem_base); 3174 sop->args[2] = arg_ts->mem_offset; 3175 } else { 3176 tcg_debug_assert(!IS_DEAD_ARG(0)); 3177 } 3178 } 3179 } else { 3180 for (i = 0; i < nb_oargs; i++) { 3181 arg_ts = arg_temp(op->args[i]); 3182 dir_ts = arg_ts->state_ptr; 3183 if (!dir_ts) { 3184 continue; 3185 } 3186 op->args[i] = temp_arg(dir_ts); 3187 changes = true; 3188 3189 /* The output is now live and modified. */ 3190 arg_ts->state = 0; 3191 3192 /* Sync outputs upon their last write. */ 3193 if (NEED_SYNC_ARG(i)) { 3194 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 3195 ? INDEX_op_st_i32 3196 : INDEX_op_st_i64); 3197 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); 3198 3199 sop->args[0] = temp_arg(dir_ts); 3200 sop->args[1] = temp_arg(arg_ts->mem_base); 3201 sop->args[2] = arg_ts->mem_offset; 3202 3203 arg_ts->state = TS_MEM; 3204 } 3205 /* Drop outputs that are dead. */ 3206 if (IS_DEAD_ARG(i)) { 3207 arg_ts->state = TS_DEAD; 3208 } 3209 } 3210 } 3211 } 3212 3213 return changes; 3214 } 3215 3216 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 3217 { 3218 int size = tcg_type_size(ts->type); 3219 int align; 3220 intptr_t off; 3221 3222 switch (ts->type) { 3223 case TCG_TYPE_I32: 3224 align = 4; 3225 break; 3226 case TCG_TYPE_I64: 3227 case TCG_TYPE_V64: 3228 align = 8; 3229 break; 3230 case TCG_TYPE_V128: 3231 case TCG_TYPE_V256: 3232 /* Note that we do not require aligned storage for V256. */ 3233 align = 16; 3234 break; 3235 default: 3236 g_assert_not_reached(); 3237 } 3238 3239 /* 3240 * Assume the stack is sufficiently aligned. 3241 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3242 * and do not require 16 byte vector alignment. This seems slightly 3243 * easier than fully parameterizing the above switch statement. 3244 */ 3245 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3246 off = ROUND_UP(s->current_frame_offset, align); 3247 3248 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3249 if (off + size > s->frame_end) { 3250 tcg_raise_tb_overflow(s); 3251 } 3252 s->current_frame_offset = off + size; 3253 3254 ts->mem_offset = off; 3255 #if defined(__sparc__) 3256 ts->mem_offset += TCG_TARGET_STACK_BIAS; 3257 #endif 3258 ts->mem_base = s->frame_temp; 3259 ts->mem_allocated = 1; 3260 } 3261 3262 /* Assign @reg to @ts, and update reg_to_temp[]. 
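   This maintains the invariant that s->reg_to_temp[r] == ts exactly
   when ts->val_type == TEMP_VAL_REG and ts->reg == r: any register
   previously held by @ts is released first, and @reg itself must be
   free on entry.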
*/ 3263 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) 3264 { 3265 if (ts->val_type == TEMP_VAL_REG) { 3266 TCGReg old = ts->reg; 3267 tcg_debug_assert(s->reg_to_temp[old] == ts); 3268 if (old == reg) { 3269 return; 3270 } 3271 s->reg_to_temp[old] = NULL; 3272 } 3273 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3274 s->reg_to_temp[reg] = ts; 3275 ts->val_type = TEMP_VAL_REG; 3276 ts->reg = reg; 3277 } 3278 3279 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */ 3280 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) 3281 { 3282 tcg_debug_assert(type != TEMP_VAL_REG); 3283 if (ts->val_type == TEMP_VAL_REG) { 3284 TCGReg reg = ts->reg; 3285 tcg_debug_assert(s->reg_to_temp[reg] == ts); 3286 s->reg_to_temp[reg] = NULL; 3287 } 3288 ts->val_type = type; 3289 } 3290 3291 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3292 3293 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3294 mark it free; otherwise mark it dead. */ 3295 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3296 { 3297 TCGTempVal new_type; 3298 3299 switch (ts->kind) { 3300 case TEMP_FIXED: 3301 return; 3302 case TEMP_GLOBAL: 3303 case TEMP_LOCAL: 3304 new_type = TEMP_VAL_MEM; 3305 break; 3306 case TEMP_NORMAL: 3307 case TEMP_EBB: 3308 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3309 break; 3310 case TEMP_CONST: 3311 new_type = TEMP_VAL_CONST; 3312 break; 3313 default: 3314 g_assert_not_reached(); 3315 } 3316 set_temp_val_nonreg(s, ts, new_type); 3317 } 3318 3319 /* Mark a temporary as dead. */ 3320 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3321 { 3322 temp_free_or_dead(s, ts, 1); 3323 } 3324 3325 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3326 registers needs to be allocated to store a constant. If 'free_or_dead' 3327 is non-zero, subsequently release the temporary; if it is positive, the 3328 temp is dead; if it is negative, the temp is free. */ 3329 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3330 TCGRegSet preferred_regs, int free_or_dead) 3331 { 3332 if (!temp_readonly(ts) && !ts->mem_coherent) { 3333 if (!ts->mem_allocated) { 3334 temp_allocate_frame(s, ts); 3335 } 3336 switch (ts->val_type) { 3337 case TEMP_VAL_CONST: 3338 /* If we're going to free the temp immediately, then we won't 3339 require it later in a register, so attempt to store the 3340 constant to memory directly. */ 3341 if (free_or_dead 3342 && tcg_out_sti(s, ts->type, ts->val, 3343 ts->mem_base->reg, ts->mem_offset)) { 3344 break; 3345 } 3346 temp_load(s, ts, tcg_target_available_regs[ts->type], 3347 allocated_regs, preferred_regs); 3348 /* fallthrough */ 3349 3350 case TEMP_VAL_REG: 3351 tcg_out_st(s, ts->type, ts->reg, 3352 ts->mem_base->reg, ts->mem_offset); 3353 break; 3354 3355 case TEMP_VAL_MEM: 3356 break; 3357 3358 case TEMP_VAL_DEAD: 3359 default: 3360 tcg_abort(); 3361 } 3362 ts->mem_coherent = 1; 3363 } 3364 if (free_or_dead) { 3365 temp_free_or_dead(s, ts, free_or_dead); 3366 } 3367 } 3368 3369 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3370 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3371 { 3372 TCGTemp *ts = s->reg_to_temp[reg]; 3373 if (ts != NULL) { 3374 temp_sync(s, ts, allocated_regs, 0, -1); 3375 } 3376 } 3377 3378 /** 3379 * tcg_reg_alloc: 3380 * @required_regs: Set of registers in which we must allocate. 
3381 * @allocated_regs: Set of registers which must be avoided. 3382 * @preferred_regs: Set of registers we should prefer. 3383 * @rev: True if we search the registers in "indirect" order. 3384 * 3385 * The allocated register must be in @required_regs & ~@allocated_regs, 3386 * but if we can put it in @preferred_regs we may save a move later. 3387 */ 3388 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3389 TCGRegSet allocated_regs, 3390 TCGRegSet preferred_regs, bool rev) 3391 { 3392 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3393 TCGRegSet reg_ct[2]; 3394 const int *order; 3395 3396 reg_ct[1] = required_regs & ~allocated_regs; 3397 tcg_debug_assert(reg_ct[1] != 0); 3398 reg_ct[0] = reg_ct[1] & preferred_regs; 3399 3400 /* Skip the preferred_regs option if it cannot be satisfied, 3401 or if the preference made no difference. */ 3402 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3403 3404 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3405 3406 /* Try free registers, preferences first. */ 3407 for (j = f; j < 2; j++) { 3408 TCGRegSet set = reg_ct[j]; 3409 3410 if (tcg_regset_single(set)) { 3411 /* One register in the set. */ 3412 TCGReg reg = tcg_regset_first(set); 3413 if (s->reg_to_temp[reg] == NULL) { 3414 return reg; 3415 } 3416 } else { 3417 for (i = 0; i < n; i++) { 3418 TCGReg reg = order[i]; 3419 if (s->reg_to_temp[reg] == NULL && 3420 tcg_regset_test_reg(set, reg)) { 3421 return reg; 3422 } 3423 } 3424 } 3425 } 3426 3427 /* We must spill something. */ 3428 for (j = f; j < 2; j++) { 3429 TCGRegSet set = reg_ct[j]; 3430 3431 if (tcg_regset_single(set)) { 3432 /* One register in the set. */ 3433 TCGReg reg = tcg_regset_first(set); 3434 tcg_reg_free(s, reg, allocated_regs); 3435 return reg; 3436 } else { 3437 for (i = 0; i < n; i++) { 3438 TCGReg reg = order[i]; 3439 if (tcg_regset_test_reg(set, reg)) { 3440 tcg_reg_free(s, reg, allocated_regs); 3441 return reg; 3442 } 3443 } 3444 } 3445 } 3446 3447 tcg_abort(); 3448 } 3449 3450 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, 3451 TCGRegSet allocated_regs, 3452 TCGRegSet preferred_regs, bool rev) 3453 { 3454 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3455 TCGRegSet reg_ct[2]; 3456 const int *order; 3457 3458 /* Ensure that if I is not in allocated_regs, I+1 is not either. */ 3459 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); 3460 tcg_debug_assert(reg_ct[1] != 0); 3461 reg_ct[0] = reg_ct[1] & preferred_regs; 3462 3463 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3464 3465 /* 3466 * Skip the preferred_regs option if it cannot be satisfied, 3467 * or if the preference made no difference. 3468 */ 3469 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3470 3471 /* 3472 * Minimize the number of flushes by looking for 2 free registers first, 3473 * then a single flush, then two flushes. 3474 */ 3475 for (fmin = 2; fmin >= 0; fmin--) { 3476 for (j = k; j < 2; j++) { 3477 TCGRegSet set = reg_ct[j]; 3478 3479 for (i = 0; i < n; i++) { 3480 TCGReg reg = order[i]; 3481 3482 if (tcg_regset_test_reg(set, reg)) { 3483 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; 3484 if (f >= fmin) { 3485 tcg_reg_free(s, reg, allocated_regs); 3486 tcg_reg_free(s, reg + 1, allocated_regs); 3487 return reg; 3488 } 3489 } 3490 } 3491 } 3492 } 3493 tcg_abort(); 3494 } 3495 3496 /* Make sure the temporary is in a register. If needed, allocate the register 3497 from DESIRED while avoiding ALLOCATED. 
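   A constant is materialized with movi (or dupi for vector types), a
   memory-resident value is reloaded with ld, and a value already in a
   register is left where it is.  Typical call, mirroring the uses
   elsewhere in this file (a sketch, not a prescribed pattern):

       temp_load(s, ts, tcg_target_available_regs[ts->type],
                 s->reserved_regs, 0);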
*/ 3498 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3499 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3500 { 3501 TCGReg reg; 3502 3503 switch (ts->val_type) { 3504 case TEMP_VAL_REG: 3505 return; 3506 case TEMP_VAL_CONST: 3507 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3508 preferred_regs, ts->indirect_base); 3509 if (ts->type <= TCG_TYPE_I64) { 3510 tcg_out_movi(s, ts->type, reg, ts->val); 3511 } else { 3512 uint64_t val = ts->val; 3513 MemOp vece = MO_64; 3514 3515 /* 3516 * Find the minimal vector element that matches the constant. 3517 * The targets will, in general, have to do this search anyway, 3518 * do this generically. 3519 */ 3520 if (val == dup_const(MO_8, val)) { 3521 vece = MO_8; 3522 } else if (val == dup_const(MO_16, val)) { 3523 vece = MO_16; 3524 } else if (val == dup_const(MO_32, val)) { 3525 vece = MO_32; 3526 } 3527 3528 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 3529 } 3530 ts->mem_coherent = 0; 3531 break; 3532 case TEMP_VAL_MEM: 3533 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3534 preferred_regs, ts->indirect_base); 3535 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3536 ts->mem_coherent = 1; 3537 break; 3538 case TEMP_VAL_DEAD: 3539 default: 3540 tcg_abort(); 3541 } 3542 set_temp_val_reg(s, ts, reg); 3543 } 3544 3545 /* Save a temporary to memory. 'allocated_regs' is used in case a 3546 temporary register needs to be allocated to store a constant. */ 3547 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3548 { 3549 /* The liveness analysis already ensures that globals are back 3550 in memory. Keep a tcg_debug_assert for safety. */ 3551 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 3552 } 3553 3554 /* save globals to their canonical location and assume they can be 3555 modified by the following code. 'allocated_regs' is used in case a 3556 temporary register needs to be allocated to store a constant. */ 3557 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3558 { 3559 int i, n; 3560 3561 for (i = 0, n = s->nb_globals; i < n; i++) { 3562 temp_save(s, &s->temps[i], allocated_regs); 3563 } 3564 } 3565 3566 /* sync globals to their canonical location and assume they can be 3567 read by the following code. 'allocated_regs' is used in case a 3568 temporary register needs to be allocated to store a constant. */ 3569 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3570 { 3571 int i, n; 3572 3573 for (i = 0, n = s->nb_globals; i < n; i++) { 3574 TCGTemp *ts = &s->temps[i]; 3575 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3576 || ts->kind == TEMP_FIXED 3577 || ts->mem_coherent); 3578 } 3579 } 3580 3581 /* at the end of a basic block, we assume all temporaries are dead and 3582 all globals are stored at their canonical location. */ 3583 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3584 { 3585 int i; 3586 3587 for (i = s->nb_globals; i < s->nb_temps; i++) { 3588 TCGTemp *ts = &s->temps[i]; 3589 3590 switch (ts->kind) { 3591 case TEMP_LOCAL: 3592 temp_save(s, ts, allocated_regs); 3593 break; 3594 case TEMP_NORMAL: 3595 case TEMP_EBB: 3596 /* The liveness analysis already ensures that temps are dead. 3597 Keep a tcg_debug_assert for safety. */ 3598 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3599 break; 3600 case TEMP_CONST: 3601 /* Similarly, we should have freed any allocated register.
*/ 3602 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3603 break; 3604 default: 3605 g_assert_not_reached(); 3606 } 3607 } 3608 3609 save_globals(s, allocated_regs); 3610 } 3611 3612 /* 3613 * At a conditional branch, we assume all temporaries are dead unless 3614 * explicitly live-across-conditional-branch; all globals and local 3615 * temps are synced to their location. 3616 */ 3617 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3618 { 3619 sync_globals(s, allocated_regs); 3620 3621 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3622 TCGTemp *ts = &s->temps[i]; 3623 /* 3624 * The liveness analysis already ensures that temps are dead. 3625 * Keep tcg_debug_asserts for safety. 3626 */ 3627 switch (ts->kind) { 3628 case TEMP_LOCAL: 3629 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3630 break; 3631 case TEMP_NORMAL: 3632 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3633 break; 3634 case TEMP_EBB: 3635 case TEMP_CONST: 3636 break; 3637 default: 3638 g_assert_not_reached(); 3639 } 3640 } 3641 } 3642 3643 /* 3644 * Specialized code generation for INDEX_op_mov_* with a constant. 3645 */ 3646 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3647 tcg_target_ulong val, TCGLifeData arg_life, 3648 TCGRegSet preferred_regs) 3649 { 3650 /* ENV should not be modified. */ 3651 tcg_debug_assert(!temp_readonly(ots)); 3652 3653 /* The movi is not explicitly generated here. */ 3654 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); 3655 ots->val = val; 3656 ots->mem_coherent = 0; 3657 if (NEED_SYNC_ARG(0)) { 3658 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3659 } else if (IS_DEAD_ARG(0)) { 3660 temp_dead(s, ots); 3661 } 3662 } 3663 3664 /* 3665 * Specialized code generation for INDEX_op_mov_*. 3666 */ 3667 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3668 { 3669 const TCGLifeData arg_life = op->life; 3670 TCGRegSet allocated_regs, preferred_regs; 3671 TCGTemp *ts, *ots; 3672 TCGType otype, itype; 3673 TCGReg oreg, ireg; 3674 3675 allocated_regs = s->reserved_regs; 3676 preferred_regs = output_pref(op, 0); 3677 ots = arg_temp(op->args[0]); 3678 ts = arg_temp(op->args[1]); 3679 3680 /* ENV should not be modified. */ 3681 tcg_debug_assert(!temp_readonly(ots)); 3682 3683 /* Note that otype != itype for no-op truncation. */ 3684 otype = ots->type; 3685 itype = ts->type; 3686 3687 if (ts->val_type == TEMP_VAL_CONST) { 3688 /* propagate constant or generate sti */ 3689 tcg_target_ulong val = ts->val; 3690 if (IS_DEAD_ARG(1)) { 3691 temp_dead(s, ts); 3692 } 3693 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3694 return; 3695 } 3696 3697 /* If the source value is in memory we're going to be forced 3698 to have it in a register in order to perform the copy. Copy 3699 the SOURCE value into its own register first, that way we 3700 don't have to reload SOURCE the next time it is used. */ 3701 if (ts->val_type == TEMP_VAL_MEM) { 3702 temp_load(s, ts, tcg_target_available_regs[itype], 3703 allocated_regs, preferred_regs); 3704 } 3705 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3706 ireg = ts->reg; 3707 3708 if (IS_DEAD_ARG(0)) { 3709 /* mov to a non-saved dead register makes no sense (even with 3710 liveness analysis disabled). 
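      The only useful effect left is the sync to the memory slot,
      which the assert just below insists on.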
*/ 3711 tcg_debug_assert(NEED_SYNC_ARG(0)); 3712 if (!ots->mem_allocated) { 3713 temp_allocate_frame(s, ots); 3714 } 3715 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); 3716 if (IS_DEAD_ARG(1)) { 3717 temp_dead(s, ts); 3718 } 3719 temp_dead(s, ots); 3720 return; 3721 } 3722 3723 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 3724 /* 3725 * The mov can be suppressed. Kill input first, so that it 3726 * is unlinked from reg_to_temp, then set the output to the 3727 * reg that we saved from the input. 3728 */ 3729 temp_dead(s, ts); 3730 oreg = ireg; 3731 } else { 3732 if (ots->val_type == TEMP_VAL_REG) { 3733 oreg = ots->reg; 3734 } else { 3735 /* Make sure to not spill the input register during allocation. */ 3736 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3737 allocated_regs | ((TCGRegSet)1 << ireg), 3738 preferred_regs, ots->indirect_base); 3739 } 3740 if (!tcg_out_mov(s, otype, oreg, ireg)) { 3741 /* 3742 * Cross register class move not supported. 3743 * Store the source register into the destination slot 3744 * and leave the destination temp as TEMP_VAL_MEM. 3745 */ 3746 assert(!temp_readonly(ots)); 3747 if (!ts->mem_allocated) { 3748 temp_allocate_frame(s, ots); 3749 } 3750 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); 3751 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); 3752 ots->mem_coherent = 1; 3753 return; 3754 } 3755 } 3756 set_temp_val_reg(s, ots, oreg); 3757 ots->mem_coherent = 0; 3758 3759 if (NEED_SYNC_ARG(0)) { 3760 temp_sync(s, ots, allocated_regs, 0, 0); 3761 } 3762 } 3763 3764 /* 3765 * Specialized code generation for INDEX_op_dup_vec. 3766 */ 3767 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3768 { 3769 const TCGLifeData arg_life = op->life; 3770 TCGRegSet dup_out_regs, dup_in_regs; 3771 TCGTemp *its, *ots; 3772 TCGType itype, vtype; 3773 unsigned vece; 3774 int lowpart_ofs; 3775 bool ok; 3776 3777 ots = arg_temp(op->args[0]); 3778 its = arg_temp(op->args[1]); 3779 3780 /* ENV should not be modified. */ 3781 tcg_debug_assert(!temp_readonly(ots)); 3782 3783 itype = its->type; 3784 vece = TCGOP_VECE(op); 3785 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3786 3787 if (its->val_type == TEMP_VAL_CONST) { 3788 /* Propagate constant via movi -> dupi. */ 3789 tcg_target_ulong val = its->val; 3790 if (IS_DEAD_ARG(1)) { 3791 temp_dead(s, its); 3792 } 3793 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); 3794 return; 3795 } 3796 3797 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3798 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3799 3800 /* Allocate the output register now. */ 3801 if (ots->val_type != TEMP_VAL_REG) { 3802 TCGRegSet allocated_regs = s->reserved_regs; 3803 TCGReg oreg; 3804 3805 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3806 /* Make sure to not spill the input register. */ 3807 tcg_regset_set_reg(allocated_regs, its->reg); 3808 } 3809 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3810 output_pref(op, 0), ots->indirect_base); 3811 set_temp_val_reg(s, ots, oreg); 3812 } 3813 3814 switch (its->val_type) { 3815 case TEMP_VAL_REG: 3816 /* 3817 * The dup constriaints must be broad, covering all possible VECE. 3818 * However, tcg_op_dup_vec() gets to see the VECE and we allow it 3819 * to fail, indicating that extra moves are required for that case. 
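         *
         * For example (illustrative), a host might only broadcast from
         * a vector source for certain element sizes; returning false
         * here routes us through the integer-vector move or memory
         * fallbacks that follow.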
3820 */ 3821 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 3822 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 3823 goto done; 3824 } 3825 /* Try again from memory or a vector input register. */ 3826 } 3827 if (!its->mem_coherent) { 3828 /* 3829 * The input register is not synced, and so an extra store 3830 * would be required to use memory. Attempt an integer-vector 3831 * register move first. We do not have a TCGRegSet for this. 3832 */ 3833 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 3834 break; 3835 } 3836 /* Sync the temp back to its slot and load from there. */ 3837 temp_sync(s, its, s->reserved_regs, 0, 0); 3838 } 3839 /* fall through */ 3840 3841 case TEMP_VAL_MEM: 3842 lowpart_ofs = 0; 3843 if (HOST_BIG_ENDIAN) { 3844 lowpart_ofs = tcg_type_size(itype) - (1 << vece); 3845 } 3846 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 3847 its->mem_offset + lowpart_ofs)) { 3848 goto done; 3849 } 3850 /* Load the input into the destination vector register. */ 3851 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 3852 break; 3853 3854 default: 3855 g_assert_not_reached(); 3856 } 3857 3858 /* We now have a vector input register, so dup must succeed. */ 3859 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 3860 tcg_debug_assert(ok); 3861 3862 done: 3863 ots->mem_coherent = 0; 3864 if (IS_DEAD_ARG(1)) { 3865 temp_dead(s, its); 3866 } 3867 if (NEED_SYNC_ARG(0)) { 3868 temp_sync(s, ots, s->reserved_regs, 0, 0); 3869 } 3870 if (IS_DEAD_ARG(0)) { 3871 temp_dead(s, ots); 3872 } 3873 } 3874 3875 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3876 { 3877 const TCGLifeData arg_life = op->life; 3878 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3879 TCGRegSet i_allocated_regs; 3880 TCGRegSet o_allocated_regs; 3881 int i, k, nb_iargs, nb_oargs; 3882 TCGReg reg; 3883 TCGArg arg; 3884 const TCGArgConstraint *arg_ct; 3885 TCGTemp *ts; 3886 TCGArg new_args[TCG_MAX_OP_ARGS]; 3887 int const_args[TCG_MAX_OP_ARGS]; 3888 3889 nb_oargs = def->nb_oargs; 3890 nb_iargs = def->nb_iargs; 3891 3892 /* copy constants */ 3893 memcpy(new_args + nb_oargs + nb_iargs, 3894 op->args + nb_oargs + nb_iargs, 3895 sizeof(TCGArg) * def->nb_cargs); 3896 3897 i_allocated_regs = s->reserved_regs; 3898 o_allocated_regs = s->reserved_regs; 3899 3900 /* satisfy input constraints */ 3901 for (k = 0; k < nb_iargs; k++) { 3902 TCGRegSet i_preferred_regs, i_required_regs; 3903 bool allocate_new_reg, copyto_new_reg; 3904 TCGTemp *ts2; 3905 int i1, i2; 3906 3907 i = def->args_ct[nb_oargs + k].sort_index; 3908 arg = op->args[i]; 3909 arg_ct = &def->args_ct[i]; 3910 ts = arg_temp(arg); 3911 3912 if (ts->val_type == TEMP_VAL_CONST 3913 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 3914 /* constant is OK for instruction */ 3915 const_args[i] = 1; 3916 new_args[i] = ts->val; 3917 continue; 3918 } 3919 3920 reg = ts->reg; 3921 i_preferred_regs = 0; 3922 i_required_regs = arg_ct->regs; 3923 allocate_new_reg = false; 3924 copyto_new_reg = false; 3925 3926 switch (arg_ct->pair) { 3927 case 0: /* not paired */ 3928 if (arg_ct->ialias) { 3929 i_preferred_regs = output_pref(op, arg_ct->alias_index); 3930 3931 /* 3932 * If the input is readonly, then it cannot also be an 3933 * output and aliased to itself. If the input is not 3934 * dead after the instruction, we must allocate a new 3935 * register and move it. 
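                 *
                 * Illustrative example: for a two-address host op where
                 * an output aliases this input, an input that stays live
                 * after the op must first be copied into the new output
                 * register, or emitting the op would clobber it.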
3936 */ 3937 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 3938 allocate_new_reg = true; 3939 } else if (ts->val_type == TEMP_VAL_REG) { 3940 /* 3941 * Check if the current register has already been 3942 * allocated for another input. 3943 */ 3944 allocate_new_reg = 3945 tcg_regset_test_reg(i_allocated_regs, reg); 3946 } 3947 } 3948 if (!allocate_new_reg) { 3949 temp_load(s, ts, i_required_regs, i_allocated_regs, 3950 i_preferred_regs); 3951 reg = ts->reg; 3952 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); 3953 } 3954 if (allocate_new_reg) { 3955 /* 3956 * Allocate a new register matching the constraint 3957 * and move the temporary register into it. 3958 */ 3959 temp_load(s, ts, tcg_target_available_regs[ts->type], 3960 i_allocated_regs, 0); 3961 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, 3962 i_preferred_regs, ts->indirect_base); 3963 copyto_new_reg = true; 3964 } 3965 break; 3966 3967 case 1: 3968 /* First of an input pair; if i1 == i2, the second is an output. */ 3969 i1 = i; 3970 i2 = arg_ct->pair_index; 3971 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; 3972 3973 /* 3974 * It is easier to default to allocating a new pair 3975 * and to identify a few cases where it's not required. 3976 */ 3977 if (arg_ct->ialias) { 3978 i_preferred_regs = output_pref(op, arg_ct->alias_index); 3979 if (IS_DEAD_ARG(i1) && 3980 IS_DEAD_ARG(i2) && 3981 !temp_readonly(ts) && 3982 ts->val_type == TEMP_VAL_REG && 3983 ts->reg < TCG_TARGET_NB_REGS - 1 && 3984 tcg_regset_test_reg(i_required_regs, reg) && 3985 !tcg_regset_test_reg(i_allocated_regs, reg) && 3986 !tcg_regset_test_reg(i_allocated_regs, reg + 1) && 3987 (ts2 3988 ? ts2->val_type == TEMP_VAL_REG && 3989 ts2->reg == reg + 1 && 3990 !temp_readonly(ts2) 3991 : s->reg_to_temp[reg + 1] == NULL)) { 3992 break; 3993 } 3994 } else { 3995 /* Without aliasing, the pair must also be an input. */ 3996 tcg_debug_assert(ts2); 3997 if (ts->val_type == TEMP_VAL_REG && 3998 ts2->val_type == TEMP_VAL_REG && 3999 ts2->reg == reg + 1 && 4000 tcg_regset_test_reg(i_required_regs, reg)) { 4001 break; 4002 } 4003 } 4004 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, 4005 0, ts->indirect_base); 4006 goto do_pair; 4007 4008 case 2: /* pair second */ 4009 reg = new_args[arg_ct->pair_index] + 1; 4010 goto do_pair; 4011 4012 case 3: /* ialias with second output, no first input */ 4013 tcg_debug_assert(arg_ct->ialias); 4014 i_preferred_regs = output_pref(op, arg_ct->alias_index); 4015 4016 if (IS_DEAD_ARG(i) && 4017 !temp_readonly(ts) && 4018 ts->val_type == TEMP_VAL_REG && 4019 reg > 0 && 4020 s->reg_to_temp[reg - 1] == NULL && 4021 tcg_regset_test_reg(i_required_regs, reg) && 4022 !tcg_regset_test_reg(i_allocated_regs, reg) && 4023 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { 4024 tcg_regset_set_reg(i_allocated_regs, reg - 1); 4025 break; 4026 } 4027 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, 4028 i_allocated_regs, 0, 4029 ts->indirect_base); 4030 tcg_regset_set_reg(i_allocated_regs, reg); 4031 reg += 1; 4032 goto do_pair; 4033 4034 do_pair: 4035 /* 4036 * If an aliased input is not dead after the instruction, 4037 * we must allocate a new register and move it. 4038 */ 4039 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { 4040 TCGRegSet t_allocated_regs = i_allocated_regs; 4041 4042 /* 4043 * Because of the alias, and the continued life, make sure 4044 * that the temp is somewhere *other* than the reg pair, 4045 * and we get a copy in reg. 
        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* Sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified. */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified. */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now. */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
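    /*
     * For example, itsl->val == itsh->val == 0x00010001 combines into
     * val == 0x0001000100010001, which matches dup_const(MO_16, val),
     * so the constant can be emitted as a 16-bit dup instead of MO_64.
     */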
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}

static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               TCG_TARGET_CALL_STACK_OFFSET +
               stk_slot * sizeof(tcg_target_long));
}

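/*
 * load_arg_normal below picks between the two helpers above.  Argument
 * slots are numbered continuously across registers and stack, so if,
 * say, tcg_target_call_iarg_regs held 8 registers on some host, an
 * argument in slot 10 would be stored to stack slot 2, i.e. at
 * TCG_TARGET_CALL_STACK_OFFSET + 2 * sizeof(tcg_target_long).
 */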
static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
                            TCGTemp *ts, TCGRegSet *allocated_regs)
{
    if (REG_P(l)) {
        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
        load_arg_reg(s, reg, ts, *allocated_regs);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
                     ts, *allocated_regs);
    }
}

static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers. */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers. */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed. */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_regs[i];

            /* ENV should not be modified. */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

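/*
 * For example, PROF_ADD(prof, orig, tb_count) accumulates one context's
 * translation-block count into a snapshot, while PROF_MAX keeps the
 * largest per-context value; both read the source field atomically
 * because translation threads may be updating it concurrently.
 */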
/* Pass in a zeroed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif

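/*
 * Allocate registers and emit host code for one TB.  Returns the size
 * of the generated code on success.  Negative values ask the caller to
 * restart: -1 when the code_gen_buffer high-water mark is hit, -2 when
 * the TB outgrows the 16-bit offsets of gen_insn_end_off or when
 * relocations cannot be resolved.
 */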
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted. */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps. */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness. */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

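    /*
     * Main allocation and emission loop: each op is dispatched to a
     * register-allocation helper, which in turn emits host code through
     * the tcg_out_* backend hooks.  gen_insn_end_off[] records, for each
     * guest instruction, the host-code offset at which its code ends,
     * and gen_insn_data[] records the values from each insn_start op.
     */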
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset. */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: it would be much faster to have specialized register
               allocator functions for some common argument patterns. */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

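/*
 * The "JIT cycles" figure below is derived from profile_getclock()
 * deltas; the 2.4 GHz divisor merely converts that into an indicative
 * seconds figure and is not calibrated against the actual host.
 */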
#ifdef CONFIG_PROFILER
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte  %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
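/*
 * Per the GDB JIT interface, the debugger sets a breakpoint in
 * __jit_debug_register_code and, when it fires, walks the entry list
 * reachable from __jit_debug_descriptor to read each in-memory symfile;
 * the asm("") body above exists only to keep the call from being
 * optimized away.
 */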

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

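    /*
     * The image built below is a minimal in-memory ELF file: the fixed
     * template is followed immediately by the caller's .debug_frame
     * contents, which is why shdr[4].sh_offset is sizeof(struct ElfImage)
     * and img_size below adds debug_frame_size.
     */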
    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

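    /*
     * With DEBUG_JIT defined near the top of this file, the block below
     * also writes the image to disk for offline inspection, e.g. with
     * "readelf -a $TMPDIR/qemu.jit" (the exact path depends on
     * g_get_tmp_dir()).
     */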
#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid the unused-return-value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif