/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
#ifdef CONFIG_TCG_INTERPRETER
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         ffi_cif *cif);
#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
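
/*
 * Illustrative note (a sketch, not part of the build): these emit helpers
 * scale by TCG_TARGET_INSN_UNIT_SIZE.  On a host with 1-byte units (a
 * byte-stream encoding such as x86), tcg_out32() below takes the memcpy
 * path and advances code_ptr by 4 units:
 *
 *     tcg_out32(s, insn);    // copies 4 bytes, code_ptr += 4 / 1
 *
 * On a host with fixed 4-byte units, the same call is a single unit store
 * and code_ptr advances by one.
 */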
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
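
/*
 * Illustrative expansion (a sketch, not compiled): tcg-target-con-set.h is
 * included three times below with different definitions of the C_O*_I*
 * macros.  Taking a hypothetical entry C_O1_I2(r, r, ri) as an example:
 *
 *   1st include: enum member       c_o1_i2_r_r_ri,
 *   2nd include: constraint table  { .args_ct_str = { "r", "r", "ri" } },
 *   3rd include: the backend's tcg_target_op_def() returns c_o1_i2_r_r_ri
 *
 * so the enum value directly indexes the matching constraint_sets[] entry.
 */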
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
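    /*
     * The struct copy above duplicated every TCGTemp, but each mem_base
     * pointer still refers to the parent's tcg_init_ctx.temps[] array;
     * the loop below recomputes the index and re-points it at this
     * thread's copy.
     */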
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static GHashTable *ffi_table;

static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
#endif

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }
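
    /*
     * At this point each opcode's args_ct points at its own slice of the
     * flat array: an opcode with one output and two inputs owns three
     * consecutive TCGArgConstraint slots, filled in by process_op_defs().
     */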

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field.  */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
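
/*
 * Worked example for the typemask decoding in tcg_context_init() above
 * (illustrative): the mask packs 3-bit dh_typecode values, return type in
 * bits [2:0] and argument i in bits [3i+2:3i].  A hypothetical helper
 * returning i64 with (i32, s64) arguments has typemask 0x10b:
 *
 *     typemask & 7  == dh_typecode_i64, the ffi return type
 *     typemask >> 3 == 0x21, so 32 - clz32(0x21) == 6, rounded up to 2 args
 */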

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#if HOST_BIG_ENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_NORMAL:
    case TEMP_LOCAL:
        break;
    default:
        g_assert_not_reached();
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
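
/*
 * Usage sketch (illustrative, not part of the build): tcg_constant_i32()
 * returns an interned, read-only TEMP_CONST temp, for which freeing is the
 * silent no-op above, while tcg_const_i32() builds a fresh mutable temp:
 *
 *     TCGv_i32 c = tcg_constant_i32(42);   // shared; must never be written
 *     TCGv_i32 t = tcg_const_i32(42);      // private copy; writable
 *     tcg_gen_add_i32(t, t, c);
 *     tcg_temp_free_i32(t);                // freeing 'c' would be ignored
 */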

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
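
/*
 * Usage sketch (illustrative): generic expansion code can gate optional
 * opcodes on this predicate rather than on TCG_TARGET_HAS_* directly, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit the native population-count op
 *     } else {
 *         // fall back to an expansion built from supported ops
 *     }
 */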

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned typemask;
    const TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    typemask = info->typemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
        bool is_signed = argtype & 1;

        if (is_32bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i32 orig = temp_tcgv_i32(args[i]);
            if (is_signed) {
                tcg_gen_ext_i32_i64(temp, orig);
            } else {
                tcg_gen_extu_i32_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
#if HOST_BIG_ENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
        bool want_align = false;

#if defined(CONFIG_TCG_INTERPRETER)
        /*
         * Align all arguments, so that they land in predictable places
         * for passing off to ffi_call.
         */
        want_align = true;
#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
        /* Some targets want aligned 64 bit args */
        want_align = is_64bit;
#endif

        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }

        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /*
             * If stack grows up, then we will be placing successive
             * arguments at lower addresses, which means we need to
             * reverse the order compared to how we would normally
             * treat either big or little-endian.  For those arguments
             * that will wind up in registers, this still works for
             * HPPA (the only current STACK_GROWSUP target) since the
             * argument registers are *also* allocated in decreasing
             * order.  If another such target is added, this logic may
             * have to get more complicated to differentiate between
             * stack arguments and register arguments.
             */
#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;

        if (is_32bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
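
/*
 * Illustrative note: ldst_name above and alignment_name below render a
 * MemOpIdx in the op dump.  For example, assuming an operand built with
 * make_memop_idx(MO_LEUL | MO_ALIGN_4, 1), tcg_dump_ops() prints
 * ",al4+leul,1": alignment prefix, then size/endianness, then mmu index.
 */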

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep = k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep = k ? "," : "";
"," : ""; 1840 col += ne_fprintf(f, "%s%s", sep, 1841 tcg_get_arg_str(s, buf, sizeof(buf), 1842 op->args[k++])); 1843 } 1844 switch (c) { 1845 case INDEX_op_brcond_i32: 1846 case INDEX_op_setcond_i32: 1847 case INDEX_op_movcond_i32: 1848 case INDEX_op_brcond2_i32: 1849 case INDEX_op_setcond2_i32: 1850 case INDEX_op_brcond_i64: 1851 case INDEX_op_setcond_i64: 1852 case INDEX_op_movcond_i64: 1853 case INDEX_op_cmp_vec: 1854 case INDEX_op_cmpsel_vec: 1855 if (op->args[k] < ARRAY_SIZE(cond_name) 1856 && cond_name[op->args[k]]) { 1857 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]); 1858 } else { 1859 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]); 1860 } 1861 i = 1; 1862 break; 1863 case INDEX_op_qemu_ld_i32: 1864 case INDEX_op_qemu_st_i32: 1865 case INDEX_op_qemu_st8_i32: 1866 case INDEX_op_qemu_ld_i64: 1867 case INDEX_op_qemu_st_i64: 1868 { 1869 MemOpIdx oi = op->args[k++]; 1870 MemOp op = get_memop(oi); 1871 unsigned ix = get_mmuidx(oi); 1872 1873 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { 1874 col += ne_fprintf(f, ",$0x%x,%u", op, ix); 1875 } else { 1876 const char *s_al, *s_op; 1877 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT]; 1878 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; 1879 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix); 1880 } 1881 i = 1; 1882 } 1883 break; 1884 case INDEX_op_bswap16_i32: 1885 case INDEX_op_bswap16_i64: 1886 case INDEX_op_bswap32_i32: 1887 case INDEX_op_bswap32_i64: 1888 case INDEX_op_bswap64_i64: 1889 { 1890 TCGArg flags = op->args[k]; 1891 const char *name = NULL; 1892 1893 if (flags < ARRAY_SIZE(bswap_flag_name)) { 1894 name = bswap_flag_name[flags]; 1895 } 1896 if (name) { 1897 col += ne_fprintf(f, ",%s", name); 1898 } else { 1899 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags); 1900 } 1901 i = k = 1; 1902 } 1903 break; 1904 default: 1905 i = 0; 1906 break; 1907 } 1908 switch (c) { 1909 case INDEX_op_set_label: 1910 case INDEX_op_br: 1911 case INDEX_op_brcond_i32: 1912 case INDEX_op_brcond_i64: 1913 case INDEX_op_brcond2_i32: 1914 col += ne_fprintf(f, "%s$L%d", k ? "," : "", 1915 arg_label(op->args[k])->id); 1916 i++, k++; 1917 break; 1918 default: 1919 break; 1920 } 1921 for (; i < nb_cargs; i++, k++) { 1922 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? 
"," : "", 1923 op->args[k]); 1924 } 1925 } 1926 1927 if (have_prefs || op->life) { 1928 for (; col < 40; ++col) { 1929 putc(' ', f); 1930 } 1931 } 1932 1933 if (op->life) { 1934 unsigned life = op->life; 1935 1936 if (life & (SYNC_ARG * 3)) { 1937 ne_fprintf(f, " sync:"); 1938 for (i = 0; i < 2; ++i) { 1939 if (life & (SYNC_ARG << i)) { 1940 ne_fprintf(f, " %d", i); 1941 } 1942 } 1943 } 1944 life /= DEAD_ARG; 1945 if (life) { 1946 ne_fprintf(f, " dead:"); 1947 for (i = 0; life; ++i, life >>= 1) { 1948 if (life & 1) { 1949 ne_fprintf(f, " %d", i); 1950 } 1951 } 1952 } 1953 } 1954 1955 if (have_prefs) { 1956 for (i = 0; i < nb_oargs; ++i) { 1957 TCGRegSet set = op->output_pref[i]; 1958 1959 if (i == 0) { 1960 ne_fprintf(f, " pref="); 1961 } else { 1962 ne_fprintf(f, ","); 1963 } 1964 if (set == 0) { 1965 ne_fprintf(f, "none"); 1966 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { 1967 ne_fprintf(f, "all"); 1968 #ifdef CONFIG_DEBUG_TCG 1969 } else if (tcg_regset_single(set)) { 1970 TCGReg reg = tcg_regset_first(set); 1971 ne_fprintf(f, "%s", tcg_target_reg_names[reg]); 1972 #endif 1973 } else if (TCG_TARGET_NB_REGS <= 32) { 1974 ne_fprintf(f, "0x%x", (uint32_t)set); 1975 } else { 1976 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set); 1977 } 1978 } 1979 } 1980 1981 putc('\n', f); 1982 } 1983 } 1984 1985 /* we give more priority to constraints with less registers */ 1986 static int get_constraint_priority(const TCGOpDef *def, int k) 1987 { 1988 const TCGArgConstraint *arg_ct = &def->args_ct[k]; 1989 int n; 1990 1991 if (arg_ct->oalias) { 1992 /* an alias is equivalent to a single register */ 1993 n = 1; 1994 } else { 1995 n = ctpop64(arg_ct->regs); 1996 } 1997 return TCG_TARGET_NB_REGS - n + 1; 1998 } 1999 2000 /* sort from highest priority to lowest */ 2001 static void sort_constraints(TCGOpDef *def, int start, int n) 2002 { 2003 int i, j; 2004 TCGArgConstraint *a = def->args_ct; 2005 2006 for (i = 0; i < n; i++) { 2007 a[start + i].sort_index = start + i; 2008 } 2009 if (n <= 1) { 2010 return; 2011 } 2012 for (i = 0; i < n - 1; i++) { 2013 for (j = i + 1; j < n; j++) { 2014 int p1 = get_constraint_priority(def, a[start + i].sort_index); 2015 int p2 = get_constraint_priority(def, a[start + j].sort_index); 2016 if (p1 < p2) { 2017 int tmp = a[start + i].sort_index; 2018 a[start + i].sort_index = a[start + j].sort_index; 2019 a[start + j].sort_index = tmp; 2020 } 2021 } 2022 } 2023 } 2024 2025 static void process_op_defs(TCGContext *s) 2026 { 2027 TCGOpcode op; 2028 2029 for (op = 0; op < NB_OPS; op++) { 2030 TCGOpDef *def = &tcg_op_defs[op]; 2031 const TCGTargetOpDef *tdefs; 2032 int i, nb_args; 2033 2034 if (def->flags & TCG_OPF_NOT_PRESENT) { 2035 continue; 2036 } 2037 2038 nb_args = def->nb_iargs + def->nb_oargs; 2039 if (nb_args == 0) { 2040 continue; 2041 } 2042 2043 /* 2044 * Macro magic should make it impossible, but double-check that 2045 * the array index is in range. Since the signness of an enum 2046 * is implementation defined, force the result to unsigned. 2047 */ 2048 unsigned con_set = tcg_target_op_def(op); 2049 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); 2050 tdefs = &constraint_sets[con_set]; 2051 2052 for (i = 0; i < nb_args; i++) { 2053 const char *ct_str = tdefs->args_ct_str[i]; 2054 /* Incomplete TCGTargetOpDef entry. */ 2055 tcg_debug_assert(ct_str != NULL); 2056 2057 while (*ct_str != '\0') { 2058 switch(*ct_str) { 2059 case '0' ... 
'9': 2060 { 2061 int oarg = *ct_str - '0'; 2062 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); 2063 tcg_debug_assert(oarg < def->nb_oargs); 2064 tcg_debug_assert(def->args_ct[oarg].regs != 0); 2065 def->args_ct[i] = def->args_ct[oarg]; 2066 /* The output sets oalias. */ 2067 def->args_ct[oarg].oalias = true; 2068 def->args_ct[oarg].alias_index = i; 2069 /* The input sets ialias. */ 2070 def->args_ct[i].ialias = true; 2071 def->args_ct[i].alias_index = oarg; 2072 } 2073 ct_str++; 2074 break; 2075 case '&': 2076 def->args_ct[i].newreg = true; 2077 ct_str++; 2078 break; 2079 case 'i': 2080 def->args_ct[i].ct |= TCG_CT_CONST; 2081 ct_str++; 2082 break; 2083 2084 /* Include all of the target-specific constraints. */ 2085 2086 #undef CONST 2087 #define CONST(CASE, MASK) \ 2088 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break; 2089 #define REGS(CASE, MASK) \ 2090 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break; 2091 2092 #include "tcg-target-con-str.h" 2093 2094 #undef REGS 2095 #undef CONST 2096 default: 2097 /* Typo in TCGTargetOpDef constraint. */ 2098 g_assert_not_reached(); 2099 } 2100 } 2101 } 2102 2103 /* TCGTargetOpDef entry with too much information? */ 2104 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); 2105 2106 /* sort the constraints (XXX: this is just a heuristic) */ 2107 sort_constraints(def, 0, def->nb_oargs); 2108 sort_constraints(def, def->nb_oargs, def->nb_iargs); 2109 } 2110 } 2111 2112 void tcg_op_remove(TCGContext *s, TCGOp *op) 2113 { 2114 TCGLabel *label; 2115 2116 switch (op->opc) { 2117 case INDEX_op_br: 2118 label = arg_label(op->args[0]); 2119 label->refs--; 2120 break; 2121 case INDEX_op_brcond_i32: 2122 case INDEX_op_brcond_i64: 2123 label = arg_label(op->args[3]); 2124 label->refs--; 2125 break; 2126 case INDEX_op_brcond2_i32: 2127 label = arg_label(op->args[5]); 2128 label->refs--; 2129 break; 2130 default: 2131 break; 2132 } 2133 2134 QTAILQ_REMOVE(&s->ops, op, link); 2135 QTAILQ_INSERT_TAIL(&s->free_ops, op, link); 2136 s->nb_ops--; 2137 2138 #ifdef CONFIG_PROFILER 2139 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1); 2140 #endif 2141 } 2142 2143 void tcg_remove_ops_after(TCGOp *op) 2144 { 2145 TCGContext *s = tcg_ctx; 2146 2147 while (true) { 2148 TCGOp *last = tcg_last_op(); 2149 if (last == op) { 2150 return; 2151 } 2152 tcg_op_remove(s, last); 2153 } 2154 } 2155 2156 static TCGOp *tcg_op_alloc(TCGOpcode opc) 2157 { 2158 TCGContext *s = tcg_ctx; 2159 TCGOp *op; 2160 2161 if (likely(QTAILQ_EMPTY(&s->free_ops))) { 2162 op = tcg_malloc(sizeof(TCGOp)); 2163 } else { 2164 op = QTAILQ_FIRST(&s->free_ops); 2165 QTAILQ_REMOVE(&s->free_ops, op, link); 2166 } 2167 memset(op, 0, offsetof(TCGOp, link)); 2168 op->opc = opc; 2169 s->nb_ops++; 2170 2171 return op; 2172 } 2173 2174 TCGOp *tcg_emit_op(TCGOpcode opc) 2175 { 2176 TCGOp *op = tcg_op_alloc(opc); 2177 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); 2178 return op; 2179 } 2180 2181 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2182 { 2183 TCGOp *new_op = tcg_op_alloc(opc); 2184 QTAILQ_INSERT_BEFORE(old_op, new_op, link); 2185 return new_op; 2186 } 2187 2188 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) 2189 { 2190 TCGOp *new_op = tcg_op_alloc(opc); 2191 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); 2192 return new_op; 2193 } 2194 2195 /* Reachable analysis: remove unreachable code.
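   A single forward pass suffices: once an unconditional control
   transfer (br, exit_tb, goto_ptr, or a noreturn helper call) is
   seen, every op up to the next referenced label is dead and is
   dropped, except insn_start ops, which are kept for unwind info.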
*/ 2196 static void reachable_code_pass(TCGContext *s) 2197 { 2198 TCGOp *op, *op_next; 2199 bool dead = false; 2200 2201 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2202 bool remove = dead; 2203 TCGLabel *label; 2204 2205 switch (op->opc) { 2206 case INDEX_op_set_label: 2207 label = arg_label(op->args[0]); 2208 if (label->refs == 0) { 2209 /* 2210 * While there is an occasional backward branch, virtually 2211 * all branches generated by the translators are forward. 2212 * Which means that generally we will have already removed 2213 * all references to the label by the time we see it, and 2214 * there is little to be gained by iterating. 2215 */ 2216 remove = true; 2217 } else { 2218 /* Once we see a label, insns become live again. */ 2219 dead = false; 2220 remove = false; 2221 2222 /* 2223 * Optimization can fold conditional branches to unconditional. 2224 * If we find a label with one reference which is preceded by 2225 * an unconditional branch to it, remove both. This needed to 2226 * wait until the dead code in between them was removed. 2227 */ 2228 if (label->refs == 1) { 2229 TCGOp *op_prev = QTAILQ_PREV(op, link); 2230 if (op_prev->opc == INDEX_op_br && 2231 label == arg_label(op_prev->args[0])) { 2232 tcg_op_remove(s, op_prev); 2233 remove = true; 2234 } 2235 } 2236 } 2237 break; 2238 2239 case INDEX_op_br: 2240 case INDEX_op_exit_tb: 2241 case INDEX_op_goto_ptr: 2242 /* Unconditional branches; everything following is dead. */ 2243 dead = true; 2244 break; 2245 2246 case INDEX_op_call: 2247 /* Notice noreturn helper calls, such as those raising exceptions. */ 2248 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { 2249 dead = true; 2250 } 2251 break; 2252 2253 case INDEX_op_insn_start: 2254 /* Never remove -- we need to keep these for unwind. */ 2255 remove = false; 2256 break; 2257 2258 default: 2259 break; 2260 } 2261 2262 if (remove) { 2263 tcg_op_remove(s, op); 2264 } 2265 } 2266 } 2267 2268 #define TS_DEAD 1 2269 #define TS_MEM 2 2270 2271 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) 2272 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) 2273 2274 /* For liveness_pass_1, the register preferences for a given temp. */ 2275 static inline TCGRegSet *la_temp_pref(TCGTemp *ts) 2276 { 2277 return ts->state_ptr; 2278 } 2279 2280 /* For liveness_pass_1, reset the preferences for a given temp to the 2281 * maximal regset for its type. 2282 */ 2283 static inline void la_reset_pref(TCGTemp *ts) 2284 { 2285 *la_temp_pref(ts) 2286 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); 2287 } 2288 2289 /* liveness analysis: end of function: all temps are dead, and globals 2290 should be in memory. */ 2291 static void la_func_end(TCGContext *s, int ng, int nt) 2292 { 2293 int i; 2294 2295 for (i = 0; i < ng; ++i) { 2296 s->temps[i].state = TS_DEAD | TS_MEM; 2297 la_reset_pref(&s->temps[i]); 2298 } 2299 for (i = ng; i < nt; ++i) { 2300 s->temps[i].state = TS_DEAD; 2301 la_reset_pref(&s->temps[i]); 2302 } 2303 } 2304 2305 /* liveness analysis: end of basic block: all temps are dead, globals 2306 and local temps should be in memory.
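   TEMP_NORMAL, TEMP_EBB and TEMP_CONST temps, by contrast, simply
   die: their values are not expected to outlive the (extended)
   basic block, and constants can be rematerialized from ts->val.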
*/ 2307 static void la_bb_end(TCGContext *s, int ng, int nt) 2308 { 2309 int i; 2310 2311 for (i = 0; i < nt; ++i) { 2312 TCGTemp *ts = &s->temps[i]; 2313 int state; 2314 2315 switch (ts->kind) { 2316 case TEMP_FIXED: 2317 case TEMP_GLOBAL: 2318 case TEMP_LOCAL: 2319 state = TS_DEAD | TS_MEM; 2320 break; 2321 case TEMP_NORMAL: 2322 case TEMP_EBB: 2323 case TEMP_CONST: 2324 state = TS_DEAD; 2325 break; 2326 default: 2327 g_assert_not_reached(); 2328 } 2329 ts->state = state; 2330 la_reset_pref(ts); 2331 } 2332 } 2333 2334 /* liveness analysis: sync globals back to memory. */ 2335 static void la_global_sync(TCGContext *s, int ng) 2336 { 2337 int i; 2338 2339 for (i = 0; i < ng; ++i) { 2340 int state = s->temps[i].state; 2341 s->temps[i].state = state | TS_MEM; 2342 if (state == TS_DEAD) { 2343 /* If the global was previously dead, reset prefs. */ 2344 la_reset_pref(&s->temps[i]); 2345 } 2346 } 2347 } 2348 2349 /* 2350 * liveness analysis: conditional branch: all temps are dead unless 2351 * explicitly live-across-conditional-branch, globals and local temps 2352 * should be synced. 2353 */ 2354 static void la_bb_sync(TCGContext *s, int ng, int nt) 2355 { 2356 la_global_sync(s, ng); 2357 2358 for (int i = ng; i < nt; ++i) { 2359 TCGTemp *ts = &s->temps[i]; 2360 int state; 2361 2362 switch (ts->kind) { 2363 case TEMP_LOCAL: 2364 state = ts->state; 2365 ts->state = state | TS_MEM; 2366 if (state != TS_DEAD) { 2367 continue; 2368 } 2369 break; 2370 case TEMP_NORMAL: 2371 s->temps[i].state = TS_DEAD; 2372 break; 2373 case TEMP_EBB: 2374 case TEMP_CONST: 2375 continue; 2376 default: 2377 g_assert_not_reached(); 2378 } 2379 la_reset_pref(&s->temps[i]); 2380 } 2381 } 2382 2383 /* liveness analysis: sync globals back to memory and kill. */ 2384 static void la_global_kill(TCGContext *s, int ng) 2385 { 2386 int i; 2387 2388 for (i = 0; i < ng; i++) { 2389 s->temps[i].state = TS_DEAD | TS_MEM; 2390 la_reset_pref(&s->temps[i]); 2391 } 2392 } 2393 2394 /* liveness analysis: note live globals crossing calls. */ 2395 static void la_cross_call(TCGContext *s, int nt) 2396 { 2397 TCGRegSet mask = ~tcg_target_call_clobber_regs; 2398 int i; 2399 2400 for (i = 0; i < nt; i++) { 2401 TCGTemp *ts = &s->temps[i]; 2402 if (!(ts->state & TS_DEAD)) { 2403 TCGRegSet *pset = la_temp_pref(ts); 2404 TCGRegSet set = *pset; 2405 2406 set &= mask; 2407 /* If the combination is not possible, restart. */ 2408 if (set == 0) { 2409 set = tcg_target_available_regs[ts->type] & mask; 2410 } 2411 *pset = set; 2412 } 2413 } 2414 } 2415 2416 /* Liveness analysis: update the opc_arg_life array to tell if a 2417 given input argument is dead. Instructions updating dead 2418 temporaries are removed. */ 2419 static void liveness_pass_1(TCGContext *s) 2420 { 2421 int nb_globals = s->nb_globals; 2422 int nb_temps = s->nb_temps; 2423 TCGOp *op, *op_prev; 2424 TCGRegSet *prefs; 2425 int i; 2426 2427 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); 2428 for (i = 0; i < nb_temps; ++i) { 2429 s->temps[i].state_ptr = prefs + i; 2430 } 2431 2432 /* ??? Should be redundant with the exit_tb that ends the TB.
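   Keep the call regardless: it also initializes every temp's state
   and register preference before the backward walk below begins.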
*/ 2433 la_func_end(s, nb_globals, nb_temps); 2434 2435 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { 2436 int nb_iargs, nb_oargs; 2437 TCGOpcode opc_new, opc_new2; 2438 bool have_opc_new2; 2439 TCGLifeData arg_life = 0; 2440 TCGTemp *ts; 2441 TCGOpcode opc = op->opc; 2442 const TCGOpDef *def = &tcg_op_defs[opc]; 2443 2444 switch (opc) { 2445 case INDEX_op_call: 2446 { 2447 int call_flags; 2448 int nb_call_regs; 2449 2450 nb_oargs = TCGOP_CALLO(op); 2451 nb_iargs = TCGOP_CALLI(op); 2452 call_flags = tcg_call_flags(op); 2453 2454 /* pure functions can be removed if their result is unused */ 2455 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { 2456 for (i = 0; i < nb_oargs; i++) { 2457 ts = arg_temp(op->args[i]); 2458 if (ts->state != TS_DEAD) { 2459 goto do_not_remove_call; 2460 } 2461 } 2462 goto do_remove; 2463 } 2464 do_not_remove_call: 2465 2466 /* Output args are dead. */ 2467 for (i = 0; i < nb_oargs; i++) { 2468 ts = arg_temp(op->args[i]); 2469 if (ts->state & TS_DEAD) { 2470 arg_life |= DEAD_ARG << i; 2471 } 2472 if (ts->state & TS_MEM) { 2473 arg_life |= SYNC_ARG << i; 2474 } 2475 ts->state = TS_DEAD; 2476 la_reset_pref(ts); 2477 2478 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ 2479 op->output_pref[i] = 0; 2480 } 2481 2482 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | 2483 TCG_CALL_NO_READ_GLOBALS))) { 2484 la_global_kill(s, nb_globals); 2485 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { 2486 la_global_sync(s, nb_globals); 2487 } 2488 2489 /* Record arguments that die in this helper. */ 2490 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2491 ts = arg_temp(op->args[i]); 2492 if (ts && ts->state & TS_DEAD) { 2493 arg_life |= DEAD_ARG << i; 2494 } 2495 } 2496 2497 /* For all live registers, remove call-clobbered prefs. */ 2498 la_cross_call(s, nb_temps); 2499 2500 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 2501 2502 /* Input arguments are live for preceding opcodes. */ 2503 for (i = 0; i < nb_iargs; i++) { 2504 ts = arg_temp(op->args[i + nb_oargs]); 2505 if (ts && ts->state & TS_DEAD) { 2506 /* For those arguments that die, and will be allocated 2507 * in registers, clear the register set for that arg, 2508 * to be filled in below. For args that will be on 2509 * the stack, reset to any available reg. 2510 */ 2511 *la_temp_pref(ts) 2512 = (i < nb_call_regs ? 0 : 2513 tcg_target_available_regs[ts->type]); 2514 ts->state &= ~TS_DEAD; 2515 } 2516 } 2517 2518 /* For each input argument, add its input register to prefs. 2519 If a temp is used once, this produces a single set bit. */ 2520 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { 2521 ts = arg_temp(op->args[i + nb_oargs]); 2522 if (ts) { 2523 tcg_regset_set_reg(*la_temp_pref(ts), 2524 tcg_target_call_iarg_regs[i]); 2525 } 2526 } 2527 } 2528 break; 2529 case INDEX_op_insn_start: 2530 break; 2531 case INDEX_op_discard: 2532 /* mark the temporary as dead */ 2533 ts = arg_temp(op->args[0]); 2534 ts->state = TS_DEAD; 2535 la_reset_pref(ts); 2536 break; 2537 2538 case INDEX_op_add2_i32: 2539 opc_new = INDEX_op_add_i32; 2540 goto do_addsub2; 2541 case INDEX_op_sub2_i32: 2542 opc_new = INDEX_op_sub_i32; 2543 goto do_addsub2; 2544 case INDEX_op_add2_i64: 2545 opc_new = INDEX_op_add_i64; 2546 goto do_addsub2; 2547 case INDEX_op_sub2_i64: 2548 opc_new = INDEX_op_sub_i64; 2549 do_addsub2: 2550 nb_iargs = 4; 2551 nb_oargs = 2; 2552 /* Test if the high part of the operation is dead, but not 2553 the low part. The result can be optimized to a simple 2554 add or sub. 
This happens often for an x86_64 guest when the 2555 cpu mode is set to 32 bit. */ 2556 if (arg_temp(op->args[1])->state == TS_DEAD) { 2557 if (arg_temp(op->args[0])->state == TS_DEAD) { 2558 goto do_remove; 2559 } 2560 /* Replace the opcode and adjust the args in place, 2561 leaving 3 unused args at the end. */ 2562 op->opc = opc = opc_new; 2563 op->args[1] = op->args[2]; 2564 op->args[2] = op->args[4]; 2565 /* Fall through and mark the single-word operation live. */ 2566 nb_iargs = 2; 2567 nb_oargs = 1; 2568 } 2569 goto do_not_remove; 2570 2571 case INDEX_op_mulu2_i32: 2572 opc_new = INDEX_op_mul_i32; 2573 opc_new2 = INDEX_op_muluh_i32; 2574 have_opc_new2 = TCG_TARGET_HAS_muluh_i32; 2575 goto do_mul2; 2576 case INDEX_op_muls2_i32: 2577 opc_new = INDEX_op_mul_i32; 2578 opc_new2 = INDEX_op_mulsh_i32; 2579 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; 2580 goto do_mul2; 2581 case INDEX_op_mulu2_i64: 2582 opc_new = INDEX_op_mul_i64; 2583 opc_new2 = INDEX_op_muluh_i64; 2584 have_opc_new2 = TCG_TARGET_HAS_muluh_i64; 2585 goto do_mul2; 2586 case INDEX_op_muls2_i64: 2587 opc_new = INDEX_op_mul_i64; 2588 opc_new2 = INDEX_op_mulsh_i64; 2589 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; 2590 goto do_mul2; 2591 do_mul2: 2592 nb_iargs = 2; 2593 nb_oargs = 2; 2594 if (arg_temp(op->args[1])->state == TS_DEAD) { 2595 if (arg_temp(op->args[0])->state == TS_DEAD) { 2596 /* Both parts of the operation are dead. */ 2597 goto do_remove; 2598 } 2599 /* The high part of the operation is dead; generate the low. */ 2600 op->opc = opc = opc_new; 2601 op->args[1] = op->args[2]; 2602 op->args[2] = op->args[3]; 2603 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { 2604 /* The low part of the operation is dead; generate the high. */ 2605 op->opc = opc = opc_new2; 2606 op->args[0] = op->args[1]; 2607 op->args[1] = op->args[2]; 2608 op->args[2] = op->args[3]; 2609 } else { 2610 goto do_not_remove; 2611 } 2612 /* Mark the single-word operation live. */ 2613 nb_oargs = 1; 2614 goto do_not_remove; 2615 2616 default: 2617 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ 2618 nb_iargs = def->nb_iargs; 2619 nb_oargs = def->nb_oargs; 2620 2621 /* Test if the operation can be removed because all 2622 its outputs are dead. We assume that nb_oargs == 0 2623 implies side effects. */ 2624 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { 2625 for (i = 0; i < nb_oargs; i++) { 2626 if (arg_temp(op->args[i])->state != TS_DEAD) { 2627 goto do_not_remove; 2628 } 2629 } 2630 goto do_remove; 2631 } 2632 goto do_not_remove; 2633 2634 do_remove: 2635 tcg_op_remove(s, op); 2636 break; 2637 2638 do_not_remove: 2639 for (i = 0; i < nb_oargs; i++) { 2640 ts = arg_temp(op->args[i]); 2641 2642 /* Remember the preference of the uses that followed. */ 2643 op->output_pref[i] = *la_temp_pref(ts); 2644 2645 /* Output args are dead. */ 2646 if (ts->state & TS_DEAD) { 2647 arg_life |= DEAD_ARG << i; 2648 } 2649 if (ts->state & TS_MEM) { 2650 arg_life |= SYNC_ARG << i; 2651 } 2652 ts->state = TS_DEAD; 2653 la_reset_pref(ts); 2654 } 2655 2656 /* If end of basic block, update.
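   The cases are ordered from strongest to weakest effect: a TB exit
   kills everything, a conditional branch syncs but can preserve
   EBB-scoped temps, an ordinary BB end kills all non-local temps,
   and a plain side effect merely forces globals out to memory.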
*/ 2657 if (def->flags & TCG_OPF_BB_EXIT) { 2658 la_func_end(s, nb_globals, nb_temps); 2659 } else if (def->flags & TCG_OPF_COND_BRANCH) { 2660 la_bb_sync(s, nb_globals, nb_temps); 2661 } else if (def->flags & TCG_OPF_BB_END) { 2662 la_bb_end(s, nb_globals, nb_temps); 2663 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2664 la_global_sync(s, nb_globals); 2665 if (def->flags & TCG_OPF_CALL_CLOBBER) { 2666 la_cross_call(s, nb_temps); 2667 } 2668 } 2669 2670 /* Record arguments that die in this opcode. */ 2671 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2672 ts = arg_temp(op->args[i]); 2673 if (ts->state & TS_DEAD) { 2674 arg_life |= DEAD_ARG << i; 2675 } 2676 } 2677 2678 /* Input arguments are live for preceding opcodes. */ 2679 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2680 ts = arg_temp(op->args[i]); 2681 if (ts->state & TS_DEAD) { 2682 /* For operands that were dead, initially allow 2683 all regs for the type. */ 2684 *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; 2685 ts->state &= ~TS_DEAD; 2686 } 2687 } 2688 2689 /* Incorporate constraints for this operand. */ 2690 switch (opc) { 2691 case INDEX_op_mov_i32: 2692 case INDEX_op_mov_i64: 2693 /* Note that these are TCG_OPF_NOT_PRESENT and do not 2694 have proper constraints. That said, special case 2695 moves to propagate preferences backward. */ 2696 if (IS_DEAD_ARG(1)) { 2697 *la_temp_pref(arg_temp(op->args[0])) 2698 = *la_temp_pref(arg_temp(op->args[1])); 2699 } 2700 break; 2701 2702 default: 2703 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 2704 const TCGArgConstraint *ct = &def->args_ct[i]; 2705 TCGRegSet set, *pset; 2706 2707 ts = arg_temp(op->args[i]); 2708 pset = la_temp_pref(ts); 2709 set = *pset; 2710 2711 set &= ct->regs; 2712 if (ct->ialias) { 2713 set &= op->output_pref[ct->alias_index]; 2714 } 2715 /* If the combination is not possible, restart. */ 2716 if (set == 0) { 2717 set = ct->regs; 2718 } 2719 *pset = set; 2720 } 2721 break; 2722 } 2723 break; 2724 } 2725 op->life = arg_life; 2726 } 2727 } 2728 2729 /* Liveness analysis: Convert indirect regs to direct temporaries. */ 2730 static bool liveness_pass_2(TCGContext *s) 2731 { 2732 int nb_globals = s->nb_globals; 2733 int nb_temps, i; 2734 bool changes = false; 2735 TCGOp *op, *op_next; 2736 2737 /* Create a temporary for each indirect global. */ 2738 for (i = 0; i < nb_globals; ++i) { 2739 TCGTemp *its = &s->temps[i]; 2740 if (its->indirect_reg) { 2741 TCGTemp *dts = tcg_temp_alloc(s); 2742 dts->type = its->type; 2743 dts->base_type = its->base_type; 2744 dts->kind = TEMP_EBB; 2745 its->state_ptr = dts; 2746 } else { 2747 its->state_ptr = NULL; 2748 } 2749 /* All globals begin dead. */ 2750 its->state = TS_DEAD; 2751 } 2752 for (nb_temps = s->nb_temps; i < nb_temps; ++i) { 2753 TCGTemp *its = &s->temps[i]; 2754 its->state_ptr = NULL; 2755 its->state = TS_DEAD; 2756 } 2757 2758 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2759 TCGOpcode opc = op->opc; 2760 const TCGOpDef *def = &tcg_op_defs[opc]; 2761 TCGLifeData arg_life = op->life; 2762 int nb_iargs, nb_oargs, call_flags; 2763 TCGTemp *arg_ts, *dir_ts; 2764 2765 if (opc == INDEX_op_call) { 2766 nb_oargs = TCGOP_CALLO(op); 2767 nb_iargs = TCGOP_CALLI(op); 2768 call_flags = tcg_call_flags(op); 2769 } else { 2770 nb_iargs = def->nb_iargs; 2771 nb_oargs = def->nb_oargs; 2772 2773 /* Set flags similar to how calls require. 
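   That is, translate the opcode's TCG_OPF_* flags into the same
   TCG_CALL_NO_* bits used for real calls above, so that the global
   load/store bookkeeping below can treat both cases uniformly.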
*/ 2774 if (def->flags & TCG_OPF_COND_BRANCH) { 2775 /* Like reading globals: sync_globals */ 2776 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2777 } else if (def->flags & TCG_OPF_BB_END) { 2778 /* Like writing globals: save_globals */ 2779 call_flags = 0; 2780 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { 2781 /* Like reading globals: sync_globals */ 2782 call_flags = TCG_CALL_NO_WRITE_GLOBALS; 2783 } else { 2784 /* No effect on globals. */ 2785 call_flags = (TCG_CALL_NO_READ_GLOBALS | 2786 TCG_CALL_NO_WRITE_GLOBALS); 2787 } 2788 } 2789 2790 /* Make sure that input arguments are available. */ 2791 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2792 arg_ts = arg_temp(op->args[i]); 2793 if (arg_ts) { 2794 dir_ts = arg_ts->state_ptr; 2795 if (dir_ts && arg_ts->state == TS_DEAD) { 2796 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 2797 ? INDEX_op_ld_i32 2798 : INDEX_op_ld_i64); 2799 TCGOp *lop = tcg_op_insert_before(s, op, lopc); 2800 2801 lop->args[0] = temp_arg(dir_ts); 2802 lop->args[1] = temp_arg(arg_ts->mem_base); 2803 lop->args[2] = arg_ts->mem_offset; 2804 2805 /* Loaded, but synced with memory. */ 2806 arg_ts->state = TS_MEM; 2807 } 2808 } 2809 } 2810 2811 /* Perform input replacement, and mark inputs that became dead. 2812 No action is required except keeping temp_state up to date 2813 so that we reload when needed. */ 2814 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 2815 arg_ts = arg_temp(op->args[i]); 2816 if (arg_ts) { 2817 dir_ts = arg_ts->state_ptr; 2818 if (dir_ts) { 2819 op->args[i] = temp_arg(dir_ts); 2820 changes = true; 2821 if (IS_DEAD_ARG(i)) { 2822 arg_ts->state = TS_DEAD; 2823 } 2824 } 2825 } 2826 } 2827 2828 /* Liveness analysis should ensure that the following are 2829 all correct, for call sites and basic block end points. */ 2830 if (call_flags & TCG_CALL_NO_READ_GLOBALS) { 2831 /* Nothing to do */ 2832 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) { 2833 for (i = 0; i < nb_globals; ++i) { 2834 /* Liveness should see that globals are synced back, 2835 that is, either TS_DEAD or TS_MEM. */ 2836 arg_ts = &s->temps[i]; 2837 tcg_debug_assert(arg_ts->state_ptr == 0 2838 || arg_ts->state != 0); 2839 } 2840 } else { 2841 for (i = 0; i < nb_globals; ++i) { 2842 /* Liveness should see that globals are saved back, 2843 that is, TS_DEAD, waiting to be reloaded. */ 2844 arg_ts = &s->temps[i]; 2845 tcg_debug_assert(arg_ts->state_ptr == 0 2846 || arg_ts->state == TS_DEAD); 2847 } 2848 } 2849 2850 /* Outputs become available. */ 2851 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { 2852 arg_ts = arg_temp(op->args[0]); 2853 dir_ts = arg_ts->state_ptr; 2854 if (dir_ts) { 2855 op->args[0] = temp_arg(dir_ts); 2856 changes = true; 2857 2858 /* The output is now live and modified. */ 2859 arg_ts->state = 0; 2860 2861 if (NEED_SYNC_ARG(0)) { 2862 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2863 ? 
INDEX_op_st_i32 2864 : INDEX_op_st_i64); 2865 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2866 TCGTemp *out_ts = dir_ts; 2867 2868 if (IS_DEAD_ARG(0)) { 2869 out_ts = arg_temp(op->args[1]); 2870 arg_ts->state = TS_DEAD; 2871 tcg_op_remove(s, op); 2872 } else { 2873 arg_ts->state = TS_MEM; 2874 } 2875 2876 sop->args[0] = temp_arg(out_ts); 2877 sop->args[1] = temp_arg(arg_ts->mem_base); 2878 sop->args[2] = arg_ts->mem_offset; 2879 } else { 2880 tcg_debug_assert(!IS_DEAD_ARG(0)); 2881 } 2882 } 2883 } else { 2884 for (i = 0; i < nb_oargs; i++) { 2885 arg_ts = arg_temp(op->args[i]); 2886 dir_ts = arg_ts->state_ptr; 2887 if (!dir_ts) { 2888 continue; 2889 } 2890 op->args[i] = temp_arg(dir_ts); 2891 changes = true; 2892 2893 /* The output is now live and modified. */ 2894 arg_ts->state = 0; 2895 2896 /* Sync outputs upon their last write. */ 2897 if (NEED_SYNC_ARG(i)) { 2898 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 2899 ? INDEX_op_st_i32 2900 : INDEX_op_st_i64); 2901 TCGOp *sop = tcg_op_insert_after(s, op, sopc); 2902 2903 sop->args[0] = temp_arg(dir_ts); 2904 sop->args[1] = temp_arg(arg_ts->mem_base); 2905 sop->args[2] = arg_ts->mem_offset; 2906 2907 arg_ts->state = TS_MEM; 2908 } 2909 /* Drop outputs that are dead. */ 2910 if (IS_DEAD_ARG(i)) { 2911 arg_ts->state = TS_DEAD; 2912 } 2913 } 2914 } 2915 } 2916 2917 return changes; 2918 } 2919 2920 #ifdef CONFIG_DEBUG_TCG 2921 static void dump_regs(TCGContext *s) 2922 { 2923 TCGTemp *ts; 2924 int i; 2925 char buf[64]; 2926 2927 for(i = 0; i < s->nb_temps; i++) { 2928 ts = &s->temps[i]; 2929 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2930 switch(ts->val_type) { 2931 case TEMP_VAL_REG: 2932 printf("%s", tcg_target_reg_names[ts->reg]); 2933 break; 2934 case TEMP_VAL_MEM: 2935 printf("%d(%s)", (int)ts->mem_offset, 2936 tcg_target_reg_names[ts->mem_base->reg]); 2937 break; 2938 case TEMP_VAL_CONST: 2939 printf("$0x%" PRIx64, ts->val); 2940 break; 2941 case TEMP_VAL_DEAD: 2942 printf("D"); 2943 break; 2944 default: 2945 printf("???"); 2946 break; 2947 } 2948 printf("\n"); 2949 } 2950 2951 for(i = 0; i < TCG_TARGET_NB_REGS; i++) { 2952 if (s->reg_to_temp[i] != NULL) { 2953 printf("%s: %s\n", 2954 tcg_target_reg_names[i], 2955 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); 2956 } 2957 } 2958 } 2959 2960 static void check_regs(TCGContext *s) 2961 { 2962 int reg; 2963 int k; 2964 TCGTemp *ts; 2965 char buf[64]; 2966 2967 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { 2968 ts = s->reg_to_temp[reg]; 2969 if (ts != NULL) { 2970 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { 2971 printf("Inconsistency for register %s:\n", 2972 tcg_target_reg_names[reg]); 2973 goto fail; 2974 } 2975 } 2976 } 2977 for (k = 0; k < s->nb_temps; k++) { 2978 ts = &s->temps[k]; 2979 if (ts->val_type == TEMP_VAL_REG 2980 && ts->kind != TEMP_FIXED 2981 && s->reg_to_temp[ts->reg] != ts) { 2982 printf("Inconsistency for temp %s:\n", 2983 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); 2984 fail: 2985 printf("reg state:\n"); 2986 dump_regs(s); 2987 tcg_abort(); 2988 } 2989 } 2990 } 2991 #endif 2992 2993 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) 2994 { 2995 intptr_t off, size, align; 2996 2997 switch (ts->type) { 2998 case TCG_TYPE_I32: 2999 size = align = 4; 3000 break; 3001 case TCG_TYPE_I64: 3002 case TCG_TYPE_V64: 3003 size = align = 8; 3004 break; 3005 case TCG_TYPE_V128: 3006 size = align = 16; 3007 break; 3008 case TCG_TYPE_V256: 3009 /* Note that we do not require aligned storage for V256. 
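   Presumably because no host requires 32-byte alignment for the
   loads and stores used on such a slot; only 16-byte alignment is
   reserved here, and even that may be reduced by the clamp below.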
*/ 3010 size = 32, align = 16; 3011 break; 3012 default: 3013 g_assert_not_reached(); 3014 } 3015 3016 /* 3017 * Assume the stack is sufficiently aligned. 3018 * This affects e.g. ARM NEON, where we have 8 byte stack alignment 3019 * and do not require 16 byte vector alignment. This seems slightly 3020 * easier than fully parameterizing the above switch statement. 3021 */ 3022 align = MIN(TCG_TARGET_STACK_ALIGN, align); 3023 off = ROUND_UP(s->current_frame_offset, align); 3024 3025 /* If we've exhausted the stack frame, restart with a smaller TB. */ 3026 if (off + size > s->frame_end) { 3027 tcg_raise_tb_overflow(s); 3028 } 3029 s->current_frame_offset = off + size; 3030 3031 ts->mem_offset = off; 3032 #if defined(__sparc__) 3033 ts->mem_offset += TCG_TARGET_STACK_BIAS; 3034 #endif 3035 ts->mem_base = s->frame_temp; 3036 ts->mem_allocated = 1; 3037 } 3038 3039 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); 3040 3041 /* Mark a temporary as free or dead. If 'free_or_dead' is negative, 3042 mark it free; otherwise mark it dead. */ 3043 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) 3044 { 3045 TCGTempVal new_type; 3046 3047 switch (ts->kind) { 3048 case TEMP_FIXED: 3049 return; 3050 case TEMP_GLOBAL: 3051 case TEMP_LOCAL: 3052 new_type = TEMP_VAL_MEM; 3053 break; 3054 case TEMP_NORMAL: 3055 case TEMP_EBB: 3056 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD; 3057 break; 3058 case TEMP_CONST: 3059 new_type = TEMP_VAL_CONST; 3060 break; 3061 default: 3062 g_assert_not_reached(); 3063 } 3064 if (ts->val_type == TEMP_VAL_REG) { 3065 s->reg_to_temp[ts->reg] = NULL; 3066 } 3067 ts->val_type = new_type; 3068 } 3069 3070 /* Mark a temporary as dead. */ 3071 static inline void temp_dead(TCGContext *s, TCGTemp *ts) 3072 { 3073 temp_free_or_dead(s, ts, 1); 3074 } 3075 3076 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary 3077 register needs to be allocated to store a constant. If 'free_or_dead' 3078 is non-zero, subsequently release the temporary; if it is positive, the 3079 temp is dead; if it is negative, the temp is free. */ 3080 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, 3081 TCGRegSet preferred_regs, int free_or_dead) 3082 { 3083 if (!temp_readonly(ts) && !ts->mem_coherent) { 3084 if (!ts->mem_allocated) { 3085 temp_allocate_frame(s, ts); 3086 } 3087 switch (ts->val_type) { 3088 case TEMP_VAL_CONST: 3089 /* If we're going to free the temp immediately, then we won't 3090 require it later in a register, so attempt to store the 3091 constant to memory directly.
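   Not every backend accepts every constant there: if tcg_out_sti
   cannot encode the store, fall through to load the constant into
   a scratch register and store that, as for TEMP_VAL_REG.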
*/ 3092 if (free_or_dead 3093 && tcg_out_sti(s, ts->type, ts->val, 3094 ts->mem_base->reg, ts->mem_offset)) { 3095 break; 3096 } 3097 temp_load(s, ts, tcg_target_available_regs[ts->type], 3098 allocated_regs, preferred_regs); 3099 /* fallthrough */ 3100 3101 case TEMP_VAL_REG: 3102 tcg_out_st(s, ts->type, ts->reg, 3103 ts->mem_base->reg, ts->mem_offset); 3104 break; 3105 3106 case TEMP_VAL_MEM: 3107 break; 3108 3109 case TEMP_VAL_DEAD: 3110 default: 3111 tcg_abort(); 3112 } 3113 ts->mem_coherent = 1; 3114 } 3115 if (free_or_dead) { 3116 temp_free_or_dead(s, ts, free_or_dead); 3117 } 3118 } 3119 3120 /* free register 'reg' by spilling the corresponding temporary if necessary */ 3121 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) 3122 { 3123 TCGTemp *ts = s->reg_to_temp[reg]; 3124 if (ts != NULL) { 3125 temp_sync(s, ts, allocated_regs, 0, -1); 3126 } 3127 } 3128 3129 /** 3130 * tcg_reg_alloc: 3131 * @required_regs: Set of registers in which we must allocate. 3132 * @allocated_regs: Set of registers which must be avoided. 3133 * @preferred_regs: Set of registers we should prefer. 3134 * @rev: True if we search the registers in "indirect" order. 3135 * 3136 * The allocated register must be in @required_regs & ~@allocated_regs, 3137 * but if we can put it in @preferred_regs we may save a move later. 3138 */ 3139 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, 3140 TCGRegSet allocated_regs, 3141 TCGRegSet preferred_regs, bool rev) 3142 { 3143 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); 3144 TCGRegSet reg_ct[2]; 3145 const int *order; 3146 3147 reg_ct[1] = required_regs & ~allocated_regs; 3148 tcg_debug_assert(reg_ct[1] != 0); 3149 reg_ct[0] = reg_ct[1] & preferred_regs; 3150 3151 /* Skip the preferred_regs option if it cannot be satisfied, 3152 or if the preference made no difference. */ 3153 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; 3154 3155 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; 3156 3157 /* Try free registers, preferences first. */ 3158 for (j = f; j < 2; j++) { 3159 TCGRegSet set = reg_ct[j]; 3160 3161 if (tcg_regset_single(set)) { 3162 /* One register in the set. */ 3163 TCGReg reg = tcg_regset_first(set); 3164 if (s->reg_to_temp[reg] == NULL) { 3165 return reg; 3166 } 3167 } else { 3168 for (i = 0; i < n; i++) { 3169 TCGReg reg = order[i]; 3170 if (s->reg_to_temp[reg] == NULL && 3171 tcg_regset_test_reg(set, reg)) { 3172 return reg; 3173 } 3174 } 3175 } 3176 } 3177 3178 /* We must spill something. */ 3179 for (j = f; j < 2; j++) { 3180 TCGRegSet set = reg_ct[j]; 3181 3182 if (tcg_regset_single(set)) { 3183 /* One register in the set. */ 3184 TCGReg reg = tcg_regset_first(set); 3185 tcg_reg_free(s, reg, allocated_regs); 3186 return reg; 3187 } else { 3188 for (i = 0; i < n; i++) { 3189 TCGReg reg = order[i]; 3190 if (tcg_regset_test_reg(set, reg)) { 3191 tcg_reg_free(s, reg, allocated_regs); 3192 return reg; 3193 } 3194 } 3195 } 3196 } 3197 3198 tcg_abort(); 3199 } 3200 3201 /* Make sure the temporary is in a register. If needed, allocate the register 3202 from DESIRED while avoiding ALLOCATED. 
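   Three cases are possible: the value is already in a register
   (nothing to do), is a constant (materialize it with movi/dupi),
   or lives in memory (reload it with a plain load).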
*/ 3203 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, 3204 TCGRegSet allocated_regs, TCGRegSet preferred_regs) 3205 { 3206 TCGReg reg; 3207 3208 switch (ts->val_type) { 3209 case TEMP_VAL_REG: 3210 return; 3211 case TEMP_VAL_CONST: 3212 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3213 preferred_regs, ts->indirect_base); 3214 if (ts->type <= TCG_TYPE_I64) { 3215 tcg_out_movi(s, ts->type, reg, ts->val); 3216 } else { 3217 uint64_t val = ts->val; 3218 MemOp vece = MO_64; 3219 3220 /* 3221 * Find the minimal vector element that matches the constant. 3222 * The targets will, in general, have to do this search anyway, 3223 * so do it generically here. 3224 */ 3225 if (val == dup_const(MO_8, val)) { 3226 vece = MO_8; 3227 } else if (val == dup_const(MO_16, val)) { 3228 vece = MO_16; 3229 } else if (val == dup_const(MO_32, val)) { 3230 vece = MO_32; 3231 } 3232 3233 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val); 3234 } 3235 ts->mem_coherent = 0; 3236 break; 3237 case TEMP_VAL_MEM: 3238 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, 3239 preferred_regs, ts->indirect_base); 3240 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); 3241 ts->mem_coherent = 1; 3242 break; 3243 case TEMP_VAL_DEAD: 3244 default: 3245 tcg_abort(); 3246 } 3247 ts->reg = reg; 3248 ts->val_type = TEMP_VAL_REG; 3249 s->reg_to_temp[reg] = ts; 3250 } 3251 3252 /* Save a temporary to memory. 'allocated_regs' is used in case a 3253 temporary register needs to be allocated to store a constant. */ 3254 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs) 3255 { 3256 /* The liveness analysis already ensures that globals are back 3257 in memory. Keep a tcg_debug_assert for safety. */ 3258 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts)); 3259 } 3260 3261 /* save globals to their canonical location and assume they can be 3262 modified by the following code. 'allocated_regs' is used in case a 3263 temporary register needs to be allocated to store a constant. */ 3264 static void save_globals(TCGContext *s, TCGRegSet allocated_regs) 3265 { 3266 int i, n; 3267 3268 for (i = 0, n = s->nb_globals; i < n; i++) { 3269 temp_save(s, &s->temps[i], allocated_regs); 3270 } 3271 } 3272 3273 /* sync globals to their canonical location and assume they can be 3274 read by the following code. 'allocated_regs' is used in case a 3275 temporary register needs to be allocated to store a constant. */ 3276 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) 3277 { 3278 int i, n; 3279 3280 for (i = 0, n = s->nb_globals; i < n; i++) { 3281 TCGTemp *ts = &s->temps[i]; 3282 tcg_debug_assert(ts->val_type != TEMP_VAL_REG 3283 || ts->kind == TEMP_FIXED 3284 || ts->mem_coherent); 3285 } 3286 } 3287 3288 /* at the end of a basic block, we assume all temporaries are dead and 3289 all globals are stored at their canonical location. */ 3290 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) 3291 { 3292 int i; 3293 3294 for (i = s->nb_globals; i < s->nb_temps; i++) { 3295 TCGTemp *ts = &s->temps[i]; 3296 3297 switch (ts->kind) { 3298 case TEMP_LOCAL: 3299 temp_save(s, ts, allocated_regs); 3300 break; 3301 case TEMP_NORMAL: 3302 case TEMP_EBB: 3303 /* The liveness analysis already ensures that temps are dead. 3304 Keep a tcg_debug_assert for safety. */ 3305 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3306 break; 3307 case TEMP_CONST: 3308 /* Similarly, we should have freed any allocated register.
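   The constant value itself is never lost: val_type falls back to
   TEMP_VAL_CONST and temp_load can rematerialize it on demand.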
*/ 3309 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST); 3310 break; 3311 default: 3312 g_assert_not_reached(); 3313 } 3314 } 3315 3316 save_globals(s, allocated_regs); 3317 } 3318 3319 /* 3320 * At a conditional branch, we assume all temporaries are dead unless 3321 * explicitly live-across-conditional-branch; all globals and local 3322 * temps are synced to their location. 3323 */ 3324 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) 3325 { 3326 sync_globals(s, allocated_regs); 3327 3328 for (int i = s->nb_globals; i < s->nb_temps; i++) { 3329 TCGTemp *ts = &s->temps[i]; 3330 /* 3331 * The liveness analysis already ensures that temps are dead. 3332 * Keep tcg_debug_asserts for safety. 3333 */ 3334 switch (ts->kind) { 3335 case TEMP_LOCAL: 3336 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); 3337 break; 3338 case TEMP_NORMAL: 3339 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); 3340 break; 3341 case TEMP_EBB: 3342 case TEMP_CONST: 3343 break; 3344 default: 3345 g_assert_not_reached(); 3346 } 3347 } 3348 } 3349 3350 /* 3351 * Specialized code generation for INDEX_op_mov_* with a constant. 3352 */ 3353 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, 3354 tcg_target_ulong val, TCGLifeData arg_life, 3355 TCGRegSet preferred_regs) 3356 { 3357 /* ENV should not be modified. */ 3358 tcg_debug_assert(!temp_readonly(ots)); 3359 3360 /* The movi is not explicitly generated here. */ 3361 if (ots->val_type == TEMP_VAL_REG) { 3362 s->reg_to_temp[ots->reg] = NULL; 3363 } 3364 ots->val_type = TEMP_VAL_CONST; 3365 ots->val = val; 3366 ots->mem_coherent = 0; 3367 if (NEED_SYNC_ARG(0)) { 3368 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); 3369 } else if (IS_DEAD_ARG(0)) { 3370 temp_dead(s, ots); 3371 } 3372 } 3373 3374 /* 3375 * Specialized code generation for INDEX_op_mov_*. 3376 */ 3377 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) 3378 { 3379 const TCGLifeData arg_life = op->life; 3380 TCGRegSet allocated_regs, preferred_regs; 3381 TCGTemp *ts, *ots; 3382 TCGType otype, itype; 3383 3384 allocated_regs = s->reserved_regs; 3385 preferred_regs = op->output_pref[0]; 3386 ots = arg_temp(op->args[0]); 3387 ts = arg_temp(op->args[1]); 3388 3389 /* ENV should not be modified. */ 3390 tcg_debug_assert(!temp_readonly(ots)); 3391 3392 /* Note that otype != itype for no-op truncation. */ 3393 otype = ots->type; 3394 itype = ts->type; 3395 3396 if (ts->val_type == TEMP_VAL_CONST) { 3397 /* propagate constant or generate sti */ 3398 tcg_target_ulong val = ts->val; 3399 if (IS_DEAD_ARG(1)) { 3400 temp_dead(s, ts); 3401 } 3402 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); 3403 return; 3404 } 3405 3406 /* If the source value is in memory we're going to be forced 3407 to have it in a register in order to perform the copy. Copy 3408 the SOURCE value into its own register first, that way we 3409 don't have to reload SOURCE the next time it is used. */ 3410 if (ts->val_type == TEMP_VAL_MEM) { 3411 temp_load(s, ts, tcg_target_available_regs[itype], 3412 allocated_regs, preferred_regs); 3413 } 3414 3415 tcg_debug_assert(ts->val_type == TEMP_VAL_REG); 3416 if (IS_DEAD_ARG(0)) { 3417 /* mov to a non-saved dead register makes no sense (even with 3418 liveness analysis disabled). 
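   The only work left is the sync itself, so assert NEED_SYNC_ARG(0)
   and store the source register directly into the destination's
   memory slot.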
*/ 3419 tcg_debug_assert(NEED_SYNC_ARG(0)); 3420 if (!ots->mem_allocated) { 3421 temp_allocate_frame(s, ots); 3422 } 3423 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); 3424 if (IS_DEAD_ARG(1)) { 3425 temp_dead(s, ts); 3426 } 3427 temp_dead(s, ots); 3428 } else { 3429 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { 3430 /* the mov can be suppressed */ 3431 if (ots->val_type == TEMP_VAL_REG) { 3432 s->reg_to_temp[ots->reg] = NULL; 3433 } 3434 ots->reg = ts->reg; 3435 temp_dead(s, ts); 3436 } else { 3437 if (ots->val_type != TEMP_VAL_REG) { 3438 /* When allocating a new register, make sure to not spill the 3439 input one. */ 3440 tcg_regset_set_reg(allocated_regs, ts->reg); 3441 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], 3442 allocated_regs, preferred_regs, 3443 ots->indirect_base); 3444 } 3445 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { 3446 /* 3447 * Cross register class move not supported. 3448 * Store the source register into the destination slot 3449 * and leave the destination temp as TEMP_VAL_MEM. 3450 */ 3451 assert(!temp_readonly(ots)); 3452 if (!ots->mem_allocated) { 3453 temp_allocate_frame(s, ots); 3454 } 3455 tcg_out_st(s, ts->type, ts->reg, 3456 ots->mem_base->reg, ots->mem_offset); 3457 ots->mem_coherent = 1; 3458 temp_free_or_dead(s, ots, -1); 3459 return; 3460 } 3461 } 3462 ots->val_type = TEMP_VAL_REG; 3463 ots->mem_coherent = 0; 3464 s->reg_to_temp[ots->reg] = ots; 3465 if (NEED_SYNC_ARG(0)) { 3466 temp_sync(s, ots, allocated_regs, 0, 0); 3467 } 3468 } 3469 } 3470 3471 /* 3472 * Specialized code generation for INDEX_op_dup_vec. 3473 */ 3474 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) 3475 { 3476 const TCGLifeData arg_life = op->life; 3477 TCGRegSet dup_out_regs, dup_in_regs; 3478 TCGTemp *its, *ots; 3479 TCGType itype, vtype; 3480 intptr_t endian_fixup; 3481 unsigned vece; 3482 bool ok; 3483 3484 ots = arg_temp(op->args[0]); 3485 its = arg_temp(op->args[1]); 3486 3487 /* ENV should not be modified. */ 3488 tcg_debug_assert(!temp_readonly(ots)); 3489 3490 itype = its->type; 3491 vece = TCGOP_VECE(op); 3492 vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3493 3494 if (its->val_type == TEMP_VAL_CONST) { 3495 /* Propagate constant via movi -> dupi. */ 3496 tcg_target_ulong val = its->val; 3497 if (IS_DEAD_ARG(1)) { 3498 temp_dead(s, its); 3499 } 3500 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); 3501 return; 3502 } 3503 3504 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3505 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; 3506 3507 /* Allocate the output register now. */ 3508 if (ots->val_type != TEMP_VAL_REG) { 3509 TCGRegSet allocated_regs = s->reserved_regs; 3510 3511 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { 3512 /* Make sure to not spill the input register. */ 3513 tcg_regset_set_reg(allocated_regs, its->reg); 3514 } 3515 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3516 op->output_pref[0], ots->indirect_base); 3517 ots->val_type = TEMP_VAL_REG; 3518 ots->mem_coherent = 0; 3519 s->reg_to_temp[ots->reg] = ots; 3520 } 3521 3522 switch (its->val_type) { 3523 case TEMP_VAL_REG: 3524 /* 3525 * The dup constraints must be broad, covering all possible VECE. 3526 * However, tcg_out_dup_vec() gets to see the VECE and we allow it 3527 * to fail, indicating that extra moves are required for that case.
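 * A failure is recovered below: retry the dup from the memory slot
 * with tcg_out_dupm_vec, or move the value into the output register
 * and dup it from there as a last resort.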
3528 */ 3529 if (tcg_regset_test_reg(dup_in_regs, its->reg)) { 3530 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { 3531 goto done; 3532 } 3533 /* Try again from memory or a vector input register. */ 3534 } 3535 if (!its->mem_coherent) { 3536 /* 3537 * The input register is not synced, and so an extra store 3538 * would be required to use memory. Attempt an integer-vector 3539 * register move first. We do not have a TCGRegSet for this. 3540 */ 3541 if (tcg_out_mov(s, itype, ots->reg, its->reg)) { 3542 break; 3543 } 3544 /* Sync the temp back to its slot and load from there. */ 3545 temp_sync(s, its, s->reserved_regs, 0, 0); 3546 } 3547 /* fall through */ 3548 3549 case TEMP_VAL_MEM: 3550 #if HOST_BIG_ENDIAN 3551 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; 3552 endian_fixup -= 1 << vece; 3553 #else 3554 endian_fixup = 0; 3555 #endif 3556 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, 3557 its->mem_offset + endian_fixup)) { 3558 goto done; 3559 } 3560 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); 3561 break; 3562 3563 default: 3564 g_assert_not_reached(); 3565 } 3566 3567 /* We now have a vector input register, so dup must succeed. */ 3568 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); 3569 tcg_debug_assert(ok); 3570 3571 done: 3572 if (IS_DEAD_ARG(1)) { 3573 temp_dead(s, its); 3574 } 3575 if (NEED_SYNC_ARG(0)) { 3576 temp_sync(s, ots, s->reserved_regs, 0, 0); 3577 } 3578 if (IS_DEAD_ARG(0)) { 3579 temp_dead(s, ots); 3580 } 3581 } 3582 3583 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) 3584 { 3585 const TCGLifeData arg_life = op->life; 3586 const TCGOpDef * const def = &tcg_op_defs[op->opc]; 3587 TCGRegSet i_allocated_regs; 3588 TCGRegSet o_allocated_regs; 3589 int i, k, nb_iargs, nb_oargs; 3590 TCGReg reg; 3591 TCGArg arg; 3592 const TCGArgConstraint *arg_ct; 3593 TCGTemp *ts; 3594 TCGArg new_args[TCG_MAX_OP_ARGS]; 3595 int const_args[TCG_MAX_OP_ARGS]; 3596 3597 nb_oargs = def->nb_oargs; 3598 nb_iargs = def->nb_iargs; 3599 3600 /* copy constants */ 3601 memcpy(new_args + nb_oargs + nb_iargs, 3602 op->args + nb_oargs + nb_iargs, 3603 sizeof(TCGArg) * def->nb_cargs); 3604 3605 i_allocated_regs = s->reserved_regs; 3606 o_allocated_regs = s->reserved_regs; 3607 3608 /* satisfy input constraints */ 3609 for (k = 0; k < nb_iargs; k++) { 3610 TCGRegSet i_preferred_regs, o_preferred_regs; 3611 3612 i = def->args_ct[nb_oargs + k].sort_index; 3613 arg = op->args[i]; 3614 arg_ct = &def->args_ct[i]; 3615 ts = arg_temp(arg); 3616 3617 if (ts->val_type == TEMP_VAL_CONST 3618 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) { 3619 /* constant is OK for instruction */ 3620 const_args[i] = 1; 3621 new_args[i] = ts->val; 3622 continue; 3623 } 3624 3625 i_preferred_regs = o_preferred_regs = 0; 3626 if (arg_ct->ialias) { 3627 o_preferred_regs = op->output_pref[arg_ct->alias_index]; 3628 3629 /* 3630 * If the input is readonly, then it cannot also be an 3631 * output and aliased to itself. If the input is not 3632 * dead after the instruction, we must allocate a new 3633 * register and move it. 3634 */ 3635 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { 3636 goto allocate_in_reg; 3637 } 3638 3639 /* 3640 * Check if the current register has already been allocated 3641 * for another input aliased to an output. 
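 * If two such inputs shared one register, writing the first output
 * would clobber the other, still-needed input, so in that case a
 * fresh register is allocated and the value copied into it.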
3642 */ 3643 if (ts->val_type == TEMP_VAL_REG) { 3644 reg = ts->reg; 3645 for (int k2 = 0; k2 < k; k2++) { 3646 int i2 = def->args_ct[nb_oargs + k2].sort_index; 3647 if (def->args_ct[i2].ialias && reg == new_args[i2]) { 3648 goto allocate_in_reg; 3649 } 3650 } 3651 } 3652 i_preferred_regs = o_preferred_regs; 3653 } 3654 3655 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); 3656 reg = ts->reg; 3657 3658 if (!tcg_regset_test_reg(arg_ct->regs, reg)) { 3659 allocate_in_reg: 3660 /* 3661 * Allocate a new register matching the constraint 3662 * and move the temporary register into it. 3663 */ 3664 temp_load(s, ts, tcg_target_available_regs[ts->type], 3665 i_allocated_regs, 0); 3666 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, 3667 o_preferred_regs, ts->indirect_base); 3668 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 3669 /* 3670 * Cross register class move not supported. Sync the 3671 * temp back to its slot and load from there. 3672 */ 3673 temp_sync(s, ts, i_allocated_regs, 0, 0); 3674 tcg_out_ld(s, ts->type, reg, 3675 ts->mem_base->reg, ts->mem_offset); 3676 } 3677 } 3678 new_args[i] = reg; 3679 const_args[i] = 0; 3680 tcg_regset_set_reg(i_allocated_regs, reg); 3681 } 3682 3683 /* mark dead temporaries and free the associated registers */ 3684 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { 3685 if (IS_DEAD_ARG(i)) { 3686 temp_dead(s, arg_temp(op->args[i])); 3687 } 3688 } 3689 3690 if (def->flags & TCG_OPF_COND_BRANCH) { 3691 tcg_reg_alloc_cbranch(s, i_allocated_regs); 3692 } else if (def->flags & TCG_OPF_BB_END) { 3693 tcg_reg_alloc_bb_end(s, i_allocated_regs); 3694 } else { 3695 if (def->flags & TCG_OPF_CALL_CLOBBER) { 3696 /* XXX: permit generic clobber register list ? */ 3697 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3698 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3699 tcg_reg_free(s, i, i_allocated_regs); 3700 } 3701 } 3702 } 3703 if (def->flags & TCG_OPF_SIDE_EFFECTS) { 3704 /* sync globals if the op has side effects and might trigger 3705 an exception. */ 3706 sync_globals(s, i_allocated_regs); 3707 } 3708 3709 /* satisfy the output constraints */ 3710 for(k = 0; k < nb_oargs; k++) { 3711 i = def->args_ct[k].sort_index; 3712 arg = op->args[i]; 3713 arg_ct = &def->args_ct[i]; 3714 ts = arg_temp(arg); 3715 3716 /* ENV should not be modified. */ 3717 tcg_debug_assert(!temp_readonly(ts)); 3718 3719 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { 3720 reg = new_args[arg_ct->alias_index]; 3721 } else if (arg_ct->newreg) { 3722 reg = tcg_reg_alloc(s, arg_ct->regs, 3723 i_allocated_regs | o_allocated_regs, 3724 op->output_pref[k], ts->indirect_base); 3725 } else { 3726 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, 3727 op->output_pref[k], ts->indirect_base); 3728 } 3729 tcg_regset_set_reg(o_allocated_regs, reg); 3730 if (ts->val_type == TEMP_VAL_REG) { 3731 s->reg_to_temp[ts->reg] = NULL; 3732 } 3733 ts->val_type = TEMP_VAL_REG; 3734 ts->reg = reg; 3735 /* 3736 * Temp value is modified, so the value kept in memory is 3737 * potentially not the same. 
3738 */ 3739 ts->mem_coherent = 0; 3740 s->reg_to_temp[reg] = ts; 3741 new_args[i] = reg; 3742 } 3743 } 3744 3745 /* emit instruction */ 3746 if (def->flags & TCG_OPF_VECTOR) { 3747 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), 3748 new_args, const_args); 3749 } else { 3750 tcg_out_op(s, op->opc, new_args, const_args); 3751 } 3752 3753 /* move the outputs in the correct register if needed */ 3754 for(i = 0; i < nb_oargs; i++) { 3755 ts = arg_temp(op->args[i]); 3756 3757 /* ENV should not be modified. */ 3758 tcg_debug_assert(!temp_readonly(ts)); 3759 3760 if (NEED_SYNC_ARG(i)) { 3761 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); 3762 } else if (IS_DEAD_ARG(i)) { 3763 temp_dead(s, ts); 3764 } 3765 } 3766 } 3767 3768 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) 3769 { 3770 const TCGLifeData arg_life = op->life; 3771 TCGTemp *ots, *itsl, *itsh; 3772 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; 3773 3774 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ 3775 tcg_debug_assert(TCG_TARGET_REG_BITS == 32); 3776 tcg_debug_assert(TCGOP_VECE(op) == MO_64); 3777 3778 ots = arg_temp(op->args[0]); 3779 itsl = arg_temp(op->args[1]); 3780 itsh = arg_temp(op->args[2]); 3781 3782 /* ENV should not be modified. */ 3783 tcg_debug_assert(!temp_readonly(ots)); 3784 3785 /* Allocate the output register now. */ 3786 if (ots->val_type != TEMP_VAL_REG) { 3787 TCGRegSet allocated_regs = s->reserved_regs; 3788 TCGRegSet dup_out_regs = 3789 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; 3790 3791 /* Make sure to not spill the input registers. */ 3792 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { 3793 tcg_regset_set_reg(allocated_regs, itsl->reg); 3794 } 3795 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) { 3796 tcg_regset_set_reg(allocated_regs, itsh->reg); 3797 } 3798 3799 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, 3800 op->output_pref[0], ots->indirect_base); 3801 ots->val_type = TEMP_VAL_REG; 3802 ots->mem_coherent = 0; 3803 s->reg_to_temp[ots->reg] = ots; 3804 } 3805 3806 /* Promote dup2 of immediates to dupi_vec. */ 3807 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) { 3808 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val); 3809 MemOp vece = MO_64; 3810 3811 if (val == dup_const(MO_8, val)) { 3812 vece = MO_8; 3813 } else if (val == dup_const(MO_16, val)) { 3814 vece = MO_16; 3815 } else if (val == dup_const(MO_32, val)) { 3816 vece = MO_32; 3817 } 3818 3819 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val); 3820 goto done; 3821 } 3822 3823 /* If the two inputs form one 64-bit value, try dupm_vec. */ 3824 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { 3825 if (!itsl->mem_coherent) { 3826 temp_sync(s, itsl, s->reserved_regs, 0, 0); 3827 } 3828 if (!itsh->mem_coherent) { 3829 temp_sync(s, itsh, s->reserved_regs, 0, 0); 3830 } 3831 #if HOST_BIG_ENDIAN 3832 TCGTemp *its = itsh; 3833 #else 3834 TCGTemp *its = itsl; 3835 #endif 3836 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, 3837 its->mem_base->reg, its->mem_offset)) { 3838 goto done; 3839 } 3840 } 3841 3842 /* Fall back to generic expansion. 
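   (The caller is then expected to expand the dup2 as an ordinary
   two-input vector op through the generic allocation path; that
   dispatch happens outside this function.)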
*/ 3843 return false; 3844 3845 done: 3846 if (IS_DEAD_ARG(1)) { 3847 temp_dead(s, itsl); 3848 } 3849 if (IS_DEAD_ARG(2)) { 3850 temp_dead(s, itsh); 3851 } 3852 if (NEED_SYNC_ARG(0)) { 3853 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0)); 3854 } else if (IS_DEAD_ARG(0)) { 3855 temp_dead(s, ots); 3856 } 3857 return true; 3858 } 3859 3860 #ifdef TCG_TARGET_STACK_GROWSUP 3861 #define STACK_DIR(x) (-(x)) 3862 #else 3863 #define STACK_DIR(x) (x) 3864 #endif 3865 3866 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) 3867 { 3868 const int nb_oargs = TCGOP_CALLO(op); 3869 const int nb_iargs = TCGOP_CALLI(op); 3870 const TCGLifeData arg_life = op->life; 3871 const TCGHelperInfo *info; 3872 int flags, nb_regs, i; 3873 TCGReg reg; 3874 TCGArg arg; 3875 TCGTemp *ts; 3876 intptr_t stack_offset; 3877 size_t call_stack_size; 3878 tcg_insn_unit *func_addr; 3879 int allocate_args; 3880 TCGRegSet allocated_regs; 3881 3882 func_addr = tcg_call_func(op); 3883 info = tcg_call_info(op); 3884 flags = info->flags; 3885 3886 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); 3887 if (nb_regs > nb_iargs) { 3888 nb_regs = nb_iargs; 3889 } 3890 3891 /* assign stack slots first */ 3892 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); 3893 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 3894 ~(TCG_TARGET_STACK_ALIGN - 1); 3895 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); 3896 if (allocate_args) { 3897 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, 3898 preallocate call stack */ 3899 tcg_abort(); 3900 } 3901 3902 stack_offset = TCG_TARGET_CALL_STACK_OFFSET; 3903 for (i = nb_regs; i < nb_iargs; i++) { 3904 arg = op->args[nb_oargs + i]; 3905 #ifdef TCG_TARGET_STACK_GROWSUP 3906 stack_offset -= sizeof(tcg_target_long); 3907 #endif 3908 if (arg != TCG_CALL_DUMMY_ARG) { 3909 ts = arg_temp(arg); 3910 temp_load(s, ts, tcg_target_available_regs[ts->type], 3911 s->reserved_regs, 0); 3912 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); 3913 } 3914 #ifndef TCG_TARGET_STACK_GROWSUP 3915 stack_offset += sizeof(tcg_target_long); 3916 #endif 3917 } 3918 3919 /* assign input registers */ 3920 allocated_regs = s->reserved_regs; 3921 for (i = 0; i < nb_regs; i++) { 3922 arg = op->args[nb_oargs + i]; 3923 if (arg != TCG_CALL_DUMMY_ARG) { 3924 ts = arg_temp(arg); 3925 reg = tcg_target_call_iarg_regs[i]; 3926 3927 if (ts->val_type == TEMP_VAL_REG) { 3928 if (ts->reg != reg) { 3929 tcg_reg_free(s, reg, allocated_regs); 3930 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { 3931 /* 3932 * Cross register class move not supported. Sync the 3933 * temp back to its slot and load from there. 
3934 */ 3935 temp_sync(s, ts, allocated_regs, 0, 0); 3936 tcg_out_ld(s, ts->type, reg, 3937 ts->mem_base->reg, ts->mem_offset); 3938 } 3939 } 3940 } else { 3941 TCGRegSet arg_set = 0; 3942 3943 tcg_reg_free(s, reg, allocated_regs); 3944 tcg_regset_set_reg(arg_set, reg); 3945 temp_load(s, ts, arg_set, allocated_regs, 0); 3946 } 3947 3948 tcg_regset_set_reg(allocated_regs, reg); 3949 } 3950 } 3951 3952 /* mark dead temporaries and free the associated registers */ 3953 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { 3954 if (IS_DEAD_ARG(i)) { 3955 temp_dead(s, arg_temp(op->args[i])); 3956 } 3957 } 3958 3959 /* clobber call registers */ 3960 for (i = 0; i < TCG_TARGET_NB_REGS; i++) { 3961 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { 3962 tcg_reg_free(s, i, allocated_regs); 3963 } 3964 } 3965 3966 /* Save globals if they might be written by the helper, sync them if 3967 they might be read. */ 3968 if (flags & TCG_CALL_NO_READ_GLOBALS) { 3969 /* Nothing to do */ 3970 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { 3971 sync_globals(s, allocated_regs); 3972 } else { 3973 save_globals(s, allocated_regs); 3974 } 3975 3976 #ifdef CONFIG_TCG_INTERPRETER 3977 { 3978 gpointer hash = (gpointer)(uintptr_t)info->typemask; 3979 ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); 3980 assert(cif != NULL); 3981 tcg_out_call(s, func_addr, cif); 3982 } 3983 #else 3984 tcg_out_call(s, func_addr); 3985 #endif 3986 3987 /* assign output registers and emit moves if needed */ 3988 for(i = 0; i < nb_oargs; i++) { 3989 arg = op->args[i]; 3990 ts = arg_temp(arg); 3991 3992 /* ENV should not be modified. */ 3993 tcg_debug_assert(!temp_readonly(ts)); 3994 3995 reg = tcg_target_call_oarg_regs[i]; 3996 tcg_debug_assert(s->reg_to_temp[reg] == NULL); 3997 if (ts->val_type == TEMP_VAL_REG) { 3998 s->reg_to_temp[ts->reg] = NULL; 3999 } 4000 ts->val_type = TEMP_VAL_REG; 4001 ts->reg = reg; 4002 ts->mem_coherent = 0; 4003 s->reg_to_temp[reg] = ts; 4004 if (NEED_SYNC_ARG(i)) { 4005 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); 4006 } else if (IS_DEAD_ARG(i)) { 4007 temp_dead(s, ts); 4008 } 4009 } 4010 } 4011 4012 #ifdef CONFIG_PROFILER 4013 4014 /* avoid copy/paste errors */ 4015 #define PROF_ADD(to, from, field) \ 4016 do { \ 4017 (to)->field += qatomic_read(&((from)->field)); \ 4018 } while (0) 4019 4020 #define PROF_MAX(to, from, field) \ 4021 do { \ 4022 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \ 4023 if (val__ > (to)->field) { \ 4024 (to)->field = val__; \ 4025 } \ 4026 } while (0) 4027 4028 /* Pass in a zero'ed @prof */ 4029 static inline 4030 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) 4031 { 4032 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs); 4033 unsigned int i; 4034 4035 for (i = 0; i < n_ctxs; i++) { 4036 TCGContext *s = qatomic_read(&tcg_ctxs[i]); 4037 const TCGProfile *orig = &s->prof; 4038 4039 if (counters) { 4040 PROF_ADD(prof, orig, cpu_exec_time); 4041 PROF_ADD(prof, orig, tb_count1); 4042 PROF_ADD(prof, orig, tb_count); 4043 PROF_ADD(prof, orig, op_count); 4044 PROF_MAX(prof, orig, op_count_max); 4045 PROF_ADD(prof, orig, temp_count); 4046 PROF_MAX(prof, orig, temp_count_max); 4047 PROF_ADD(prof, orig, del_op_count); 4048 PROF_ADD(prof, orig, code_in_len); 4049 PROF_ADD(prof, orig, code_out_len); 4050 PROF_ADD(prof, orig, search_out_len); 4051 PROF_ADD(prof, orig, interm_time); 4052 PROF_ADD(prof, orig, code_time); 4053 PROF_ADD(prof, orig, la_time); 4054 PROF_ADD(prof, orig, opt_time); 4055 PROF_ADD(prof, orig, restore_count); 4056 

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                                       \
    do {                                                                \
        (to)->field += qatomic_read(&((from)->field));                  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)

/* Pass in a zeroed @prof.  */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += qatomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif
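
/*
 * Hedged usage sketch (not part of QEMU): both dump helpers append to a
 * caller-owned GString, so a minimal consumer looks like the hypothetical
 * function below.  In-tree, the monitor's "info jit" and "info opcount"
 * commands play this role.
 */
#if 0
static void example_dump_tcg_stats(void)
{
    GString *buf = g_string_new(NULL);

    tcg_dump_op_count(buf);    /* per-opcode counts, or a stub message */
    tcg_dump_info(buf);        /* aggregate JIT statistics */
    fputs(buf->str, stdout);
    g_string_free(buf, true);
}
#endif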


int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets.  */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
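        /*
         * Worked example for the insn_start case below: when
         * TARGET_LONG_BITS is 64 on a 32-bit host, each start word
         * arrives as two 32-bit args, low half first.  For instance,
         * args { 0x00001000, 0xffffffff } reassemble as
         * deposit64(0x00001000, 32, 32, 0xffffffff) == 0xffffffff00001000.
         */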
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /*
             * Note: code generation would be faster still with
             * specialized register-allocator functions for some
             * common argument patterns.
             */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of the block.  */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* Flush the instruction cache over the generated code.  */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
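
/*
 * Hedged sketch of the caller contract (the real handling lives with
 * tb_gen_code in translate-all.c; the variable names and label below
 * are illustrative only): a negative result means the TB must be
 * abandoned and regenerated.
 */
#if 0
    gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
    if (unlikely(gen_code_size < 0)) {
        /*
         * -1: the code_gen_buffer high-water mark was crossed;
         *     flush the whole buffer and retranslate into a fresh one.
         * -2: the TB outgrew the 16-bit gen_insn_end_off encoding, or a
         *     relocation could not be resolved; retry, e.g. with fewer
         *     guest instructions in the block.
         */
        goto buffer_overflow;  /* hypothetical restart label */
    }
#endif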

#ifdef CONFIG_PROFILER
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte  %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/*
 * In order to use this feature, the backend needs to do three things:
 *
 * (1) Define ELF_HOST_MACHINE to indicate both what value to
 *     put into the ELF image and to indicate support for the feature.
 *
 * (2) Define tcg_register_jit.  This should create a buffer containing
 *     the contents of a .debug_frame section that describes the post-
 *     prologue unwind info for the tcg machine.  (An illustrative
 *     sketch follows the GDB interface definitions below.)
 *
 * (3) Call tcg_register_jit_int, with the constructed .debug_frame.
 */

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
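
/*
 * Illustrative sketch of step (2) above, loosely modeled on what a
 * backend's tcg-target.c.inc provides.  This is not a real backend:
 * the structure contents are elided, because a valid .debug_frame
 * needs host-specific CFI describing the prologue.
 */
#if 0
static const DebugFrameHeader example_debug_frame = {
    /* CIE and FDE fields describing the host's unwind rules ...  */
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* fde.func_start and fde.func_len are patched in below.  */
    tcg_register_jit_int(buf, buf_size,
                         &example_debug_frame, sizeof(example_debug_frame));
}
#endif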

/* Return the offset of @str within @strtab; @str must be present.  */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t len;
        uint16_t version;
        uint32_t abbrev;
        uint8_t ptr_size;
        uint8_t cu_die;
        uint16_t cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t fn_die;
        char fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym) sym[2];
        struct DebugInfo di;
        uint8_t da[24];
        char str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /*
             * Trick: The contents of code_gen_buffer are not present in
             * this fake ELF file; that got allocated elsewhere.  Therefore
             * we mark .text as SHT_NOBITS (similar to .bss) so that readers
             * will not look for contents.  We can record any address.
             */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /*
             * With nmemb == 1, fwrite returns the number of complete
             * items written, i.e. 1 on success.  This is debug-only
             * output, so failure is deliberately ignored; the check
             * exists only to consume the warn_unused_result value.
             */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Nothing to do.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/*
 * No support for the feature.  Provide the entry point expected by exec.c,
 * and implement the internal function we declared earlier.
 */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif