1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26
27 /* Define to dump the ELF file used to communicate with GDB. */
28 #undef DEBUG_JIT
29
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/target_page.h"
38 #include "exec/translation-block.h"
39 #include "exec/tlb-common.h"
40 #include "tcg/startup.h"
41 #include "tcg/tcg-op-common.h"
42
43 #if UINTPTR_MAX == UINT32_MAX
44 # define ELF_CLASS ELFCLASS32
45 #else
46 # define ELF_CLASS ELFCLASS64
47 #endif
48 #if HOST_BIG_ENDIAN
49 # define ELF_DATA ELFDATA2MSB
50 #else
51 # define ELF_DATA ELFDATA2LSB
52 #endif
53
54 #include "elf.h"
55 #include "exec/log.h"
56 #include "tcg/tcg-ldst.h"
57 #include "tcg/tcg-temp-internal.h"
58 #include "tcg-internal.h"
59 #include "tcg/perf.h"
60 #include "tcg-has.h"
61 #ifdef CONFIG_USER_ONLY
62 #include "user/guest-base.h"
63 #endif
64
65 /* Forward declarations for functions declared in tcg-target.c.inc and
66 used here. */
67 static void tcg_target_init(TCGContext *s);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
70 intptr_t value, intptr_t addend);
71 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
72
73 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
74 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
76
77 /* The CIE and FDE header definitions will be common to all hosts. */
78 typedef struct {
79 uint32_t len __attribute__((aligned((sizeof(void *)))));
80 uint32_t id;
81 uint8_t version;
82 char augmentation[1];
83 uint8_t code_align;
84 uint8_t data_align;
85 uint8_t return_column;
86 } DebugFrameCIE;
87
88 typedef struct QEMU_PACKED {
89 uint32_t len __attribute__((aligned((sizeof(void *)))));
90 uint32_t cie_offset;
91 uintptr_t func_start;
92 uintptr_t func_len;
93 } DebugFrameFDEHeader;
94
95 typedef struct QEMU_PACKED {
96 DebugFrameCIE cie;
97 DebugFrameFDEHeader fde;
98 } DebugFrameHeader;
99
100 struct TCGLabelQemuLdst {
101 bool is_ld; /* qemu_ld: true, qemu_st: false */
102 MemOpIdx oi;
103 TCGType type; /* result type of a load */
104 TCGReg addr_reg; /* reg index for guest virtual addr */
105 TCGReg datalo_reg; /* reg index for low word to be loaded or stored */
106 TCGReg datahi_reg; /* reg index for high word to be loaded or stored */
107 const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
108 tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
109 QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
110 };
111
112 static void tcg_register_jit_int(const void *buf, size_t size,
113 const void *debug_frame,
114 size_t debug_frame_size)
115 __attribute__((unused));
116
117 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
118 static void tcg_out_tb_start(TCGContext *s);
119 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
120 intptr_t arg2);
121 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
122 static void tcg_out_movi(TCGContext *s, TCGType type,
123 TCGReg ret, tcg_target_long arg);
124 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
133 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
134 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
135 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
136 static void tcg_out_goto_tb(TCGContext *s, int which);
137 static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest);
138 static void tcg_out_mb(TCGContext *s, unsigned bar);
139 static void tcg_out_br(TCGContext *s, TCGLabel *l);
140 static void tcg_out_set_carry(TCGContext *s);
141 static void tcg_out_set_borrow(TCGContext *s);
142 #if TCG_TARGET_MAYBE_vec
143 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
144 TCGReg dst, TCGReg src);
145 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
146 TCGReg dst, TCGReg base, intptr_t offset);
147 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
148 TCGReg dst, int64_t arg);
149 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
150 unsigned vecl, unsigned vece,
151 const TCGArg args[TCG_MAX_OP_ARGS],
152 const int const_args[TCG_MAX_OP_ARGS]);
153 #else
154 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
155 TCGReg dst, TCGReg src)
156 {
157 g_assert_not_reached();
158 }
159 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
160 TCGReg dst, TCGReg base, intptr_t offset)
161 {
162 g_assert_not_reached();
163 }
164 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
165 TCGReg dst, int64_t arg)
166 {
167 g_assert_not_reached();
168 }
169 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
170 unsigned vecl, unsigned vece,
171 const TCGArg args[TCG_MAX_OP_ARGS],
172 const int const_args[TCG_MAX_OP_ARGS])
173 {
174 g_assert_not_reached();
175 }
176 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
177 {
178 return 0;
179 }
180 #endif
181 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
182 intptr_t arg2);
183 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
184 TCGReg base, intptr_t ofs);
185 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
186 const TCGHelperInfo *info);
187 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
188 static bool tcg_target_const_match(int64_t val, int ct,
189 TCGType type, TCGCond cond, int vece);
190
191 #ifndef CONFIG_USER_ONLY
192 #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; })
193 #endif
194
195 typedef struct TCGLdstHelperParam {
196 TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
197 unsigned ntmp;
198 int tmp[3];
199 } TCGLdstHelperParam;
200
201 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
202 const TCGLdstHelperParam *p)
203 __attribute__((unused));
204 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
205 bool load_sign, const TCGLdstHelperParam *p)
206 __attribute__((unused));
207 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
208 const TCGLdstHelperParam *p)
209 __attribute__((unused));
210
211 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
212 [MO_UB] = helper_ldub_mmu,
213 [MO_SB] = helper_ldsb_mmu,
214 [MO_UW] = helper_lduw_mmu,
215 [MO_SW] = helper_ldsw_mmu,
216 [MO_UL] = helper_ldul_mmu,
217 [MO_UQ] = helper_ldq_mmu,
218 #if TCG_TARGET_REG_BITS == 64
219 [MO_SL] = helper_ldsl_mmu,
220 [MO_128] = helper_ld16_mmu,
221 #endif
222 };
223
224 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
225 [MO_8] = helper_stb_mmu,
226 [MO_16] = helper_stw_mmu,
227 [MO_32] = helper_stl_mmu,
228 [MO_64] = helper_stq_mmu,
229 #if TCG_TARGET_REG_BITS == 64
230 [MO_128] = helper_st16_mmu,
231 #endif
232 };
233
234 typedef struct {
235 MemOp atom; /* lg2 bits of atomicity required */
236 MemOp align; /* lg2 bits of alignment to use */
237 } TCGAtomAlign;
238
239 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
240 MemOp host_atom, bool allow_two_ops)
241 __attribute__((unused));
242
243 #ifdef CONFIG_USER_ONLY
244 bool tcg_use_softmmu;
245 #endif
246
247 TCGContext tcg_init_ctx;
248 __thread TCGContext *tcg_ctx;
249
250 TCGContext **tcg_ctxs;
251 unsigned int tcg_cur_ctxs;
252 unsigned int tcg_max_ctxs;
253 TCGv_env tcg_env;
254 const void *tcg_code_gen_epilogue;
255 uintptr_t tcg_splitwx_diff;
256
257 #ifndef CONFIG_TCG_INTERPRETER
258 tcg_prologue_fn *tcg_qemu_tb_exec;
259 #endif
260
261 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
262 static TCGRegSet tcg_target_call_clobber_regs;
263
264 #if TCG_TARGET_INSN_UNIT_SIZE == 1
265 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
266 {
267 *s->code_ptr++ = v;
268 }
269
270 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
271 uint8_t v)
272 {
273 *p = v;
274 }
275 #endif
276
277 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
278 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
279 {
280 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
281 *s->code_ptr++ = v;
282 } else {
283 tcg_insn_unit *p = s->code_ptr;
284 memcpy(p, &v, sizeof(v));
285 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
286 }
287 }
288
289 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
290 uint16_t v)
291 {
292 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
293 *p = v;
294 } else {
295 memcpy(p, &v, sizeof(v));
296 }
297 }
298 #endif
299
300 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
301 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
302 {
303 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
304 *s->code_ptr++ = v;
305 } else {
306 tcg_insn_unit *p = s->code_ptr;
307 memcpy(p, &v, sizeof(v));
308 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
309 }
310 }
311
312 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
313 uint32_t v)
314 {
315 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
316 *p = v;
317 } else {
318 memcpy(p, &v, sizeof(v));
319 }
320 }
321 #endif
322
323 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
324 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
325 {
326 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
327 *s->code_ptr++ = v;
328 } else {
329 tcg_insn_unit *p = s->code_ptr;
330 memcpy(p, &v, sizeof(v));
331 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
332 }
333 }
334
335 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
336 uint64_t v)
337 {
338 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
339 *p = v;
340 } else {
341 memcpy(p, &v, sizeof(v));
342 }
343 }
344 #endif
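
/*
 * Illustration (not a claim about any particular backend): with a
 * one-byte insn unit, as on byte-encoded hosts such as x86, tcg_out32()
 * takes the memcpy path and advances code_ptr by four units, whereas a
 * host with four-byte units stores the word with a single
 * "*s->code_ptr++ = v" and advances by one unit.
 */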
345
346 /* label relocation processing */
347
348 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
349 TCGLabel *l, intptr_t addend)
350 {
351 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
352
353 r->type = type;
354 r->ptr = code_ptr;
355 r->addend = addend;
356 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
357 }
358
359 static void tcg_out_label(TCGContext *s, TCGLabel *l)
360 {
361 tcg_debug_assert(!l->has_value);
362 l->has_value = 1;
363 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
364 }
365
366 TCGLabel *gen_new_label(void)
367 {
368 TCGContext *s = tcg_ctx;
369 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
370
371 memset(l, 0, sizeof(TCGLabel));
372 l->id = s->nb_labels++;
373 QSIMPLEQ_INIT(&l->branches);
374 QSIMPLEQ_INIT(&l->relocs);
375
376 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
377
378 return l;
379 }
380
381 static bool tcg_resolve_relocs(TCGContext *s)
382 {
383 TCGLabel *l;
384
385 QSIMPLEQ_FOREACH(l, &s->labels, next) {
386 TCGRelocation *r;
387 uintptr_t value = l->u.value;
388
389 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
390 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
391 return false;
392 }
393 }
394 }
395 return true;
396 }
397
398 static void set_jmp_reset_offset(TCGContext *s, int which)
399 {
400 /*
401 * We will check for overflow at the end of the opcode loop in
402 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
403 */
404 s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
405 }
406
407 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
408 {
409 /*
410 * We will check for overflow at the end of the opcode loop in
411 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
412 */
413 s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
414 }
415
416 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
417 {
418 /*
419 * Return the read-execute version of the pointer, for the benefit
420 * of any pc-relative addressing mode.
421 */
422 return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
423 }
424
425 static int __attribute__((unused))
426 tlb_mask_table_ofs(TCGContext *s, int which)
427 {
428 return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
429 sizeof(CPUNegativeOffsetState));
430 }
431
432 /* Signal overflow, starting over with fewer guest insns. */
433 static G_NORETURN
434 void tcg_raise_tb_overflow(TCGContext *s)
435 {
436 siglongjmp(s->jmp_trans, -2);
437 }
438
439 /*
440 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
441 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
442 *
443 * However, tcg_out_helper_load_slots reuses this field to hold an
444  * argument slot number (which may designate an argument register or an
445 * argument stack slot), converting to TCGReg once all arguments that
446 * are destined for the stack are processed.
447 */
448 typedef struct TCGMovExtend {
449 unsigned dst;
450 TCGReg src;
451 TCGType dst_type;
452 TCGType src_type;
453 MemOp src_ext;
454 } TCGMovExtend;
455
456 /**
457 * tcg_out_movext -- move and extend
458 * @s: tcg context
459 * @dst_type: integral type for destination
460 * @dst: destination register
461 * @src_type: integral type for source
462 * @src_ext: extension to apply to source
463 * @src: source register
464 *
465 * Move or extend @src into @dst, depending on @src_ext and the types.
466 */
467 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
468 TCGType src_type, MemOp src_ext, TCGReg src)
469 {
470 switch (src_ext) {
471 case MO_UB:
472 tcg_out_ext8u(s, dst, src);
473 break;
474 case MO_SB:
475 tcg_out_ext8s(s, dst_type, dst, src);
476 break;
477 case MO_UW:
478 tcg_out_ext16u(s, dst, src);
479 break;
480 case MO_SW:
481 tcg_out_ext16s(s, dst_type, dst, src);
482 break;
483 case MO_UL:
484 case MO_SL:
485 if (dst_type == TCG_TYPE_I32) {
486 if (src_type == TCG_TYPE_I32) {
487 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
488 } else {
489 tcg_out_extrl_i64_i32(s, dst, src);
490 }
491 } else if (src_type == TCG_TYPE_I32) {
492 if (src_ext & MO_SIGN) {
493 tcg_out_exts_i32_i64(s, dst, src);
494 } else {
495 tcg_out_extu_i32_i64(s, dst, src);
496 }
497 } else {
498 if (src_ext & MO_SIGN) {
499 tcg_out_ext32s(s, dst, src);
500 } else {
501 tcg_out_ext32u(s, dst, src);
502 }
503 }
504 break;
505 case MO_UQ:
506 tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
507 if (dst_type == TCG_TYPE_I32) {
508 tcg_out_extrl_i64_i32(s, dst, src);
509 } else {
510 tcg_out_mov(s, TCG_TYPE_I64, dst, src);
511 }
512 break;
513 default:
514 g_assert_not_reached();
515 }
516 }
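
/*
 * Usage sketch: to widen a 32-bit value into a 64-bit destination
 * register, a backend could call
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 * which resolves to tcg_out_exts_i32_i64() via the MO_SL case above.
 */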
517
518 /* Minor variations on a theme, using a structure. */
519 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
520 TCGReg src)
521 {
522 tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
523 }
524
525 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
526 {
527 tcg_out_movext1_new_src(s, i, i->src);
528 }
529
530 /**
531  * tcg_out_movext2 -- move and extend two pairs
532 * @s: tcg context
533 * @i1: first move description
534 * @i2: second move description
535 * @scratch: temporary register, or -1 for none
536 *
537 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
538 * between the sources and destinations.
539 */
540
541 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
542 const TCGMovExtend *i2, int scratch)
543 {
544 TCGReg src1 = i1->src;
545 TCGReg src2 = i2->src;
546
547 if (i1->dst != src2) {
548 tcg_out_movext1(s, i1);
549 tcg_out_movext1(s, i2);
550 return;
551 }
552 if (i2->dst == src1) {
553 TCGType src1_type = i1->src_type;
554 TCGType src2_type = i2->src_type;
555
556 if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
557 /* The data is now in the correct registers, now extend. */
558 src1 = i2->src;
559 src2 = i1->src;
560 } else {
561 tcg_debug_assert(scratch >= 0);
562 tcg_out_mov(s, src1_type, scratch, src1);
563 src1 = scratch;
564 }
565 }
566 tcg_out_movext1_new_src(s, i2, src2);
567 tcg_out_movext1_new_src(s, i1, src1);
568 }
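
/*
 * Example (with hypothetical registers R0/R1): if i1 moves R1 into R0
 * and i2 moves R0 into R1, the destinations overlap the sources.  When
 * the backend implements tcg_out_xchg() the registers are swapped first
 * and only the in-place extensions remain; otherwise i1's source is
 * parked in @scratch before i2's destination overwrites it.
 */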
569
570 /**
571  * tcg_out_movext3 -- move and extend three pairs
572 * @s: tcg context
573 * @i1: first move description
574 * @i2: second move description
575 * @i3: third move description
576 * @scratch: temporary register, or -1 for none
577 *
578 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
579 * between the sources and destinations.
580 */
581
582 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
583 const TCGMovExtend *i2, const TCGMovExtend *i3,
584 int scratch)
585 {
586 TCGReg src1 = i1->src;
587 TCGReg src2 = i2->src;
588 TCGReg src3 = i3->src;
589
590 if (i1->dst != src2 && i1->dst != src3) {
591 tcg_out_movext1(s, i1);
592 tcg_out_movext2(s, i2, i3, scratch);
593 return;
594 }
595 if (i2->dst != src1 && i2->dst != src3) {
596 tcg_out_movext1(s, i2);
597 tcg_out_movext2(s, i1, i3, scratch);
598 return;
599 }
600 if (i3->dst != src1 && i3->dst != src2) {
601 tcg_out_movext1(s, i3);
602 tcg_out_movext2(s, i1, i2, scratch);
603 return;
604 }
605
606 /*
607 * There is a cycle. Since there are only 3 nodes, the cycle is
608 * either "clockwise" or "anti-clockwise", and can be solved with
609 * a single scratch or two xchg.
610 */
611 if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
612 /* "Clockwise" */
613 if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
614 tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
615 /* The data is now in the correct registers, now extend. */
616 tcg_out_movext1_new_src(s, i1, i1->dst);
617 tcg_out_movext1_new_src(s, i2, i2->dst);
618 tcg_out_movext1_new_src(s, i3, i3->dst);
619 } else {
620 tcg_debug_assert(scratch >= 0);
621 tcg_out_mov(s, i1->src_type, scratch, src1);
622 tcg_out_movext1(s, i3);
623 tcg_out_movext1(s, i2);
624 tcg_out_movext1_new_src(s, i1, scratch);
625 }
626 } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
627 /* "Anti-clockwise" */
628 if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
629 tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
630 /* The data is now in the correct registers, now extend. */
631 tcg_out_movext1_new_src(s, i1, i1->dst);
632 tcg_out_movext1_new_src(s, i2, i2->dst);
633 tcg_out_movext1_new_src(s, i3, i3->dst);
634 } else {
635 tcg_debug_assert(scratch >= 0);
636 tcg_out_mov(s, i1->src_type, scratch, src1);
637 tcg_out_movext1(s, i2);
638 tcg_out_movext1(s, i3);
639 tcg_out_movext1_new_src(s, i1, scratch);
640 }
641 } else {
642 g_assert_not_reached();
643 }
644 }
645
646 /*
647 * Allocate a new TCGLabelQemuLdst entry.
648 */
649
650 __attribute__((unused))
651 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
652 {
653 TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
654
655 memset(l, 0, sizeof(*l));
656 QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
657
658 return l;
659 }
660
661 /*
662 * Allocate new constant pool entries.
663 */
664
665 typedef struct TCGLabelPoolData {
666 struct TCGLabelPoolData *next;
667 tcg_insn_unit *label;
668 intptr_t addend;
669 int rtype;
670 unsigned nlong;
671 tcg_target_ulong data[];
672 } TCGLabelPoolData;
673
674 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
675 tcg_insn_unit *label, intptr_t addend)
676 {
677 TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
678 + sizeof(tcg_target_ulong) * nlong);
679
680 n->label = label;
681 n->addend = addend;
682 n->rtype = rtype;
683 n->nlong = nlong;
684 return n;
685 }
686
687 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
688 {
689 TCGLabelPoolData *i, **pp;
690 int nlong = n->nlong;
691
692 /* Insertion sort on the pool. */
693 for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
694 if (nlong > i->nlong) {
695 break;
696 }
697 if (nlong < i->nlong) {
698 continue;
699 }
700 if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
701 break;
702 }
703 }
704 n->next = *pp;
705 *pp = n;
706 }
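
/*
 * The list built here is ordered by descending entry size (nlong) and,
 * within one size, by descending memcmp() order of the data, so that
 * identical constants end up adjacent and are merged by
 * tcg_out_pool_finalize() below.
 */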
707
708 /* The "usual" for generic integer code. */
709 __attribute__((unused))
710 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
711 tcg_insn_unit *label, intptr_t addend)
712 {
713 TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
714 n->data[0] = d;
715 new_pool_insert(s, n);
716 }
717
718 /* For v64 or v128, depending on the host. */
719 __attribute__((unused))
720 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
721 intptr_t addend, tcg_target_ulong d0,
722 tcg_target_ulong d1)
723 {
724 TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
725 n->data[0] = d0;
726 n->data[1] = d1;
727 new_pool_insert(s, n);
728 }
729
730 /* For v128 or v256, depending on the host. */
731 __attribute__((unused))
732 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
733 intptr_t addend, tcg_target_ulong d0,
734 tcg_target_ulong d1, tcg_target_ulong d2,
735 tcg_target_ulong d3)
736 {
737 TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
738 n->data[0] = d0;
739 n->data[1] = d1;
740 n->data[2] = d2;
741 n->data[3] = d3;
742 new_pool_insert(s, n);
743 }
744
745 /* For v256, for 32-bit host. */
746 __attribute__((unused))
747 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
748 intptr_t addend, tcg_target_ulong d0,
749 tcg_target_ulong d1, tcg_target_ulong d2,
750 tcg_target_ulong d3, tcg_target_ulong d4,
751 tcg_target_ulong d5, tcg_target_ulong d6,
752 tcg_target_ulong d7)
753 {
754 TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
755 n->data[0] = d0;
756 n->data[1] = d1;
757 n->data[2] = d2;
758 n->data[3] = d3;
759 n->data[4] = d4;
760 n->data[5] = d5;
761 n->data[6] = d6;
762 n->data[7] = d7;
763 new_pool_insert(s, n);
764 }
765
766 /*
767 * Generate TB finalization at the end of block
768 */
769
770 static int tcg_out_ldst_finalize(TCGContext *s)
771 {
772 TCGLabelQemuLdst *lb;
773
774 /* qemu_ld/st slow paths */
775 QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
776 if (lb->is_ld
777 ? !tcg_out_qemu_ld_slow_path(s, lb)
778 : !tcg_out_qemu_st_slow_path(s, lb)) {
779 return -2;
780 }
781
782 /*
783 * Test for (pending) buffer overflow. The assumption is that any
784 * one operation beginning below the high water mark cannot overrun
785 * the buffer completely. Thus we can test for overflow after
786 * generating code without having to check during generation.
787 */
788 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
789 return -1;
790 }
791 }
792 return 0;
793 }
794
795 static int tcg_out_pool_finalize(TCGContext *s)
796 {
797 TCGLabelPoolData *p = s->pool_labels;
798 TCGLabelPoolData *l = NULL;
799 void *a;
800
801 if (p == NULL) {
802 return 0;
803 }
804
805 /*
806 * ??? Round up to qemu_icache_linesize, but then do not round
807 * again when allocating the next TranslationBlock structure.
808 */
809 a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
810 sizeof(tcg_target_ulong) * p->nlong);
811 tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
812 s->data_gen_ptr = a;
813
814 for (; p != NULL; p = p->next) {
815 size_t size = sizeof(tcg_target_ulong) * p->nlong;
816 uintptr_t value;
817
818 if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
819 if (unlikely(a > s->code_gen_highwater)) {
820 return -1;
821 }
822 memcpy(a, p->data, size);
823 a += size;
824 l = p;
825 }
826
827 value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
828 if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
829 return -2;
830 }
831 }
832
833 s->code_ptr = a;
834 return 0;
835 }
836
837 #define C_PFX1(P, A) P##A
838 #define C_PFX2(P, A, B) P##A##_##B
839 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
840 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
841 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
842 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
843
844 /* Define an enumeration for the various combinations. */
845
846 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
847 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
848 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
849 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
850
851 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
852 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
853 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
854 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
855
856 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
857 #define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1),
858 #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1),
859
860 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
861 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
862 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
863 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
864 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
865
866 typedef enum {
867 C_Dynamic = -2,
868 C_NotImplemented = -1,
869 #include "tcg-target-con-set.h"
870 } TCGConstraintSetIndex;
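
/*
 * For example, a hypothetical "C_O1_I2(r, r, rI)" line in
 * tcg-target-con-set.h becomes the enumerator c_o1_i2_r_r_rI here,
 * and the matching TCGConstraintSet entry below.
 */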
871
872 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
873
874 #undef C_O0_I1
875 #undef C_O0_I2
876 #undef C_O0_I3
877 #undef C_O0_I4
878 #undef C_O1_I1
879 #undef C_O1_I2
880 #undef C_O1_I3
881 #undef C_O1_I4
882 #undef C_N1_I2
883 #undef C_N1O1_I1
884 #undef C_N2_I1
885 #undef C_O2_I1
886 #undef C_O2_I2
887 #undef C_O2_I3
888 #undef C_O2_I4
889 #undef C_N1_O1_I4
890
891 /* Put all of the constraint sets into an array, indexed by the enum. */
892
893 typedef struct TCGConstraintSet {
894 uint8_t nb_oargs, nb_iargs;
895 const char *args_ct_str[TCG_MAX_OP_ARGS];
896 } TCGConstraintSet;
897
898 #define C_O0_I1(I1) { 0, 1, { #I1 } },
899 #define C_O0_I2(I1, I2) { 0, 2, { #I1, #I2 } },
900 #define C_O0_I3(I1, I2, I3) { 0, 3, { #I1, #I2, #I3 } },
901 #define C_O0_I4(I1, I2, I3, I4) { 0, 4, { #I1, #I2, #I3, #I4 } },
902
903 #define C_O1_I1(O1, I1) { 1, 1, { #O1, #I1 } },
904 #define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } },
905 #define C_O1_I3(O1, I1, I2, I3) { 1, 3, { #O1, #I1, #I2, #I3 } },
906 #define C_O1_I4(O1, I1, I2, I3, I4) { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
907
908 #define C_N1_I2(O1, I1, I2) { 1, 2, { "&" #O1, #I1, #I2 } },
909 #define C_N1O1_I1(O1, O2, I1) { 2, 1, { "&" #O1, #O2, #I1 } },
910 #define C_N2_I1(O1, O2, I1) { 2, 1, { "&" #O1, "&" #O2, #I1 } },
911
912 #define C_O2_I1(O1, O2, I1) { 2, 1, { #O1, #O2, #I1 } },
913 #define C_O2_I2(O1, O2, I1, I2) { 2, 2, { #O1, #O2, #I1, #I2 } },
914 #define C_O2_I3(O1, O2, I1, I2, I3) { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
915 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
916 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
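
/*
 * With these definitions the same hypothetical "C_O1_I2(r, r, rI)"
 * line now expands to { 1, 2, { "r", "r", "rI" } }, giving the output
 * and input constraint strings for the register allocator.
 */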
917
918 static const TCGConstraintSet constraint_sets[] = {
919 #include "tcg-target-con-set.h"
920 };
921
922 #undef C_O0_I1
923 #undef C_O0_I2
924 #undef C_O0_I3
925 #undef C_O0_I4
926 #undef C_O1_I1
927 #undef C_O1_I2
928 #undef C_O1_I3
929 #undef C_O1_I4
930 #undef C_N1_I2
931 #undef C_N1O1_I1
932 #undef C_N2_I1
933 #undef C_O2_I1
934 #undef C_O2_I2
935 #undef C_O2_I3
936 #undef C_O2_I4
937 #undef C_N1_O1_I4
938
939 /* Expand the enumerator to be returned from tcg_target_op_def(). */
940
941 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
942 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
943 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
944 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
945
946 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
947 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
948 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
949 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
950
951 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
952 #define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1)
953 #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1)
954
955 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
956 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
957 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
958 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
959 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
960
961 /*
962 * TCGOutOp is the base class for a set of structures that describe how
963 * to generate code for a given TCGOpcode.
964 *
965 * @static_constraint:
966 * C_NotImplemented: The TCGOpcode is not supported by the backend.
967 * C_Dynamic: Use @dynamic_constraint to select a constraint set
968 * based on any of @type, @flags, or host isa.
969  *   Otherwise:        The register allocation constraints for the TCGOpcode.
970 *
971 * Subclasses of TCGOutOp will define a set of output routines that may
972 * be used. Such routines will often be selected by the set of registers
973 * and constants that come out of register allocation. The set of
974 * routines that are provided will guide the set of constraints that are
975 * legal. In particular, assume that tcg_optimize() has done its job in
976 * swapping commutative operands and folding operations for which all
977 * operands are constant.
978 */
979 typedef struct TCGOutOp {
980 TCGConstraintSetIndex static_constraint;
981 TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
982 } TCGOutOp;
983
984 typedef struct TCGOutOpAddSubCarry {
985 TCGOutOp base;
986 void (*out_rrr)(TCGContext *s, TCGType type,
987 TCGReg a0, TCGReg a1, TCGReg a2);
988 void (*out_rri)(TCGContext *s, TCGType type,
989 TCGReg a0, TCGReg a1, tcg_target_long a2);
990 void (*out_rir)(TCGContext *s, TCGType type,
991 TCGReg a0, tcg_target_long a1, TCGReg a2);
992 void (*out_rii)(TCGContext *s, TCGType type,
993 TCGReg a0, tcg_target_long a1, tcg_target_long a2);
994 } TCGOutOpAddSubCarry;
995
996 typedef struct TCGOutOpBinary {
997 TCGOutOp base;
998 void (*out_rrr)(TCGContext *s, TCGType type,
999 TCGReg a0, TCGReg a1, TCGReg a2);
1000 void (*out_rri)(TCGContext *s, TCGType type,
1001 TCGReg a0, TCGReg a1, tcg_target_long a2);
1002 } TCGOutOpBinary;
1003
1004 typedef struct TCGOutOpBrcond {
1005 TCGOutOp base;
1006 void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
1007 TCGReg a1, TCGReg a2, TCGLabel *label);
1008 void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
1009 TCGReg a1, tcg_target_long a2, TCGLabel *label);
1010 } TCGOutOpBrcond;
1011
1012 typedef struct TCGOutOpBrcond2 {
1013 TCGOutOp base;
1014 void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
1015 TCGArg bl, bool const_bl,
1016 TCGArg bh, bool const_bh, TCGLabel *l);
1017 } TCGOutOpBrcond2;
1018
1019 typedef struct TCGOutOpBswap {
1020 TCGOutOp base;
1021 void (*out_rr)(TCGContext *s, TCGType type,
1022 TCGReg a0, TCGReg a1, unsigned flags);
1023 } TCGOutOpBswap;
1024
1025 typedef struct TCGOutOpDeposit {
1026 TCGOutOp base;
1027 void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
1028 TCGReg a2, unsigned ofs, unsigned len);
1029 void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
1030 tcg_target_long a2, unsigned ofs, unsigned len);
1031 void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
1032 TCGReg a2, unsigned ofs, unsigned len);
1033 } TCGOutOpDeposit;
1034
1035 typedef struct TCGOutOpDivRem {
1036 TCGOutOp base;
1037 void (*out_rr01r)(TCGContext *s, TCGType type,
1038 TCGReg a0, TCGReg a1, TCGReg a4);
1039 } TCGOutOpDivRem;
1040
1041 typedef struct TCGOutOpExtract {
1042 TCGOutOp base;
1043 void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
1044 unsigned ofs, unsigned len);
1045 } TCGOutOpExtract;
1046
1047 typedef struct TCGOutOpExtract2 {
1048 TCGOutOp base;
1049 void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
1050 TCGReg a2, unsigned shr);
1051 } TCGOutOpExtract2;
1052
1053 typedef struct TCGOutOpLoad {
1054 TCGOutOp base;
1055 void (*out)(TCGContext *s, TCGType type, TCGReg dest,
1056 TCGReg base, intptr_t offset);
1057 } TCGOutOpLoad;
1058
1059 typedef struct TCGOutOpMovcond {
1060 TCGOutOp base;
1061 void (*out)(TCGContext *s, TCGType type, TCGCond cond,
1062 TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
1063                 TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
1064 } TCGOutOpMovcond;
1065
1066 typedef struct TCGOutOpMul2 {
1067 TCGOutOp base;
1068 void (*out_rrrr)(TCGContext *s, TCGType type,
1069 TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
1070 } TCGOutOpMul2;
1071
1072 typedef struct TCGOutOpQemuLdSt {
1073 TCGOutOp base;
1074 void (*out)(TCGContext *s, TCGType type, TCGReg dest,
1075 TCGReg addr, MemOpIdx oi);
1076 } TCGOutOpQemuLdSt;
1077
1078 typedef struct TCGOutOpQemuLdSt2 {
1079 TCGOutOp base;
1080 void (*out)(TCGContext *s, TCGType type, TCGReg dlo, TCGReg dhi,
1081 TCGReg addr, MemOpIdx oi);
1082 } TCGOutOpQemuLdSt2;
1083
1084 typedef struct TCGOutOpUnary {
1085 TCGOutOp base;
1086 void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
1087 } TCGOutOpUnary;
1088
1089 typedef struct TCGOutOpSetcond {
1090 TCGOutOp base;
1091 void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
1092 TCGReg ret, TCGReg a1, TCGReg a2);
1093 void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
1094 TCGReg ret, TCGReg a1, tcg_target_long a2);
1095 } TCGOutOpSetcond;
1096
1097 typedef struct TCGOutOpSetcond2 {
1098 TCGOutOp base;
1099 void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
1100 TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
1101 } TCGOutOpSetcond2;
1102
1103 typedef struct TCGOutOpStore {
1104 TCGOutOp base;
1105 void (*out_r)(TCGContext *s, TCGType type, TCGReg data,
1106 TCGReg base, intptr_t offset);
1107 void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data,
1108 TCGReg base, intptr_t offset);
1109 } TCGOutOpStore;
1110
1111 typedef struct TCGOutOpSubtract {
1112 TCGOutOp base;
1113 void (*out_rrr)(TCGContext *s, TCGType type,
1114 TCGReg a0, TCGReg a1, TCGReg a2);
1115 void (*out_rir)(TCGContext *s, TCGType type,
1116 TCGReg a0, tcg_target_long a1, TCGReg a2);
1117 } TCGOutOpSubtract;
1118
1119 #include "tcg-target.c.inc"
1120
1121 #ifndef CONFIG_TCG_INTERPRETER
1122 /* Validate CPUTLBDescFast placement. */
1123 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
1124 sizeof(CPUNegativeOffsetState))
1125 < MIN_TLB_MASK_TABLE_OFS);
1126 #endif
1127
1128 #if TCG_TARGET_REG_BITS == 64
1129 /*
1130 * We require these functions for slow-path function calls.
1131 * Adapt them generically for opcode output.
1132 */
1133
1134 static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
1135 {
1136 tcg_out_exts_i32_i64(s, a0, a1);
1137 }
1138
1139 static const TCGOutOpUnary outop_exts_i32_i64 = {
1140 .base.static_constraint = C_O1_I1(r, r),
1141 .out_rr = tgen_exts_i32_i64,
1142 };
1143
1144 static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
1145 {
1146 tcg_out_extu_i32_i64(s, a0, a1);
1147 }
1148
1149 static const TCGOutOpUnary outop_extu_i32_i64 = {
1150 .base.static_constraint = C_O1_I1(r, r),
1151 .out_rr = tgen_extu_i32_i64,
1152 };
1153
1154 static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
1155 {
1156 tcg_out_extrl_i64_i32(s, a0, a1);
1157 }
1158
1159 static const TCGOutOpUnary outop_extrl_i64_i32 = {
1160 .base.static_constraint = C_O1_I1(r, r),
1161 .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
1162 };
1163 #endif
1164
1165 static const TCGOutOp outop_goto_ptr = {
1166 .static_constraint = C_O0_I1(r),
1167 };
1168
1169 static const TCGOutOpLoad outop_ld = {
1170 .base.static_constraint = C_O1_I1(r, r),
1171 .out = tcg_out_ld,
1172 };
1173
1174 /*
1175 * Register V as the TCGOutOp for O.
1176 * This verifies that V is of type T, otherwise give a nice compiler error.
1177 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1178 */
1179 #define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base)
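
/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) expands
 * to "[INDEX_op_add] = &outop_add.base" and fails to compile if
 * outop_add is not in fact a TCGOutOpBinary.
 */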
1180
1181 /* Register allocation descriptions for every TCGOpcode. */
1182 static const TCGOutOp * const all_outop[NB_OPS] = {
1183 OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
1184 OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci),
1185 OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio),
1186 OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco),
1187 /* addc1o is implemented with set_carry + addcio */
1188 OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio),
1189 OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
1190 OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
1191 OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
1192 OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
1193 OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
1194 OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
1195 OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
1196 OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
1197 OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
1198 OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
1199 OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
1200 OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
1201 OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
1202 OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
1203 OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
1204 OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
1205 OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u),
1206 OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s),
1207 OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u),
1208 OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s),
1209 OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld),
1210 OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
1211 OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
1212 OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
1213 OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
1214 OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
1215 OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
1216 OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
1217 OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
1218 OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
1219 OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
1220 OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
1221 OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
1222 OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
1223 OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld),
1224 OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2),
1225 OUTOP(INDEX_op_qemu_st, TCGOutOpQemuLdSt, outop_qemu_st),
1226 OUTOP(INDEX_op_qemu_st2, TCGOutOpQemuLdSt2, outop_qemu_st2),
1227 OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
1228 OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
1229 OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
1230 OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
1231 OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
1232 OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
1233 OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
1234 OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
1235 OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
1236 OUTOP(INDEX_op_st, TCGOutOpStore, outop_st),
1237 OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8),
1238 OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16),
1239 OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
1240 OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi),
1241 OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio),
1242 OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo),
1243 /* subb1o is implemented with set_borrow + subbio */
1244 OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio),
1245 OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
1246
1247 [INDEX_op_goto_ptr] = &outop_goto_ptr,
1248
1249 #if TCG_TARGET_REG_BITS == 32
1250 OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
1251 OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
1252 #else
1253 OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
1254 OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
1255 OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
1256 OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
1257 OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
1258 OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u),
1259 OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s),
1260 OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st),
1261 #endif
1262 };
1263
1264 #undef OUTOP
1265
1266 /*
1267 * All TCG threads except the parent (i.e. the one that called tcg_context_init
1268 * and registered the target's TCG globals) must register with this function
1269 * before initiating translation.
1270 *
1271 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1272 * of tcg_region_init() for the reasoning behind this.
1273 *
1274 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1275 * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
1276  * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
1277 *
1278 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
1279  * iterates over the array (e.g. tcg_code_size()) the same for both system/user
1280 * modes.
1281 */
1282 #ifdef CONFIG_USER_ONLY
1283 void tcg_register_thread(void)
1284 {
1285 tcg_ctx = &tcg_init_ctx;
1286 }
1287 #else
1288 void tcg_register_thread(void)
1289 {
1290 TCGContext *s = g_malloc(sizeof(*s));
1291 unsigned int i, n;
1292
1293 *s = tcg_init_ctx;
1294
1295 /* Relink mem_base. */
1296 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1297 if (tcg_init_ctx.temps[i].mem_base) {
1298 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1299 tcg_debug_assert(b >= 0 && b < n);
1300 s->temps[i].mem_base = &s->temps[b];
1301 }
1302 }
1303
1304 /* Claim an entry in tcg_ctxs */
1305 n = qatomic_fetch_inc(&tcg_cur_ctxs);
1306 g_assert(n < tcg_max_ctxs);
1307 qatomic_set(&tcg_ctxs[n], s);
1308
1309 if (n > 0) {
1310 tcg_region_initial_alloc(s);
1311 }
1312
1313 tcg_ctx = s;
1314 }
1315 #endif /* !CONFIG_USER_ONLY */
1316
1317 /* pool based memory allocation */
1318 void *tcg_malloc_internal(TCGContext *s, int size)
1319 {
1320 TCGPool *p;
1321 int pool_size;
1322
1323 if (size > TCG_POOL_CHUNK_SIZE) {
1324 /* big malloc: insert a new pool (XXX: could optimize) */
1325 p = g_malloc(sizeof(TCGPool) + size);
1326 p->size = size;
1327 p->next = s->pool_first_large;
1328 s->pool_first_large = p;
1329 return p->data;
1330 } else {
1331 p = s->pool_current;
1332 if (!p) {
1333 p = s->pool_first;
1334 if (!p) {
1335 goto new_pool;
1336 }
1337 } else {
1338 if (!p->next) {
1339 new_pool:
1340 pool_size = TCG_POOL_CHUNK_SIZE;
1341 p = g_malloc(sizeof(TCGPool) + pool_size);
1342 p->size = pool_size;
1343 p->next = NULL;
1344 if (s->pool_current) {
1345 s->pool_current->next = p;
1346 } else {
1347 s->pool_first = p;
1348 }
1349 } else {
1350 p = p->next;
1351 }
1352 }
1353 }
1354 s->pool_current = p;
1355 s->pool_cur = (uintptr_t)p->data + size;
1356 s->pool_end = (uintptr_t)p->data + p->size;
1357 return p->data;
1358 }
1359
1360 void tcg_pool_reset(TCGContext *s)
1361 {
1362 TCGPool *p, *t;
1363 for (p = s->pool_first_large; p; p = t) {
1364 t = p->next;
1365 g_free(p);
1366 }
1367 s->pool_first_large = NULL;
1368 s->pool_cur = s->pool_end = 0;
1369 s->pool_current = NULL;
1370 }
1371
1372 /*
1373 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1374 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1375 * We only use these for layout in tcg_out_ld_helper_ret and
1376 * tcg_out_st_helper_args, and share them between several of
1377 * the helpers, with the end result that it's easier to build manually.
1378 */
1379
1380 #if TCG_TARGET_REG_BITS == 32
1381 # define dh_typecode_ttl dh_typecode_i32
1382 #else
1383 # define dh_typecode_ttl dh_typecode_i64
1384 #endif
1385
1386 static TCGHelperInfo info_helper_ld32_mmu = {
1387 .flags = TCG_CALL_NO_WG,
1388 .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
1389 | dh_typemask(env, 1)
1390 | dh_typemask(i64, 2) /* uint64_t addr */
1391 | dh_typemask(i32, 3) /* unsigned oi */
1392 | dh_typemask(ptr, 4) /* uintptr_t ra */
1393 };
1394
1395 static TCGHelperInfo info_helper_ld64_mmu = {
1396 .flags = TCG_CALL_NO_WG,
1397 .typemask = dh_typemask(i64, 0) /* return uint64_t */
1398 | dh_typemask(env, 1)
1399 | dh_typemask(i64, 2) /* uint64_t addr */
1400 | dh_typemask(i32, 3) /* unsigned oi */
1401 | dh_typemask(ptr, 4) /* uintptr_t ra */
1402 };
1403
1404 static TCGHelperInfo info_helper_ld128_mmu = {
1405 .flags = TCG_CALL_NO_WG,
1406 .typemask = dh_typemask(i128, 0) /* return Int128 */
1407 | dh_typemask(env, 1)
1408 | dh_typemask(i64, 2) /* uint64_t addr */
1409 | dh_typemask(i32, 3) /* unsigned oi */
1410 | dh_typemask(ptr, 4) /* uintptr_t ra */
1411 };
1412
1413 static TCGHelperInfo info_helper_st32_mmu = {
1414 .flags = TCG_CALL_NO_WG,
1415 .typemask = dh_typemask(void, 0)
1416 | dh_typemask(env, 1)
1417 | dh_typemask(i64, 2) /* uint64_t addr */
1418 | dh_typemask(i32, 3) /* uint32_t data */
1419 | dh_typemask(i32, 4) /* unsigned oi */
1420 | dh_typemask(ptr, 5) /* uintptr_t ra */
1421 };
1422
1423 static TCGHelperInfo info_helper_st64_mmu = {
1424 .flags = TCG_CALL_NO_WG,
1425 .typemask = dh_typemask(void, 0)
1426 | dh_typemask(env, 1)
1427 | dh_typemask(i64, 2) /* uint64_t addr */
1428 | dh_typemask(i64, 3) /* uint64_t data */
1429 | dh_typemask(i32, 4) /* unsigned oi */
1430 | dh_typemask(ptr, 5) /* uintptr_t ra */
1431 };
1432
1433 static TCGHelperInfo info_helper_st128_mmu = {
1434 .flags = TCG_CALL_NO_WG,
1435 .typemask = dh_typemask(void, 0)
1436 | dh_typemask(env, 1)
1437 | dh_typemask(i64, 2) /* uint64_t addr */
1438 | dh_typemask(i128, 3) /* Int128 data */
1439 | dh_typemask(i32, 4) /* unsigned oi */
1440 | dh_typemask(ptr, 5) /* uintptr_t ra */
1441 };
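
/*
 * Each typemask above packs one 3-bit dh_typecode per position: bits
 * [2:0] describe the return value and bits [3n+2:3n] describe argument
 * n, e.g. for info_helper_ld32_mmu the mask is
 *     ttl | env << 3 | i64 << 6 | i32 << 9 | ptr << 12
 * (in terms of the dh_typecode_* values).  init_call_layout() below
 * walks this encoding three bits at a time.
 */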
1442
1443 #ifdef CONFIG_TCG_INTERPRETER
1444 static ffi_type *typecode_to_ffi(int argmask)
1445 {
1446 /*
1447 * libffi does not support __int128_t, so we have forced Int128
1448 * to use the structure definition instead of the builtin type.
1449 */
1450 static ffi_type *ffi_type_i128_elements[3] = {
1451 &ffi_type_uint64,
1452 &ffi_type_uint64,
1453 NULL
1454 };
1455 static ffi_type ffi_type_i128 = {
1456 .size = 16,
1457 .alignment = __alignof__(Int128),
1458 .type = FFI_TYPE_STRUCT,
1459 .elements = ffi_type_i128_elements,
1460 };
1461
1462 switch (argmask) {
1463 case dh_typecode_void:
1464 return &ffi_type_void;
1465 case dh_typecode_i32:
1466 return &ffi_type_uint32;
1467 case dh_typecode_s32:
1468 return &ffi_type_sint32;
1469 case dh_typecode_i64:
1470 return &ffi_type_uint64;
1471 case dh_typecode_s64:
1472 return &ffi_type_sint64;
1473 case dh_typecode_ptr:
1474 return &ffi_type_pointer;
1475 case dh_typecode_i128:
1476 return &ffi_type_i128;
1477 }
1478 g_assert_not_reached();
1479 }
1480
1481 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1482 {
1483 unsigned typemask = info->typemask;
1484 struct {
1485 ffi_cif cif;
1486 ffi_type *args[];
1487 } *ca;
1488 ffi_status status;
1489 int nargs;
1490
1491 /* Ignoring the return type, find the last non-zero field. */
1492 nargs = 32 - clz32(typemask >> 3);
1493 nargs = DIV_ROUND_UP(nargs, 3);
1494 assert(nargs <= MAX_CALL_IARGS);
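    /*
     * E.g. for the five-argument store helpers (env, addr, data, oi, ra)
     * the last non-zero typecode occupies bits [17:15] of typemask, so
     * after the shift 32 - clz32(...) lies between 13 and 15 and the
     * division rounds it up to 5.
     */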
1495
1496 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1497 ca->cif.rtype = typecode_to_ffi(typemask & 7);
1498 ca->cif.nargs = nargs;
1499
1500 if (nargs != 0) {
1501 ca->cif.arg_types = ca->args;
1502 for (int j = 0; j < nargs; ++j) {
1503 int typecode = extract32(typemask, (j + 1) * 3, 3);
1504 ca->args[j] = typecode_to_ffi(typecode);
1505 }
1506 }
1507
1508 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1509 ca->cif.rtype, ca->cif.arg_types);
1510 assert(status == FFI_OK);
1511
1512 return &ca->cif;
1513 }
1514
1515 #define HELPER_INFO_INIT(I) (&(I)->cif)
1516 #define HELPER_INFO_INIT_VAL(I) init_ffi_layout(I)
1517 #else
1518 #define HELPER_INFO_INIT(I) (&(I)->init)
1519 #define HELPER_INFO_INIT_VAL(I) 1
1520 #endif /* CONFIG_TCG_INTERPRETER */
1521
1522 static inline bool arg_slot_reg_p(unsigned arg_slot)
1523 {
1524 /*
1525 * Split the sizeof away from the comparison to avoid Werror from
1526 * "unsigned < 0 is always false", when iarg_regs is empty.
1527 */
1528 unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1529 return arg_slot < nreg;
1530 }
1531
1532 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1533 {
1534 unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1535 unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1536
1537 tcg_debug_assert(stk_slot < max);
1538 return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1539 }
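
/*
 * Sketch: on a host with six integer argument registers and
 * TCG_TARGET_CALL_STACK_OFFSET == 0, slots 0-5 resolve to
 * tcg_target_call_iarg_regs[0..5] and slot 7 resolves to stack offset
 * 1 * sizeof(tcg_target_long).
 */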
1540
1541 typedef struct TCGCumulativeArgs {
1542 int arg_idx; /* tcg_gen_callN args[] */
1543 int info_in_idx; /* TCGHelperInfo in[] */
1544 int arg_slot; /* regs+stack slot */
1545 int ref_slot; /* stack slots for references */
1546 } TCGCumulativeArgs;
1547
1548 static void layout_arg_even(TCGCumulativeArgs *cum)
1549 {
1550 cum->arg_slot += cum->arg_slot & 1;
1551 }
1552
1553 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1554 TCGCallArgumentKind kind)
1555 {
1556 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1557
1558 *loc = (TCGCallArgumentLoc){
1559 .kind = kind,
1560 .arg_idx = cum->arg_idx,
1561 .arg_slot = cum->arg_slot,
1562 };
1563 cum->info_in_idx++;
1564 cum->arg_slot++;
1565 }
1566
1567 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1568 TCGHelperInfo *info, int n)
1569 {
1570 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1571
1572 for (int i = 0; i < n; ++i) {
1573 /* Layout all using the same arg_idx, adjusting the subindex. */
1574 loc[i] = (TCGCallArgumentLoc){
1575 .kind = TCG_CALL_ARG_NORMAL,
1576 .arg_idx = cum->arg_idx,
1577 .tmp_subindex = i,
1578 .arg_slot = cum->arg_slot + i,
1579 };
1580 }
1581 cum->info_in_idx += n;
1582 cum->arg_slot += n;
1583 }
1584
1585 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1586 {
1587 TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1588 int n = 128 / TCG_TARGET_REG_BITS;
1589
1590 /* The first subindex carries the pointer. */
1591 layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1592
1593 /*
1594 * The callee is allowed to clobber memory associated with
1595      * structures passed by reference.  Therefore we must make copies.
1596 * Allocate space from "ref_slot", which will be adjusted to
1597 * follow the parameters on the stack.
1598 */
1599 loc[0].ref_slot = cum->ref_slot;
1600
1601 /*
1602 * Subsequent words also go into the reference slot, but
1603 * do not accumulate into the regular arguments.
1604 */
1605 for (int i = 1; i < n; ++i) {
1606 loc[i] = (TCGCallArgumentLoc){
1607 .kind = TCG_CALL_ARG_BY_REF_N,
1608 .arg_idx = cum->arg_idx,
1609 .tmp_subindex = i,
1610 .ref_slot = cum->ref_slot + i,
1611 };
1612 }
1613 cum->info_in_idx += n - 1; /* i=0 accounted for in layout_arg_1 */
1614 cum->ref_slot += n;
1615 }
1616
1617 static void init_call_layout(TCGHelperInfo *info)
1618 {
1619 int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1620 int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1621 unsigned typemask = info->typemask;
1622 unsigned typecode;
1623 TCGCumulativeArgs cum = { };
1624
1625 /*
1626 * Parse and place any function return value.
1627 */
1628 typecode = typemask & 7;
1629 switch (typecode) {
1630 case dh_typecode_void:
1631 info->nr_out = 0;
1632 break;
1633 case dh_typecode_i32:
1634 case dh_typecode_s32:
1635 case dh_typecode_ptr:
1636 info->nr_out = 1;
1637 info->out_kind = TCG_CALL_RET_NORMAL;
1638 break;
1639 case dh_typecode_i64:
1640 case dh_typecode_s64:
1641 info->nr_out = 64 / TCG_TARGET_REG_BITS;
1642 info->out_kind = TCG_CALL_RET_NORMAL;
1643 /* Query the last register now to trigger any assert early. */
1644 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1645 break;
1646 case dh_typecode_i128:
1647 info->nr_out = 128 / TCG_TARGET_REG_BITS;
1648 info->out_kind = TCG_TARGET_CALL_RET_I128;
1649 switch (TCG_TARGET_CALL_RET_I128) {
1650 case TCG_CALL_RET_NORMAL:
1651 /* Query the last register now to trigger any assert early. */
1652 tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1653 break;
1654 case TCG_CALL_RET_BY_VEC:
1655 /* Query the single register now to trigger any assert early. */
1656 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1657 break;
1658 case TCG_CALL_RET_BY_REF:
1659 /*
1660 * Allocate the first argument to the output.
1661 * We don't need to store this anywhere, just make it
1662 * unavailable for use in the input loop below.
1663 */
1664 cum.arg_slot = 1;
1665 break;
1666 default:
1667 qemu_build_not_reached();
1668 }
1669 break;
1670 default:
1671 g_assert_not_reached();
1672 }
1673
1674 /*
1675 * Parse and place function arguments.
1676 */
1677 for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1678 TCGCallArgumentKind kind;
1679 TCGType type;
1680
1681 typecode = typemask & 7;
1682 switch (typecode) {
1683 case dh_typecode_i32:
1684 case dh_typecode_s32:
1685 type = TCG_TYPE_I32;
1686 break;
1687 case dh_typecode_i64:
1688 case dh_typecode_s64:
1689 type = TCG_TYPE_I64;
1690 break;
1691 case dh_typecode_ptr:
1692 type = TCG_TYPE_PTR;
1693 break;
1694 case dh_typecode_i128:
1695 type = TCG_TYPE_I128;
1696 break;
1697 default:
1698 g_assert_not_reached();
1699 }
1700
1701 switch (type) {
1702 case TCG_TYPE_I32:
1703 switch (TCG_TARGET_CALL_ARG_I32) {
1704 case TCG_CALL_ARG_EVEN:
1705 layout_arg_even(&cum);
1706 /* fall through */
1707 case TCG_CALL_ARG_NORMAL:
1708 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1709 break;
1710 case TCG_CALL_ARG_EXTEND:
1711 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1712 layout_arg_1(&cum, info, kind);
1713 break;
1714 default:
1715 qemu_build_not_reached();
1716 }
1717 break;
1718
1719 case TCG_TYPE_I64:
1720 switch (TCG_TARGET_CALL_ARG_I64) {
1721 case TCG_CALL_ARG_EVEN:
1722 layout_arg_even(&cum);
1723 /* fall through */
1724 case TCG_CALL_ARG_NORMAL:
1725 if (TCG_TARGET_REG_BITS == 32) {
1726 layout_arg_normal_n(&cum, info, 2);
1727 } else {
1728 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1729 }
1730 break;
1731 default:
1732 qemu_build_not_reached();
1733 }
1734 break;
1735
1736 case TCG_TYPE_I128:
1737 switch (TCG_TARGET_CALL_ARG_I128) {
1738 case TCG_CALL_ARG_EVEN:
1739 layout_arg_even(&cum);
1740 /* fall through */
1741 case TCG_CALL_ARG_NORMAL:
1742 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1743 break;
1744 case TCG_CALL_ARG_BY_REF:
1745 layout_arg_by_ref(&cum, info);
1746 break;
1747 default:
1748 qemu_build_not_reached();
1749 }
1750 break;
1751
1752 default:
1753 g_assert_not_reached();
1754 }
1755 }
1756 info->nr_in = cum.info_in_idx;
1757
1758 /* Validate that we didn't overrun the input array. */
1759 assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1760 /* Validate the backend has enough argument space. */
1761 assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1762
1763 /*
1764 * Relocate the "ref_slot" area to the end of the parameters.
1765 * Minimizing this stack offset helps code size for x86,
1766 * which has a signed 8-bit offset encoding.
1767 */
1768 if (cum.ref_slot != 0) {
1769 int ref_base = 0;
1770
1771 if (cum.arg_slot > max_reg_slots) {
1772 int align = __alignof(Int128) / sizeof(tcg_target_long);
1773
1774 ref_base = cum.arg_slot - max_reg_slots;
1775 if (align > 1) {
1776 ref_base = ROUND_UP(ref_base, align);
1777 }
1778 }
1779 assert(ref_base + cum.ref_slot <= max_stk_slots);
1780 ref_base += max_reg_slots;
1781
1782 if (ref_base != 0) {
1783 for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1784 TCGCallArgumentLoc *loc = &info->in[i];
1785 switch (loc->kind) {
1786 case TCG_CALL_ARG_BY_REF:
1787 case TCG_CALL_ARG_BY_REF_N:
1788 loc->ref_slot += ref_base;
1789 break;
1790 default:
1791 break;
1792 }
1793 }
1794 }
1795 }
1796 }
1797
1798 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1799 static void process_constraint_sets(void);
1800 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1801 TCGReg reg, const char *name);
1802
1803 static void tcg_context_init(unsigned max_threads)
1804 {
1805 TCGContext *s = &tcg_init_ctx;
1806 int n, i;
1807 TCGTemp *ts;
1808
1809 memset(s, 0, sizeof(*s));
1810 s->nb_globals = 0;
1811
1812 init_call_layout(&info_helper_ld32_mmu);
1813 init_call_layout(&info_helper_ld64_mmu);
1814 init_call_layout(&info_helper_ld128_mmu);
1815 init_call_layout(&info_helper_st32_mmu);
1816 init_call_layout(&info_helper_st64_mmu);
1817 init_call_layout(&info_helper_st128_mmu);
1818
1819 tcg_target_init(s);
1820 process_constraint_sets();
1821
1822 /* Reverse the order of the saved registers, assuming they're all at
1823 the start of tcg_target_reg_alloc_order. */
1824 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1825 int r = tcg_target_reg_alloc_order[n];
1826 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1827 break;
1828 }
1829 }
1830 for (i = 0; i < n; ++i) {
1831 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1832 }
1833 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1834 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1835 }
1836
1837 tcg_ctx = s;
1838 /*
1839 * In user-mode we simply share the init context among threads, since we
1840 * use a single region. See the documentation of tcg_region_init() for the
1841 * reasoning behind this.
1842 * In system-mode we will have at most max_threads TCG threads.
1843 */
1844 #ifdef CONFIG_USER_ONLY
1845 tcg_ctxs = &tcg_ctx;
1846 tcg_cur_ctxs = 1;
1847 tcg_max_ctxs = 1;
1848 #else
1849 tcg_max_ctxs = max_threads;
1850 tcg_ctxs = g_new0(TCGContext *, max_threads);
1851 #endif
1852
1853 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1854 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1855 tcg_env = temp_tcgv_ptr(ts);
1856 }
1857
1858 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1859 {
1860 tcg_context_init(max_threads);
1861 tcg_region_init(tb_size, splitwx, max_threads);
1862 }
1863
1864 /*
1865 * Allocate TBs right before their corresponding translated code, making
1866 * sure that TBs and code are on different cache lines.
1867 */
1868 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1869 {
1870 uintptr_t align = qemu_icache_linesize;
1871 TranslationBlock *tb;
1872 void *next;
1873
1874 retry:
1875 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1876 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1877
1878 if (unlikely(next > s->code_gen_highwater)) {
1879 if (tcg_region_alloc(s)) {
1880 return NULL;
1881 }
1882 goto retry;
1883 }
1884 qatomic_set(&s->code_gen_ptr, next);
1885 return tb;
1886 }
1887
1888 void tcg_prologue_init(void)
1889 {
1890 TCGContext *s = tcg_ctx;
1891 size_t prologue_size;
1892
1893 s->code_ptr = s->code_gen_ptr;
1894 s->code_buf = s->code_gen_ptr;
1895 s->data_gen_ptr = NULL;
1896
1897 #ifndef CONFIG_TCG_INTERPRETER
1898 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1899 #endif
1900
1901 s->pool_labels = NULL;
1902
1903 qemu_thread_jit_write();
1904 /* Generate the prologue. */
1905 tcg_target_qemu_prologue(s);
1906
1907 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1908 {
1909 int result = tcg_out_pool_finalize(s);
1910 tcg_debug_assert(result == 0);
1911 }
1912
1913 prologue_size = tcg_current_code_size(s);
1914 perf_report_prologue(s->code_gen_ptr, prologue_size);
1915
1916 #ifndef CONFIG_TCG_INTERPRETER
1917 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1918 (uintptr_t)s->code_buf, prologue_size);
1919 #endif
1920
1921 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1922 FILE *logfile = qemu_log_trylock();
1923 if (logfile) {
1924 fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1925 if (s->data_gen_ptr) {
1926 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1927 size_t data_size = prologue_size - code_size;
1928 size_t i;
1929
1930 disas(logfile, s->code_gen_ptr, code_size);
1931
1932 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1933 if (sizeof(tcg_target_ulong) == 8) {
1934 fprintf(logfile,
1935 "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1936 (uintptr_t)s->data_gen_ptr + i,
1937 *(uint64_t *)(s->data_gen_ptr + i));
1938 } else {
1939 fprintf(logfile,
1940 "0x%08" PRIxPTR ": .long 0x%08x\n",
1941 (uintptr_t)s->data_gen_ptr + i,
1942 *(uint32_t *)(s->data_gen_ptr + i));
1943 }
1944 }
1945 } else {
1946 disas(logfile, s->code_gen_ptr, prologue_size);
1947 }
1948 fprintf(logfile, "\n");
1949 qemu_log_unlock(logfile);
1950 }
1951 }
1952
1953 #ifndef CONFIG_TCG_INTERPRETER
1954 /*
1955 * Assert that goto_ptr is implemented completely, setting an epilogue.
1956 * For tci, we use NULL as the signal to return from the interpreter,
1957 * so skip this check.
1958 */
1959 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1960 #endif
1961
1962 tcg_region_prologue_set(s);
1963 }
1964
1965 void tcg_func_start(TCGContext *s)
1966 {
1967 tcg_pool_reset(s);
1968 s->nb_temps = s->nb_globals;
1969
1970 /* No temps have been previously allocated for size or locality. */
1971 tcg_temp_ebb_reset_freed(s);
1972
1973 /* No constant temps have been previously allocated. */
1974 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1975 if (s->const_table[i]) {
1976 g_hash_table_remove_all(s->const_table[i]);
1977 }
1978 }
1979
1980 s->nb_ops = 0;
1981 s->nb_labels = 0;
1982 s->current_frame_offset = s->frame_start;
1983
1984 #ifdef CONFIG_DEBUG_TCG
1985 s->goto_tb_issue_mask = 0;
1986 #endif
1987
1988 QTAILQ_INIT(&s->ops);
1989 QTAILQ_INIT(&s->free_ops);
1990 s->emit_before_op = NULL;
1991 QSIMPLEQ_INIT(&s->labels);
1992
1993 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1994 }
1995
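/*
 * Allocate the next TCGTemp slot, zero-initialized; raises a TB
 * overflow via tcg_raise_tb_overflow() if TCG_MAX_TEMPS is exceeded.
 */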
1996 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1997 {
1998 int n = s->nb_temps++;
1999
2000 if (n >= TCG_MAX_TEMPS) {
2001 tcg_raise_tb_overflow(s);
2002 }
2003 return memset(&s->temps[n], 0, sizeof(TCGTemp));
2004 }
2005
2006 static TCGTemp *tcg_global_alloc(TCGContext *s)
2007 {
2008 TCGTemp *ts;
2009
2010 tcg_debug_assert(s->nb_globals == s->nb_temps);
2011 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
2012 s->nb_globals++;
2013 ts = tcg_temp_alloc(s);
2014 ts->kind = TEMP_GLOBAL;
2015
2016 return ts;
2017 }
2018
2019 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
2020 TCGReg reg, const char *name)
2021 {
2022 TCGTemp *ts;
2023
2024 tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
2025
2026 ts = tcg_global_alloc(s);
2027 ts->base_type = type;
2028 ts->type = type;
2029 ts->kind = TEMP_FIXED;
2030 ts->reg = reg;
2031 ts->name = name;
2032 tcg_regset_set_reg(s->reserved_regs, reg);
2033
2034 return ts;
2035 }
2036
2037 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
2038 {
2039 s->frame_start = start;
2040 s->frame_end = start + size;
2041 s->frame_temp
2042 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
2043 }
2044
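/*
 * Create a global temporary backed by memory at BASE + OFFSET.  On
 * 32-bit hosts a TCG_TYPE_I64 global is represented by two consecutive
 * I32 temps, named "<name>_0" and "<name>_1", at OFFSET and OFFSET + 4.
 */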
2045 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
2046 const char *name, TCGType type)
2047 {
2048 TCGContext *s = tcg_ctx;
2049 TCGTemp *base_ts = tcgv_ptr_temp(base);
2050 TCGTemp *ts = tcg_global_alloc(s);
2051 int indirect_reg = 0;
2052
2053 switch (base_ts->kind) {
2054 case TEMP_FIXED:
2055 break;
2056 case TEMP_GLOBAL:
2057 /* We do not support double-indirect registers. */
2058 tcg_debug_assert(!base_ts->indirect_reg);
2059 base_ts->indirect_base = 1;
2060 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
2061 ? 2 : 1);
2062 indirect_reg = 1;
2063 break;
2064 default:
2065 g_assert_not_reached();
2066 }
2067
2068 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2069 TCGTemp *ts2 = tcg_global_alloc(s);
2070 char buf[64];
2071
2072 ts->base_type = TCG_TYPE_I64;
2073 ts->type = TCG_TYPE_I32;
2074 ts->indirect_reg = indirect_reg;
2075 ts->mem_allocated = 1;
2076 ts->mem_base = base_ts;
2077 ts->mem_offset = offset;
2078 pstrcpy(buf, sizeof(buf), name);
2079 pstrcat(buf, sizeof(buf), "_0");
2080 ts->name = strdup(buf);
2081
2082 tcg_debug_assert(ts2 == ts + 1);
2083 ts2->base_type = TCG_TYPE_I64;
2084 ts2->type = TCG_TYPE_I32;
2085 ts2->indirect_reg = indirect_reg;
2086 ts2->mem_allocated = 1;
2087 ts2->mem_base = base_ts;
2088 ts2->mem_offset = offset + 4;
2089 ts2->temp_subindex = 1;
2090 pstrcpy(buf, sizeof(buf), name);
2091 pstrcat(buf, sizeof(buf), "_1");
2092 ts2->name = strdup(buf);
2093 } else {
2094 ts->base_type = type;
2095 ts->type = type;
2096 ts->indirect_reg = indirect_reg;
2097 ts->mem_allocated = 1;
2098 ts->mem_base = base_ts;
2099 ts->mem_offset = offset;
2100 ts->name = name;
2101 }
2102 return ts;
2103 }
2104
2105 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
2106 {
2107 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
2108 return temp_tcgv_i32(ts);
2109 }
2110
2111 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
2112 {
2113 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
2114 return temp_tcgv_i64(ts);
2115 }
2116
2117 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
2118 {
2119 TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
2120 return temp_tcgv_ptr(ts);
2121 }
2122
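/*
 * Allocate a temporary of the given type and lifetime.  TEMP_EBB temps
 * are recycled from the per-type free_temps bitmap when possible;
 * TEMP_TB temps are always newly allocated.  Types wider than the host
 * register (I64 on 32-bit hosts, I128) occupy consecutive TCGTemps
 * distinguished by temp_subindex.
 */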
2123 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
2124 {
2125 TCGContext *s = tcg_ctx;
2126 TCGTemp *ts;
2127 int n;
2128
2129 if (kind == TEMP_EBB) {
2130 int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
2131
2132 if (idx < TCG_MAX_TEMPS) {
2133 /* There is already an available temp with the right type. */
2134 clear_bit(idx, s->free_temps[type].l);
2135
2136 ts = &s->temps[idx];
2137 ts->temp_allocated = 1;
2138 tcg_debug_assert(ts->base_type == type);
2139 tcg_debug_assert(ts->kind == kind);
2140 return ts;
2141 }
2142 } else {
2143 tcg_debug_assert(kind == TEMP_TB);
2144 }
2145
2146 switch (type) {
2147 case TCG_TYPE_I32:
2148 case TCG_TYPE_V64:
2149 case TCG_TYPE_V128:
2150 case TCG_TYPE_V256:
2151 n = 1;
2152 break;
2153 case TCG_TYPE_I64:
2154 n = 64 / TCG_TARGET_REG_BITS;
2155 break;
2156 case TCG_TYPE_I128:
2157 n = 128 / TCG_TARGET_REG_BITS;
2158 break;
2159 default:
2160 g_assert_not_reached();
2161 }
2162
2163 ts = tcg_temp_alloc(s);
2164 ts->base_type = type;
2165 ts->temp_allocated = 1;
2166 ts->kind = kind;
2167
2168 if (n == 1) {
2169 ts->type = type;
2170 } else {
2171 ts->type = TCG_TYPE_REG;
2172
2173 for (int i = 1; i < n; ++i) {
2174 TCGTemp *ts2 = tcg_temp_alloc(s);
2175
2176 tcg_debug_assert(ts2 == ts + i);
2177 ts2->base_type = type;
2178 ts2->type = TCG_TYPE_REG;
2179 ts2->temp_allocated = 1;
2180 ts2->temp_subindex = i;
2181 ts2->kind = kind;
2182 }
2183 }
2184 return ts;
2185 }
2186
2187 TCGv_i32 tcg_temp_new_i32(void)
2188 {
2189 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2190 }
2191
2192 TCGv_i32 tcg_temp_ebb_new_i32(void)
2193 {
2194 return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2195 }
2196
2197 TCGv_i64 tcg_temp_new_i64(void)
2198 {
2199 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2200 }
2201
2202 TCGv_i64 tcg_temp_ebb_new_i64(void)
2203 {
2204 return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2205 }
2206
2207 TCGv_ptr tcg_temp_new_ptr(void)
2208 {
2209 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2210 }
2211
2212 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2213 {
2214 return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2215 }
2216
2217 TCGv_i128 tcg_temp_new_i128(void)
2218 {
2219 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2220 }
2221
2222 TCGv_i128 tcg_temp_ebb_new_i128(void)
2223 {
2224 return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2225 }
2226
2227 TCGv_vec tcg_temp_new_vec(TCGType type)
2228 {
2229 TCGTemp *t;
2230
2231 #ifdef CONFIG_DEBUG_TCG
2232 switch (type) {
2233 case TCG_TYPE_V64:
2234 assert(TCG_TARGET_HAS_v64);
2235 break;
2236 case TCG_TYPE_V128:
2237 assert(TCG_TARGET_HAS_v128);
2238 break;
2239 case TCG_TYPE_V256:
2240 assert(TCG_TARGET_HAS_v256);
2241 break;
2242 default:
2243 g_assert_not_reached();
2244 }
2245 #endif
2246
2247 t = tcg_temp_new_internal(type, TEMP_EBB);
2248 return temp_tcgv_vec(t);
2249 }
2250
2251 /* Create a new temp of the same type as an existing temp. */
2252 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2253 {
2254 TCGTemp *t = tcgv_vec_temp(match);
2255
2256 tcg_debug_assert(t->temp_allocated != 0);
2257
2258 t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2259 return temp_tcgv_vec(t);
2260 }
2261
2262 void tcg_temp_free_internal(TCGTemp *ts)
2263 {
2264 TCGContext *s = tcg_ctx;
2265
2266 switch (ts->kind) {
2267 case TEMP_CONST:
2268 case TEMP_TB:
2269 /* Silently ignore free. */
2270 break;
2271 case TEMP_EBB:
2272 tcg_debug_assert(ts->temp_allocated != 0);
2273 ts->temp_allocated = 0;
2274 set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2275 break;
2276 default:
2277 /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2278 g_assert_not_reached();
2279 }
2280 }
2281
2282 void tcg_temp_free_i32(TCGv_i32 arg)
2283 {
2284 tcg_temp_free_internal(tcgv_i32_temp(arg));
2285 }
2286
2287 void tcg_temp_free_i64(TCGv_i64 arg)
2288 {
2289 tcg_temp_free_internal(tcgv_i64_temp(arg));
2290 }
2291
2292 void tcg_temp_free_i128(TCGv_i128 arg)
2293 {
2294 tcg_temp_free_internal(tcgv_i128_temp(arg));
2295 }
2296
2297 void tcg_temp_free_ptr(TCGv_ptr arg)
2298 {
2299 tcg_temp_free_internal(tcgv_ptr_temp(arg));
2300 }
2301
2302 void tcg_temp_free_vec(TCGv_vec arg)
2303 {
2304 tcg_temp_free_internal(tcgv_vec_temp(arg));
2305 }
2306
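/*
 * Return the canonical TEMP_CONST temporary holding VAL for TYPE.
 * Constants are interned in a per-type hash table, so repeated
 * requests for the same value share a single temp.
 */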
2307 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2308 {
2309 TCGContext *s = tcg_ctx;
2310 GHashTable *h = s->const_table[type];
2311 TCGTemp *ts;
2312
2313 if (h == NULL) {
2314 h = g_hash_table_new(g_int64_hash, g_int64_equal);
2315 s->const_table[type] = h;
2316 }
2317
2318 ts = g_hash_table_lookup(h, &val);
2319 if (ts == NULL) {
2320 int64_t *val_ptr;
2321
2322 ts = tcg_temp_alloc(s);
2323
2324 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2325 TCGTemp *ts2 = tcg_temp_alloc(s);
2326
2327 tcg_debug_assert(ts2 == ts + 1);
2328
2329 ts->base_type = TCG_TYPE_I64;
2330 ts->type = TCG_TYPE_I32;
2331 ts->kind = TEMP_CONST;
2332 ts->temp_allocated = 1;
2333
2334 ts2->base_type = TCG_TYPE_I64;
2335 ts2->type = TCG_TYPE_I32;
2336 ts2->kind = TEMP_CONST;
2337 ts2->temp_allocated = 1;
2338 ts2->temp_subindex = 1;
2339
2340 /*
2341 * Retain the full value of the 64-bit constant in the low
2342 * part, so that the hash table works. Actual uses will
2343 * truncate the value to the low part.
2344 */
2345 ts[HOST_BIG_ENDIAN].val = val;
2346 ts[!HOST_BIG_ENDIAN].val = val >> 32;
2347 val_ptr = &ts[HOST_BIG_ENDIAN].val;
2348 } else {
2349 ts->base_type = type;
2350 ts->type = type;
2351 ts->kind = TEMP_CONST;
2352 ts->temp_allocated = 1;
2353 ts->val = val;
2354 val_ptr = &ts->val;
2355 }
2356 g_hash_table_insert(h, val_ptr, ts);
2357 }
2358
2359 return ts;
2360 }
2361
2362 TCGv_i32 tcg_constant_i32(int32_t val)
2363 {
2364 return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2365 }
2366
2367 TCGv_i64 tcg_constant_i64(int64_t val)
2368 {
2369 return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2370 }
2371
2372 TCGv_vaddr tcg_constant_vaddr(uintptr_t val)
2373 {
2374 return temp_tcgv_vaddr(tcg_constant_internal(TCG_TYPE_PTR, val));
2375 }
2376
2377 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2378 {
2379 return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2380 }
2381
2382 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2383 {
2384 val = dup_const(vece, val);
2385 return temp_tcgv_vec(tcg_constant_internal(type, val));
2386 }
2387
2388 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2389 {
2390 TCGTemp *t = tcgv_vec_temp(match);
2391
2392 tcg_debug_assert(t->temp_allocated != 0);
2393 return tcg_constant_vec(t->base_type, vece, val);
2394 }
2395
2396 #ifdef CONFIG_DEBUG_TCG
2397 size_t temp_idx(TCGTemp *ts)
2398 {
2399 ptrdiff_t n = ts - tcg_ctx->temps;
2400 assert(n >= 0 && n < tcg_ctx->nb_temps);
2401 return n;
2402 }
2403
2404 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2405 {
2406 uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2407
2408 assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2409 assert(o % sizeof(TCGTemp) == 0);
2410
2411 return (void *)tcg_ctx + (uintptr_t)v;
2412 }
2413 #endif /* CONFIG_DEBUG_TCG */
2414
2415 /*
2416 * Return true if OP may appear in the opcode stream with TYPE.
2417 * Test the runtime variable that controls each opcode.
2418 */
2419 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2420 {
2421 bool has_type;
2422
2423 switch (type) {
2424 case TCG_TYPE_I32:
2425 has_type = true;
2426 break;
2427 case TCG_TYPE_I64:
2428 has_type = TCG_TARGET_REG_BITS == 64;
2429 break;
2430 case TCG_TYPE_V64:
2431 has_type = TCG_TARGET_HAS_v64;
2432 break;
2433 case TCG_TYPE_V128:
2434 has_type = TCG_TARGET_HAS_v128;
2435 break;
2436 case TCG_TYPE_V256:
2437 has_type = TCG_TARGET_HAS_v256;
2438 break;
2439 default:
2440 has_type = false;
2441 break;
2442 }
2443
2444 switch (op) {
2445 case INDEX_op_discard:
2446 case INDEX_op_set_label:
2447 case INDEX_op_call:
2448 case INDEX_op_br:
2449 case INDEX_op_mb:
2450 case INDEX_op_insn_start:
2451 case INDEX_op_exit_tb:
2452 case INDEX_op_goto_tb:
2453 case INDEX_op_goto_ptr:
2454 return true;
2455
2456 case INDEX_op_qemu_ld:
2457 case INDEX_op_qemu_st:
2458 tcg_debug_assert(type <= TCG_TYPE_REG);
2459 return true;
2460
2461 case INDEX_op_qemu_ld2:
2462 case INDEX_op_qemu_st2:
2463 if (TCG_TARGET_REG_BITS == 32) {
2464 tcg_debug_assert(type == TCG_TYPE_I64);
2465 return true;
2466 }
2467 tcg_debug_assert(type == TCG_TYPE_I128);
2468 goto do_lookup;
2469
2470 case INDEX_op_add:
2471 case INDEX_op_and:
2472 case INDEX_op_brcond:
2473 case INDEX_op_deposit:
2474 case INDEX_op_extract:
2475 case INDEX_op_ld8u:
2476 case INDEX_op_ld8s:
2477 case INDEX_op_ld16u:
2478 case INDEX_op_ld16s:
2479 case INDEX_op_ld:
2480 case INDEX_op_mov:
2481 case INDEX_op_movcond:
2482 case INDEX_op_negsetcond:
2483 case INDEX_op_or:
2484 case INDEX_op_setcond:
2485 case INDEX_op_sextract:
2486 case INDEX_op_st8:
2487 case INDEX_op_st16:
2488 case INDEX_op_st:
2489 case INDEX_op_xor:
2490 return has_type;
2491
2492 case INDEX_op_brcond2_i32:
2493 case INDEX_op_setcond2_i32:
2494 return TCG_TARGET_REG_BITS == 32;
2495
2496 case INDEX_op_ld32u:
2497 case INDEX_op_ld32s:
2498 case INDEX_op_st32:
2499 case INDEX_op_ext_i32_i64:
2500 case INDEX_op_extu_i32_i64:
2501 case INDEX_op_extrl_i64_i32:
2502 case INDEX_op_extrh_i64_i32:
2503 return TCG_TARGET_REG_BITS == 64;
2504
2505 case INDEX_op_mov_vec:
2506 case INDEX_op_dup_vec:
2507 case INDEX_op_dupm_vec:
2508 case INDEX_op_ld_vec:
2509 case INDEX_op_st_vec:
2510 case INDEX_op_add_vec:
2511 case INDEX_op_sub_vec:
2512 case INDEX_op_and_vec:
2513 case INDEX_op_or_vec:
2514 case INDEX_op_xor_vec:
2515 case INDEX_op_cmp_vec:
2516 return has_type;
2517 case INDEX_op_dup2_vec:
2518 return has_type && TCG_TARGET_REG_BITS == 32;
2519 case INDEX_op_not_vec:
2520 return has_type && TCG_TARGET_HAS_not_vec;
2521 case INDEX_op_neg_vec:
2522 return has_type && TCG_TARGET_HAS_neg_vec;
2523 case INDEX_op_abs_vec:
2524 return has_type && TCG_TARGET_HAS_abs_vec;
2525 case INDEX_op_andc_vec:
2526 return has_type && TCG_TARGET_HAS_andc_vec;
2527 case INDEX_op_orc_vec:
2528 return has_type && TCG_TARGET_HAS_orc_vec;
2529 case INDEX_op_nand_vec:
2530 return has_type && TCG_TARGET_HAS_nand_vec;
2531 case INDEX_op_nor_vec:
2532 return has_type && TCG_TARGET_HAS_nor_vec;
2533 case INDEX_op_eqv_vec:
2534 return has_type && TCG_TARGET_HAS_eqv_vec;
2535 case INDEX_op_mul_vec:
2536 return has_type && TCG_TARGET_HAS_mul_vec;
2537 case INDEX_op_shli_vec:
2538 case INDEX_op_shri_vec:
2539 case INDEX_op_sari_vec:
2540 return has_type && TCG_TARGET_HAS_shi_vec;
2541 case INDEX_op_shls_vec:
2542 case INDEX_op_shrs_vec:
2543 case INDEX_op_sars_vec:
2544 return has_type && TCG_TARGET_HAS_shs_vec;
2545 case INDEX_op_shlv_vec:
2546 case INDEX_op_shrv_vec:
2547 case INDEX_op_sarv_vec:
2548 return has_type && TCG_TARGET_HAS_shv_vec;
2549 case INDEX_op_rotli_vec:
2550 return has_type && TCG_TARGET_HAS_roti_vec;
2551 case INDEX_op_rotls_vec:
2552 return has_type && TCG_TARGET_HAS_rots_vec;
2553 case INDEX_op_rotlv_vec:
2554 case INDEX_op_rotrv_vec:
2555 return has_type && TCG_TARGET_HAS_rotv_vec;
2556 case INDEX_op_ssadd_vec:
2557 case INDEX_op_usadd_vec:
2558 case INDEX_op_sssub_vec:
2559 case INDEX_op_ussub_vec:
2560 return has_type && TCG_TARGET_HAS_sat_vec;
2561 case INDEX_op_smin_vec:
2562 case INDEX_op_umin_vec:
2563 case INDEX_op_smax_vec:
2564 case INDEX_op_umax_vec:
2565 return has_type && TCG_TARGET_HAS_minmax_vec;
2566 case INDEX_op_bitsel_vec:
2567 return has_type && TCG_TARGET_HAS_bitsel_vec;
2568 case INDEX_op_cmpsel_vec:
2569 return has_type && TCG_TARGET_HAS_cmpsel_vec;
2570
2571 default:
2572 if (op < INDEX_op_last_generic) {
2573 const TCGOutOp *outop;
2574 TCGConstraintSetIndex con_set;
2575
2576 if (!has_type) {
2577 return false;
2578 }
2579
2580 do_lookup:
2581 outop = all_outop[op];
2582 tcg_debug_assert(outop != NULL);
2583
2584 con_set = outop->static_constraint;
2585 if (con_set == C_Dynamic) {
2586 con_set = outop->dynamic_constraint(type, flags);
2587 }
2588 if (con_set >= 0) {
2589 return true;
2590 }
2591 tcg_debug_assert(con_set == C_NotImplemented);
2592 return false;
2593 }
2594 tcg_debug_assert(op < NB_OPS);
2595 return true;
2596
2597 case INDEX_op_last_generic:
2598 g_assert_not_reached();
2599 }
2600 }
2601
2602 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2603 {
2604 unsigned width;
2605
2606 tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2607 width = (type == TCG_TYPE_I32 ? 32 : 64);
2608
2609 tcg_debug_assert(ofs < width);
2610 tcg_debug_assert(len > 0);
2611 tcg_debug_assert(len <= width - ofs);
2612
2613 return TCG_TARGET_deposit_valid(type, ofs, len);
2614 }
2615
2616 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2617
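/*
 * Emit an INDEX_op_call.  Arguments are packed as: output temps, then
 * input temps in call-layout order, then the function pointer and the
 * TCGHelperInfo pointer.  32-bit inputs that the ABI requires to be
 * extended are widened into scratch EBB temps, which are freed once
 * the op has been emitted.
 */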
2618 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2619 TCGTemp *ret, TCGTemp **args)
2620 {
2621 TCGv_i64 extend_free[MAX_CALL_IARGS];
2622 int n_extend = 0;
2623 TCGOp *op;
2624 int i, n, pi = 0, total_args;
2625
2626 if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2627 init_call_layout(info);
2628 g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2629 }
2630
2631 total_args = info->nr_out + info->nr_in + 2;
2632 op = tcg_op_alloc(INDEX_op_call, total_args);
2633
2634 #ifdef CONFIG_PLUGIN
2635 /* Flag helpers that may affect guest state */
2636 if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2637 tcg_ctx->plugin_insn->calls_helpers = true;
2638 }
2639 #endif
2640
2641 TCGOP_CALLO(op) = n = info->nr_out;
2642 switch (n) {
2643 case 0:
2644 tcg_debug_assert(ret == NULL);
2645 break;
2646 case 1:
2647 tcg_debug_assert(ret != NULL);
2648 op->args[pi++] = temp_arg(ret);
2649 break;
2650 case 2:
2651 case 4:
2652 tcg_debug_assert(ret != NULL);
2653 tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2654 tcg_debug_assert(ret->temp_subindex == 0);
2655 for (i = 0; i < n; ++i) {
2656 op->args[pi++] = temp_arg(ret + i);
2657 }
2658 break;
2659 default:
2660 g_assert_not_reached();
2661 }
2662
2663 TCGOP_CALLI(op) = n = info->nr_in;
2664 for (i = 0; i < n; i++) {
2665 const TCGCallArgumentLoc *loc = &info->in[i];
2666 TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2667
2668 switch (loc->kind) {
2669 case TCG_CALL_ARG_NORMAL:
2670 case TCG_CALL_ARG_BY_REF:
2671 case TCG_CALL_ARG_BY_REF_N:
2672 op->args[pi++] = temp_arg(ts);
2673 break;
2674
2675 case TCG_CALL_ARG_EXTEND_U:
2676 case TCG_CALL_ARG_EXTEND_S:
2677 {
2678 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2679 TCGv_i32 orig = temp_tcgv_i32(ts);
2680
2681 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2682 tcg_gen_ext_i32_i64(temp, orig);
2683 } else {
2684 tcg_gen_extu_i32_i64(temp, orig);
2685 }
2686 op->args[pi++] = tcgv_i64_arg(temp);
2687 extend_free[n_extend++] = temp;
2688 }
2689 break;
2690
2691 default:
2692 g_assert_not_reached();
2693 }
2694 }
2695 op->args[pi++] = (uintptr_t)func;
2696 op->args[pi++] = (uintptr_t)info;
2697 tcg_debug_assert(pi == total_args);
2698
2699 if (tcg_ctx->emit_before_op) {
2700 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2701 } else {
2702 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2703 }
2704
2705 tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2706 for (i = 0; i < n_extend; ++i) {
2707 tcg_temp_free_i64(extend_free[i]);
2708 }
2709 }
2710
2711 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2712 {
2713 tcg_gen_callN(func, info, ret, NULL);
2714 }
2715
2716 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2717 {
2718 tcg_gen_callN(func, info, ret, &t1);
2719 }
2720
2721 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2722 TCGTemp *t1, TCGTemp *t2)
2723 {
2724 TCGTemp *args[2] = { t1, t2 };
2725 tcg_gen_callN(func, info, ret, args);
2726 }
2727
2728 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2729 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2730 {
2731 TCGTemp *args[3] = { t1, t2, t3 };
2732 tcg_gen_callN(func, info, ret, args);
2733 }
2734
2735 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2736 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2737 {
2738 TCGTemp *args[4] = { t1, t2, t3, t4 };
2739 tcg_gen_callN(func, info, ret, args);
2740 }
2741
2742 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2743 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2744 {
2745 TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2746 tcg_gen_callN(func, info, ret, args);
2747 }
2748
2749 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2750 TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2751 TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2752 {
2753 TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2754 tcg_gen_callN(func, info, ret, args);
2755 }
2756
2757 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2758 TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2759 TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2760 {
2761 TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2762 tcg_gen_callN(func, info, ret, args);
2763 }
2764
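/*
 * Reset register-allocation state before code generation: constants,
 * fixed and global temps get their canonical value locations, EBB
 * temps start dead, TB temps start in (not yet allocated) memory,
 * and the reg_to_temp map is cleared.
 */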
2765 static void tcg_reg_alloc_start(TCGContext *s)
2766 {
2767 int i, n;
2768
2769 for (i = 0, n = s->nb_temps; i < n; i++) {
2770 TCGTemp *ts = &s->temps[i];
2771 TCGTempVal val = TEMP_VAL_MEM;
2772
2773 switch (ts->kind) {
2774 case TEMP_CONST:
2775 val = TEMP_VAL_CONST;
2776 break;
2777 case TEMP_FIXED:
2778 val = TEMP_VAL_REG;
2779 break;
2780 case TEMP_GLOBAL:
2781 break;
2782 case TEMP_EBB:
2783 val = TEMP_VAL_DEAD;
2784 /* fall through */
2785 case TEMP_TB:
2786 ts->mem_allocated = 0;
2787 break;
2788 default:
2789 g_assert_not_reached();
2790 }
2791 ts->val_type = val;
2792 }
2793
2794 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2795 }
2796
2797 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2798 TCGTemp *ts)
2799 {
2800 int idx = temp_idx(ts);
2801
2802 switch (ts->kind) {
2803 case TEMP_FIXED:
2804 case TEMP_GLOBAL:
2805 pstrcpy(buf, buf_size, ts->name);
2806 break;
2807 case TEMP_TB:
2808 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2809 break;
2810 case TEMP_EBB:
2811 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2812 break;
2813 case TEMP_CONST:
2814 switch (ts->type) {
2815 case TCG_TYPE_I32:
2816 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2817 break;
2818 #if TCG_TARGET_REG_BITS > 32
2819 case TCG_TYPE_I64:
2820 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2821 break;
2822 #endif
2823 case TCG_TYPE_V64:
2824 case TCG_TYPE_V128:
2825 case TCG_TYPE_V256:
2826 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2827 64 << (ts->type - TCG_TYPE_V64), ts->val);
2828 break;
2829 default:
2830 g_assert_not_reached();
2831 }
2832 break;
2833 }
2834 return buf;
2835 }
2836
2837 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2838 int buf_size, TCGArg arg)
2839 {
2840 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2841 }
2842
2843 static const char * const cond_name[] =
2844 {
2845 [TCG_COND_NEVER] = "never",
2846 [TCG_COND_ALWAYS] = "always",
2847 [TCG_COND_EQ] = "eq",
2848 [TCG_COND_NE] = "ne",
2849 [TCG_COND_LT] = "lt",
2850 [TCG_COND_GE] = "ge",
2851 [TCG_COND_LE] = "le",
2852 [TCG_COND_GT] = "gt",
2853 [TCG_COND_LTU] = "ltu",
2854 [TCG_COND_GEU] = "geu",
2855 [TCG_COND_LEU] = "leu",
2856 [TCG_COND_GTU] = "gtu",
2857 [TCG_COND_TSTEQ] = "tsteq",
2858 [TCG_COND_TSTNE] = "tstne",
2859 };
2860
2861 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2862 {
2863 [MO_UB] = "ub",
2864 [MO_SB] = "sb",
2865 [MO_LEUW] = "leuw",
2866 [MO_LESW] = "lesw",
2867 [MO_LEUL] = "leul",
2868 [MO_LESL] = "lesl",
2869 [MO_LEUQ] = "leq",
2870 [MO_BEUW] = "beuw",
2871 [MO_BESW] = "besw",
2872 [MO_BEUL] = "beul",
2873 [MO_BESL] = "besl",
2874 [MO_BEUQ] = "beq",
2875 [MO_128 + MO_BE] = "beo",
2876 [MO_128 + MO_LE] = "leo",
2877 };
2878
2879 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2880 [MO_UNALN >> MO_ASHIFT] = "un+",
2881 [MO_ALIGN >> MO_ASHIFT] = "al+",
2882 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2883 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2884 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2885 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2886 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2887 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2888 };
2889
2890 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2891 [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2892 [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2893 [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2894 [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2895 [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2896 [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2897 };
2898
2899 static const char bswap_flag_name[][6] = {
2900 [TCG_BSWAP_IZ] = "iz",
2901 [TCG_BSWAP_OZ] = "oz",
2902 [TCG_BSWAP_OS] = "os",
2903 [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2904 [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2905 };
2906
2907 #ifdef CONFIG_PLUGIN
2908 static const char * const plugin_from_name[] = {
2909 "from-tb",
2910 "from-insn",
2911 "after-insn",
2912 "after-tb",
2913 };
2914 #endif
2915
2916 static inline bool tcg_regset_single(TCGRegSet d)
2917 {
2918 return (d & (d - 1)) == 0;
2919 }
2920
2921 static inline TCGReg tcg_regset_first(TCGRegSet d)
2922 {
2923 if (TCG_TARGET_NB_REGS <= 32) {
2924 return ctz32(d);
2925 } else {
2926 return ctz64(d);
2927 }
2928 }
2929
2930 /* Return only the number of characters output -- no error return. */
2931 #define ne_fprintf(...) \
2932 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2933
2934 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2935 {
2936 char buf[128];
2937 TCGOp *op;
2938
2939 QTAILQ_FOREACH(op, &s->ops, link) {
2940 int i, k, nb_oargs, nb_iargs, nb_cargs;
2941 const TCGOpDef *def;
2942 TCGOpcode c;
2943 int col = 0;
2944
2945 c = op->opc;
2946 def = &tcg_op_defs[c];
2947
2948 if (c == INDEX_op_insn_start) {
2949 nb_oargs = 0;
2950 col += ne_fprintf(f, "\n ----");
2951
2952 for (i = 0, k = INSN_START_WORDS; i < k; ++i) {
2953 col += ne_fprintf(f, " %016" PRIx64,
2954 tcg_get_insn_start_param(op, i));
2955 }
2956 } else if (c == INDEX_op_call) {
2957 const TCGHelperInfo *info = tcg_call_info(op);
2958 void *func = tcg_call_func(op);
2959
2960 /* variable number of arguments */
2961 nb_oargs = TCGOP_CALLO(op);
2962 nb_iargs = TCGOP_CALLI(op);
2963 nb_cargs = def->nb_cargs;
2964
2965 col += ne_fprintf(f, " %s ", def->name);
2966
2967 /*
2968 * Print the function name from TCGHelperInfo, if available.
2969 * Note that plugins have a template function for the info,
2970 * but the actual function pointer comes from the plugin.
2971 */
2972 if (func == info->func) {
2973 col += ne_fprintf(f, "%s", info->name);
2974 } else {
2975 col += ne_fprintf(f, "plugin(%p)", func);
2976 }
2977
2978 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2979 for (i = 0; i < nb_oargs; i++) {
2980 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2981 op->args[i]));
2982 }
2983 for (i = 0; i < nb_iargs; i++) {
2984 TCGArg arg = op->args[nb_oargs + i];
2985 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2986 col += ne_fprintf(f, ",%s", t);
2987 }
2988 } else {
2989 if (def->flags & TCG_OPF_INT) {
2990 col += ne_fprintf(f, " %s_i%d ",
2991 def->name,
2992 8 * tcg_type_size(TCGOP_TYPE(op)));
2993 } else if (def->flags & TCG_OPF_VECTOR) {
2994 col += ne_fprintf(f, "%s v%d,e%d,",
2995 def->name,
2996 8 * tcg_type_size(TCGOP_TYPE(op)),
2997 8 << TCGOP_VECE(op));
2998 } else {
2999 col += ne_fprintf(f, " %s ", def->name);
3000 }
3001
3002 nb_oargs = def->nb_oargs;
3003 nb_iargs = def->nb_iargs;
3004 nb_cargs = def->nb_cargs;
3005
3006 k = 0;
3007 for (i = 0; i < nb_oargs; i++) {
3008 const char *sep = k ? "," : "";
3009 col += ne_fprintf(f, "%s%s", sep,
3010 tcg_get_arg_str(s, buf, sizeof(buf),
3011 op->args[k++]));
3012 }
3013 for (i = 0; i < nb_iargs; i++) {
3014 const char *sep = k ? "," : "";
3015 col += ne_fprintf(f, "%s%s", sep,
3016 tcg_get_arg_str(s, buf, sizeof(buf),
3017 op->args[k++]));
3018 }
3019 switch (c) {
3020 case INDEX_op_brcond:
3021 case INDEX_op_setcond:
3022 case INDEX_op_negsetcond:
3023 case INDEX_op_movcond:
3024 case INDEX_op_brcond2_i32:
3025 case INDEX_op_setcond2_i32:
3026 case INDEX_op_cmp_vec:
3027 case INDEX_op_cmpsel_vec:
3028 if (op->args[k] < ARRAY_SIZE(cond_name)
3029 && cond_name[op->args[k]]) {
3030 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
3031 } else {
3032 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
3033 }
3034 i = 1;
3035 break;
3036 case INDEX_op_qemu_ld:
3037 case INDEX_op_qemu_st:
3038 case INDEX_op_qemu_ld2:
3039 case INDEX_op_qemu_st2:
3040 {
3041 const char *s_al, *s_op, *s_at;
3042 MemOpIdx oi = op->args[k++];
3043 MemOp mop = get_memop(oi);
3044 unsigned ix = get_mmuidx(oi);
3045
3046 s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
3047 s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
3048 s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
3049 mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
3050
3051 /* If all fields are accounted for, print symbolically. */
3052 if (!mop && s_al && s_op && s_at) {
3053 col += ne_fprintf(f, ",%s%s%s,%u",
3054 s_at, s_al, s_op, ix);
3055 } else {
3056 mop = get_memop(oi);
3057 col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
3058 }
3059 i = 1;
3060 }
3061 break;
3062 case INDEX_op_bswap16:
3063 case INDEX_op_bswap32:
3064 case INDEX_op_bswap64:
3065 {
3066 TCGArg flags = op->args[k];
3067 const char *name = NULL;
3068
3069 if (flags < ARRAY_SIZE(bswap_flag_name)) {
3070 name = bswap_flag_name[flags];
3071 }
3072 if (name) {
3073 col += ne_fprintf(f, ",%s", name);
3074 } else {
3075 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
3076 }
3077 i = k = 1;
3078 }
3079 break;
3080 #ifdef CONFIG_PLUGIN
3081 case INDEX_op_plugin_cb:
3082 {
3083 TCGArg from = op->args[k++];
3084 const char *name = NULL;
3085
3086 if (from < ARRAY_SIZE(plugin_from_name)) {
3087 name = plugin_from_name[from];
3088 }
3089 if (name) {
3090 col += ne_fprintf(f, "%s", name);
3091 } else {
3092 col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
3093 }
3094 i = 1;
3095 }
3096 break;
3097 #endif
3098 default:
3099 i = 0;
3100 break;
3101 }
3102 switch (c) {
3103 case INDEX_op_set_label:
3104 case INDEX_op_br:
3105 case INDEX_op_brcond:
3106 case INDEX_op_brcond2_i32:
3107 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
3108 arg_label(op->args[k])->id);
3109 i++, k++;
3110 break;
3111 case INDEX_op_mb:
3112 {
3113 TCGBar membar = op->args[k];
3114 const char *b_op, *m_op;
3115
3116 switch (membar & TCG_BAR_SC) {
3117 case 0:
3118 b_op = "none";
3119 break;
3120 case TCG_BAR_LDAQ:
3121 b_op = "acq";
3122 break;
3123 case TCG_BAR_STRL:
3124 b_op = "rel";
3125 break;
3126 case TCG_BAR_SC:
3127 b_op = "seq";
3128 break;
3129 default:
3130 g_assert_not_reached();
3131 }
3132
3133 switch (membar & TCG_MO_ALL) {
3134 case 0:
3135 m_op = "none";
3136 break;
3137 case TCG_MO_LD_LD:
3138 m_op = "rr";
3139 break;
3140 case TCG_MO_LD_ST:
3141 m_op = "rw";
3142 break;
3143 case TCG_MO_ST_LD:
3144 m_op = "wr";
3145 break;
3146 case TCG_MO_ST_ST:
3147 m_op = "ww";
3148 break;
3149 case TCG_MO_LD_LD | TCG_MO_LD_ST:
3150 m_op = "rr+rw";
3151 break;
3152 case TCG_MO_LD_LD | TCG_MO_ST_LD:
3153 m_op = "rr+wr";
3154 break;
3155 case TCG_MO_LD_LD | TCG_MO_ST_ST:
3156 m_op = "rr+ww";
3157 break;
3158 case TCG_MO_LD_ST | TCG_MO_ST_LD:
3159 m_op = "rw+wr";
3160 break;
3161 case TCG_MO_LD_ST | TCG_MO_ST_ST:
3162 m_op = "rw+ww";
3163 break;
3164 case TCG_MO_ST_LD | TCG_MO_ST_ST:
3165 m_op = "wr+ww";
3166 break;
3167 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3168 m_op = "rr+rw+wr";
3169 break;
3170 case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3171 m_op = "rr+rw+ww";
3172 break;
3173 case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3174 m_op = "rr+wr+ww";
3175 break;
3176 case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3177 m_op = "rw+wr+ww";
3178 break;
3179 case TCG_MO_ALL:
3180 m_op = "all";
3181 break;
3182 default:
3183 g_assert_not_reached();
3184 }
3185
3186 col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3187 i++, k++;
3188 }
3189 break;
3190 default:
3191 break;
3192 }
3193 for (; i < nb_cargs; i++, k++) {
3194 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3195 op->args[k]);
3196 }
3197 }
3198
3199 if (have_prefs || op->life) {
3200 for (; col < 40; ++col) {
3201 putc(' ', f);
3202 }
3203 }
3204
3205 if (op->life) {
3206 unsigned life = op->life;
3207
3208 if (life & (SYNC_ARG * 3)) {
3209 ne_fprintf(f, " sync:");
3210 for (i = 0; i < 2; ++i) {
3211 if (life & (SYNC_ARG << i)) {
3212 ne_fprintf(f, " %d", i);
3213 }
3214 }
3215 }
3216 life /= DEAD_ARG;
3217 if (life) {
3218 ne_fprintf(f, " dead:");
3219 for (i = 0; life; ++i, life >>= 1) {
3220 if (life & 1) {
3221 ne_fprintf(f, " %d", i);
3222 }
3223 }
3224 }
3225 }
3226
3227 if (have_prefs) {
3228 for (i = 0; i < nb_oargs; ++i) {
3229 TCGRegSet set = output_pref(op, i);
3230
3231 if (i == 0) {
3232 ne_fprintf(f, " pref=");
3233 } else {
3234 ne_fprintf(f, ",");
3235 }
3236 if (set == 0) {
3237 ne_fprintf(f, "none");
3238 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3239 ne_fprintf(f, "all");
3240 #ifdef CONFIG_DEBUG_TCG
3241 } else if (tcg_regset_single(set)) {
3242 TCGReg reg = tcg_regset_first(set);
3243 ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3244 #endif
3245 } else if (TCG_TARGET_NB_REGS <= 32) {
3246 ne_fprintf(f, "0x%x", (uint32_t)set);
3247 } else {
3248 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3249 }
3250 }
3251 }
3252
3253 putc('\n', f);
3254 }
3255 }
3256
3257 /* we give more priority to constraints with fewer registers */
3258 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3259 {
3260 int n;
3261
3262 arg_ct += k;
3263 n = ctpop64(arg_ct->regs);
3264
3265 /*
3266 * Sort constraints of a single register first, which includes output
3267 * aliases (which must exactly match the input already allocated).
3268 */
3269 if (n == 1 || arg_ct->oalias) {
3270 return INT_MAX;
3271 }
3272
3273 /*
3274 * Sort register pairs next, first then second immediately after.
3275 * Arbitrarily sort multiple pairs by the index of the first reg;
3276 * there shouldn't be many pairs.
3277 */
3278 switch (arg_ct->pair) {
3279 case 1:
3280 case 3:
3281 return (k + 1) * 2;
3282 case 2:
3283 return (arg_ct->pair_index + 1) * 2 - 1;
3284 }
3285
3286 /* Finally, sort by decreasing register count. */
3287 assert(n > 1);
3288 return -n;
3289 }
3290
3291 /* sort from highest priority to lowest */
3292 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3293 {
3294 int i, j;
3295
3296 for (i = 0; i < n; i++) {
3297 a[start + i].sort_index = start + i;
3298 }
3299 if (n <= 1) {
3300 return;
3301 }
3302 for (i = 0; i < n - 1; i++) {
3303 for (j = i + 1; j < n; j++) {
3304 int p1 = get_constraint_priority(a, a[start + i].sort_index);
3305 int p2 = get_constraint_priority(a, a[start + j].sort_index);
3306 if (p1 < p2) {
3307 int tmp = a[start + i].sort_index;
3308 a[start + i].sort_index = a[start + j].sort_index;
3309 a[start + j].sort_index = tmp;
3310 }
3311 }
3312 }
3313 }
3314
3315 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3316 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3317
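/*
 * Expand the textual constraint sets from tcg-target-con-str.h into
 * TCGArgConstraint arrays, once at startup: resolve output aliases
 * ("0".."9"), register pairs ('p'/'m'), and target register/constant
 * masks, then sort each set by allocation priority.
 */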
3318 static void process_constraint_sets(void)
3319 {
3320 for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3321 const TCGConstraintSet *tdefs = &constraint_sets[c];
3322 TCGArgConstraint *args_ct = all_cts[c];
3323 int nb_oargs = tdefs->nb_oargs;
3324 int nb_iargs = tdefs->nb_iargs;
3325 int nb_args = nb_oargs + nb_iargs;
3326 bool saw_alias_pair = false;
3327
3328 for (int i = 0; i < nb_args; i++) {
3329 const char *ct_str = tdefs->args_ct_str[i];
3330 bool input_p = i >= nb_oargs;
3331 int o;
3332
3333 switch (*ct_str) {
3334 case '0' ... '9':
3335 o = *ct_str - '0';
3336 tcg_debug_assert(input_p);
3337 tcg_debug_assert(o < nb_oargs);
3338 tcg_debug_assert(args_ct[o].regs != 0);
3339 tcg_debug_assert(!args_ct[o].oalias);
3340 args_ct[i] = args_ct[o];
3341 /* The output sets oalias. */
3342 args_ct[o].oalias = 1;
3343 args_ct[o].alias_index = i;
3344 /* The input sets ialias. */
3345 args_ct[i].ialias = 1;
3346 args_ct[i].alias_index = o;
3347 if (args_ct[i].pair) {
3348 saw_alias_pair = true;
3349 }
3350 tcg_debug_assert(ct_str[1] == '\0');
3351 continue;
3352
3353 case '&':
3354 tcg_debug_assert(!input_p);
3355 args_ct[i].newreg = true;
3356 ct_str++;
3357 break;
3358
3359 case 'p': /* plus */
3360 /* Allocate to the register after the previous. */
3361 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3362 o = i - 1;
3363 tcg_debug_assert(!args_ct[o].pair);
3364 tcg_debug_assert(!args_ct[o].ct);
3365 args_ct[i] = (TCGArgConstraint){
3366 .pair = 2,
3367 .pair_index = o,
3368 .regs = args_ct[o].regs << 1,
3369 .newreg = args_ct[o].newreg,
3370 };
3371 args_ct[o].pair = 1;
3372 args_ct[o].pair_index = i;
3373 tcg_debug_assert(ct_str[1] == '\0');
3374 continue;
3375
3376 case 'm': /* minus */
3377 /* Allocate to the register before the previous. */
3378 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3379 o = i - 1;
3380 tcg_debug_assert(!args_ct[o].pair);
3381 tcg_debug_assert(!args_ct[o].ct);
3382 args_ct[i] = (TCGArgConstraint){
3383 .pair = 1,
3384 .pair_index = o,
3385 .regs = args_ct[o].regs >> 1,
3386 .newreg = args_ct[o].newreg,
3387 };
3388 args_ct[o].pair = 2;
3389 args_ct[o].pair_index = i;
3390 tcg_debug_assert(ct_str[1] == '\0');
3391 continue;
3392 }
3393
3394 do {
3395 switch (*ct_str) {
3396 case 'i':
3397 args_ct[i].ct |= TCG_CT_CONST;
3398 break;
3399 #ifdef TCG_REG_ZERO
3400 case 'z':
3401 args_ct[i].ct |= TCG_CT_REG_ZERO;
3402 break;
3403 #endif
3404
3405 /* Include all of the target-specific constraints. */
3406
3407 #undef CONST
3408 #define CONST(CASE, MASK) \
3409 case CASE: args_ct[i].ct |= MASK; break;
3410 #define REGS(CASE, MASK) \
3411 case CASE: args_ct[i].regs |= MASK; break;
3412
3413 #include "tcg-target-con-str.h"
3414
3415 #undef REGS
3416 #undef CONST
3417 default:
3418 case '0' ... '9':
3419 case '&':
3420 case 'p':
3421 case 'm':
3422 /* Typo in TCGConstraintSet constraint. */
3423 g_assert_not_reached();
3424 }
3425 } while (*++ct_str != '\0');
3426 }
3427
3428 /*
3429 * Fix up output pairs that are aliased with inputs.
3430 * When we created the alias, we copied pair from the output.
3431 * There are three cases:
3432 * (1a) Pairs of inputs alias pairs of outputs.
3433 * (1b) One input aliases the first of a pair of outputs.
3434 * (2) One input aliases the second of a pair of outputs.
3435 *
3436 * Case 1a is handled by making sure that the pair_index'es are
3437 * properly updated so that they appear the same as a pair of inputs.
3438 *
3439 * Case 1b is handled by setting the pair_index of the input to
3440 * itself, simply so it doesn't point to an unrelated argument.
3441 * Since we don't encounter the "second" during the input allocation
3442 * phase, nothing happens with the second half of the input pair.
3443 *
3444 * Case 2 is handled by setting the second input to pair=3, the
3445 * first output to pair=3, and the pair_index'es to match.
3446 */
3447 if (saw_alias_pair) {
3448 for (int i = nb_oargs; i < nb_args; i++) {
3449 int o, o2, i2;
3450
3451 /*
3452 * Since [0-9pm] must be alone in the constraint string,
3453 * the only way they can both be set is if the pair comes
3454 * from the output alias.
3455 */
3456 if (!args_ct[i].ialias) {
3457 continue;
3458 }
3459 switch (args_ct[i].pair) {
3460 case 0:
3461 break;
3462 case 1:
3463 o = args_ct[i].alias_index;
3464 o2 = args_ct[o].pair_index;
3465 tcg_debug_assert(args_ct[o].pair == 1);
3466 tcg_debug_assert(args_ct[o2].pair == 2);
3467 if (args_ct[o2].oalias) {
3468 /* Case 1a */
3469 i2 = args_ct[o2].alias_index;
3470 tcg_debug_assert(args_ct[i2].pair == 2);
3471 args_ct[i2].pair_index = i;
3472 args_ct[i].pair_index = i2;
3473 } else {
3474 /* Case 1b */
3475 args_ct[i].pair_index = i;
3476 }
3477 break;
3478 case 2:
3479 o = args_ct[i].alias_index;
3480 o2 = args_ct[o].pair_index;
3481 tcg_debug_assert(args_ct[o].pair == 2);
3482 tcg_debug_assert(args_ct[o2].pair == 1);
3483 if (args_ct[o2].oalias) {
3484 /* Case 1a */
3485 i2 = args_ct[o2].alias_index;
3486 tcg_debug_assert(args_ct[i2].pair == 1);
3487 args_ct[i2].pair_index = i;
3488 args_ct[i].pair_index = i2;
3489 } else {
3490 /* Case 2 */
3491 args_ct[i].pair = 3;
3492 args_ct[o2].pair = 3;
3493 args_ct[i].pair_index = o2;
3494 args_ct[o2].pair_index = i;
3495 }
3496 break;
3497 default:
3498 g_assert_not_reached();
3499 }
3500 }
3501 }
3502
3503 /* sort the constraints (XXX: this is just a heuristic) */
3504 sort_constraints(args_ct, 0, nb_oargs);
3505 sort_constraints(args_ct, nb_oargs, nb_iargs);
3506 }
3507 }
3508
3509 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3510 {
3511 TCGOpcode opc = op->opc;
3512 TCGType type = TCGOP_TYPE(op);
3513 unsigned flags = TCGOP_FLAGS(op);
3514 const TCGOpDef *def = &tcg_op_defs[opc];
3515 const TCGOutOp *outop = all_outop[opc];
3516 TCGConstraintSetIndex con_set;
3517
3518 if (def->flags & TCG_OPF_NOT_PRESENT) {
3519 return empty_cts;
3520 }
3521
3522 if (outop) {
3523 con_set = outop->static_constraint;
3524 if (con_set == C_Dynamic) {
3525 con_set = outop->dynamic_constraint(type, flags);
3526 }
3527 } else {
3528 con_set = tcg_target_op_def(opc, type, flags);
3529 }
3530 tcg_debug_assert(con_set >= 0);
3531 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3532
3533 /* The constraint arguments must match TCGOpcode arguments. */
3534 tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3535 tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3536
3537 return all_cts[con_set];
3538 }
3539
3540 static void remove_label_use(TCGOp *op, int idx)
3541 {
3542 TCGLabel *label = arg_label(op->args[idx]);
3543 TCGLabelUse *use;
3544
3545 QSIMPLEQ_FOREACH(use, &label->branches, next) {
3546 if (use->op == op) {
3547 QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3548 return;
3549 }
3550 }
3551 g_assert_not_reached();
3552 }
3553
3554 void tcg_op_remove(TCGContext *s, TCGOp *op)
3555 {
3556 switch (op->opc) {
3557 case INDEX_op_br:
3558 remove_label_use(op, 0);
3559 break;
3560 case INDEX_op_brcond:
3561 remove_label_use(op, 3);
3562 break;
3563 case INDEX_op_brcond2_i32:
3564 remove_label_use(op, 5);
3565 break;
3566 default:
3567 break;
3568 }
3569
3570 QTAILQ_REMOVE(&s->ops, op, link);
3571 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3572 s->nb_ops--;
3573 }
3574
3575 void tcg_remove_ops_after(TCGOp *op)
3576 {
3577 TCGContext *s = tcg_ctx;
3578
3579 while (true) {
3580 TCGOp *last = tcg_last_op();
3581 if (last == op) {
3582 return;
3583 }
3584 tcg_op_remove(s, last);
3585 }
3586 }
3587
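/*
 * Obtain a TCGOp with room for at least NARGS arguments, preferring
 * a recycled op from free_ops and otherwise allocating from the TCG
 * arena (rounded up to 4 arguments to reduce fragmentation).
 */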
3588 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3589 {
3590 TCGContext *s = tcg_ctx;
3591 TCGOp *op = NULL;
3592
3593 if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3594 QTAILQ_FOREACH(op, &s->free_ops, link) {
3595 if (nargs <= op->nargs) {
3596 QTAILQ_REMOVE(&s->free_ops, op, link);
3597 nargs = op->nargs;
3598 goto found;
3599 }
3600 }
3601 }
3602
3603 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3604 nargs = MAX(4, nargs);
3605 op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3606
3607 found:
3608 memset(op, 0, offsetof(TCGOp, link));
3609 op->opc = opc;
3610 op->nargs = nargs;
3611
3612 /* Check for bitfield overflow. */
3613 tcg_debug_assert(op->nargs == nargs);
3614
3615 s->nb_ops++;
3616 return op;
3617 }
3618
3619 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3620 {
3621 TCGOp *op = tcg_op_alloc(opc, nargs);
3622
3623 if (tcg_ctx->emit_before_op) {
3624 QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3625 } else {
3626 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3627 }
3628 return op;
3629 }
3630
3631 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3632 TCGOpcode opc, TCGType type, unsigned nargs)
3633 {
3634 TCGOp *new_op = tcg_op_alloc(opc, nargs);
3635
3636 TCGOP_TYPE(new_op) = type;
3637 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3638 return new_op;
3639 }
3640
3641 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3642 TCGOpcode opc, TCGType type, unsigned nargs)
3643 {
3644 TCGOp *new_op = tcg_op_alloc(opc, nargs);
3645
3646 TCGOP_TYPE(new_op) = type;
3647 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3648 return new_op;
3649 }
3650
3651 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3652 {
3653 TCGLabelUse *u;
3654
3655 QSIMPLEQ_FOREACH(u, &from->branches, next) {
3656 TCGOp *op = u->op;
3657 switch (op->opc) {
3658 case INDEX_op_br:
3659 op->args[0] = label_arg(to);
3660 break;
3661 case INDEX_op_brcond:
3662 op->args[3] = label_arg(to);
3663 break;
3664 case INDEX_op_brcond2_i32:
3665 op->args[5] = label_arg(to);
3666 break;
3667 default:
3668 g_assert_not_reached();
3669 }
3670 }
3671
3672 QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3673 }
3674
3675 /* Reachability analysis: remove unreachable code. */
3676 static void __attribute__((noinline))
3677 reachable_code_pass(TCGContext *s)
3678 {
3679 TCGOp *op, *op_next, *op_prev;
3680 bool dead = false;
3681
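    /*
     * Walk the ops forward: 'dead' becomes true after an unconditional
     * control transfer and false again at the next referenced label, so
     * everything in between is removed.
     */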
3682 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3683 bool remove = dead;
3684 TCGLabel *label;
3685
3686 switch (op->opc) {
3687 case INDEX_op_set_label:
3688 label = arg_label(op->args[0]);
3689
3690 /*
3691 * Note that the first op in the TB is always a load,
3692 * so there is always something before a label.
3693 */
3694 op_prev = QTAILQ_PREV(op, link);
3695
3696 /*
3697 * If we find two sequential labels, move all branches to
3698 * reference the second label and remove the first label.
3699 * Do this before branch to next optimization, so that the
3700 * middle label is out of the way.
3701 */
3702 if (op_prev->opc == INDEX_op_set_label) {
3703 move_label_uses(label, arg_label(op_prev->args[0]));
3704 tcg_op_remove(s, op_prev);
3705 op_prev = QTAILQ_PREV(op, link);
3706 }
3707
3708 /*
3709 * Optimization can fold conditional branches to unconditional.
3710 * If we find a label which is preceded by an unconditional
3711 * branch to next, remove the branch. We couldn't do this when
3712 * processing the branch because any dead code between the branch
3713 * and label had not yet been removed.
3714 */
3715 if (op_prev->opc == INDEX_op_br &&
3716 label == arg_label(op_prev->args[0])) {
3717 tcg_op_remove(s, op_prev);
3718 /* Fall through means insns become live again. */
3719 dead = false;
3720 }
3721
3722 if (QSIMPLEQ_EMPTY(&label->branches)) {
3723 /*
3724 * While there is an occasional backward branch, virtually
3725 * all branches generated by the translators are forward.
3726                  * Which means that by the time we reach the label we will
3727                  * generally have already removed every reference to it, and
3728                  * there is little to be gained by iterating.
3729 */
3730 remove = true;
3731 } else {
3732 /* Once we see a label, insns become live again. */
3733 dead = false;
3734 remove = false;
3735 }
3736 break;
3737
3738 case INDEX_op_br:
3739 case INDEX_op_exit_tb:
3740 case INDEX_op_goto_ptr:
3741 /* Unconditional branches; everything following is dead. */
3742 dead = true;
3743 break;
3744
3745 case INDEX_op_call:
3746 /* Notice noreturn helper calls, raising exceptions. */
3747 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3748 dead = true;
3749 }
3750 break;
3751
3752 case INDEX_op_insn_start:
3753 /* Never remove -- we need to keep these for unwind. */
3754 remove = false;
3755 break;
3756
3757 default:
3758 break;
3759 }
3760
3761 if (remove) {
3762 tcg_op_remove(s, op);
3763 }
3764 }
3765 }
3766
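/*
 * Per-temp state bits used by the liveness passes: TS_DEAD marks a value
 * that is not (or no longer) live, TS_MEM marks a value that is (or must
 * be) present in its canonical memory slot.
 */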
3767 #define TS_DEAD 1
3768 #define TS_MEM 2
3769
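/*
 * op->life packs one DEAD_ARG and one SYNC_ARG bit per operand index:
 * IS_DEAD_ARG(n) means operand n dies at this op, NEED_SYNC_ARG(n) means
 * output n must also be written back to memory.
 */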
3770 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
3771 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3772
3773 /* For liveness_pass_1, the register preferences for a given temp. */
3774 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3775 {
3776 return ts->state_ptr;
3777 }
3778
3779 /* For liveness_pass_1, reset the preferences for a given temp to the
3780 * maximal regset for its type.
3781 */
3782 static inline void la_reset_pref(TCGTemp *ts)
3783 {
3784 *la_temp_pref(ts)
3785 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3786 }
3787
3788 /* liveness analysis: end of function: all temps are dead, and globals
3789 should be in memory. */
3790 static void la_func_end(TCGContext *s, int ng, int nt)
3791 {
3792 int i;
3793
3794 for (i = 0; i < ng; ++i) {
3795 s->temps[i].state = TS_DEAD | TS_MEM;
3796 la_reset_pref(&s->temps[i]);
3797 }
3798 for (i = ng; i < nt; ++i) {
3799 s->temps[i].state = TS_DEAD;
3800 la_reset_pref(&s->temps[i]);
3801 }
3802 }
3803
3804 /* liveness analysis: end of basic block: all temps are dead, globals
3805 and local temps should be in memory. */
3806 static void la_bb_end(TCGContext *s, int ng, int nt)
3807 {
3808 int i;
3809
3810 for (i = 0; i < nt; ++i) {
3811 TCGTemp *ts = &s->temps[i];
3812 int state;
3813
3814 switch (ts->kind) {
3815 case TEMP_FIXED:
3816 case TEMP_GLOBAL:
3817 case TEMP_TB:
3818 state = TS_DEAD | TS_MEM;
3819 break;
3820 case TEMP_EBB:
3821 case TEMP_CONST:
3822 state = TS_DEAD;
3823 break;
3824 default:
3825 g_assert_not_reached();
3826 }
3827 ts->state = state;
3828 la_reset_pref(ts);
3829 }
3830 }
3831
3832 /* liveness analysis: sync globals back to memory. */
3833 static void la_global_sync(TCGContext *s, int ng)
3834 {
3835 int i;
3836
3837 for (i = 0; i < ng; ++i) {
3838 int state = s->temps[i].state;
3839 s->temps[i].state = state | TS_MEM;
3840 if (state == TS_DEAD) {
3841 /* If the global was previously dead, reset prefs. */
3842 la_reset_pref(&s->temps[i]);
3843 }
3844 }
3845 }
3846
3847 /*
3848 * liveness analysis: conditional branch: all temps are dead unless
3849 * explicitly live-across-conditional-branch, globals and local temps
3850 * should be synced.
3851 */
3852 static void la_bb_sync(TCGContext *s, int ng, int nt)
3853 {
3854 la_global_sync(s, ng);
3855
3856 for (int i = ng; i < nt; ++i) {
3857 TCGTemp *ts = &s->temps[i];
3858 int state;
3859
3860 switch (ts->kind) {
3861 case TEMP_TB:
3862 state = ts->state;
3863 ts->state = state | TS_MEM;
3864 if (state != TS_DEAD) {
3865 continue;
3866 }
3867 break;
3868 case TEMP_EBB:
3869 case TEMP_CONST:
3870 continue;
3871 default:
3872 g_assert_not_reached();
3873 }
3874 la_reset_pref(&s->temps[i]);
3875 }
3876 }
3877
3878 /* liveness analysis: sync globals back to memory and kill. */
3879 static void la_global_kill(TCGContext *s, int ng)
3880 {
3881 int i;
3882
3883 for (i = 0; i < ng; i++) {
3884 s->temps[i].state = TS_DEAD | TS_MEM;
3885 la_reset_pref(&s->temps[i]);
3886 }
3887 }
3888
3889 /* liveness analysis: note live globals crossing calls. */
3890 static void la_cross_call(TCGContext *s, int nt)
3891 {
3892 TCGRegSet mask = ~tcg_target_call_clobber_regs;
3893 int i;
3894
3895 for (i = 0; i < nt; i++) {
3896 TCGTemp *ts = &s->temps[i];
3897 if (!(ts->state & TS_DEAD)) {
3898 TCGRegSet *pset = la_temp_pref(ts);
3899 TCGRegSet set = *pset;
3900
3901 set &= mask;
3902 /* If the combination is not possible, restart. */
3903 if (set == 0) {
3904 set = tcg_target_available_regs[ts->type] & mask;
3905 }
3906 *pset = set;
3907 }
3908 }
3909 }
3910
3911 /*
3912 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3913 * to TEMP_EBB, if possible.
3914 */
3915 static void __attribute__((noinline))
3916 liveness_pass_0(TCGContext *s)
3917 {
3918 void * const multiple_ebb = (void *)(uintptr_t)-1;
3919 int nb_temps = s->nb_temps;
3920 TCGOp *op, *ebb;
3921
3922 for (int i = s->nb_globals; i < nb_temps; ++i) {
3923 s->temps[i].state_ptr = NULL;
3924 }
3925
3926 /*
3927 * Represent each EBB by the op at which it begins. In the case of
3928 * the first EBB, this is the first op, otherwise it is a label.
3929 * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3930 * within a single EBB, else MULTIPLE_EBB.
3931 */
3932 ebb = QTAILQ_FIRST(&s->ops);
3933 QTAILQ_FOREACH(op, &s->ops, link) {
3934 const TCGOpDef *def;
3935 int nb_oargs, nb_iargs;
3936
3937 switch (op->opc) {
3938 case INDEX_op_set_label:
3939 ebb = op;
3940 continue;
3941 case INDEX_op_discard:
3942 continue;
3943 case INDEX_op_call:
3944 nb_oargs = TCGOP_CALLO(op);
3945 nb_iargs = TCGOP_CALLI(op);
3946 break;
3947 default:
3948 def = &tcg_op_defs[op->opc];
3949 nb_oargs = def->nb_oargs;
3950 nb_iargs = def->nb_iargs;
3951 break;
3952 }
3953
3954 for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3955 TCGTemp *ts = arg_temp(op->args[i]);
3956
3957 if (ts->kind != TEMP_TB) {
3958 continue;
3959 }
3960 if (ts->state_ptr == NULL) {
3961 ts->state_ptr = ebb;
3962 } else if (ts->state_ptr != ebb) {
3963 ts->state_ptr = multiple_ebb;
3964 }
3965 }
3966 }
3967
3968 /*
3969 * For TEMP_TB that turned out not to be used beyond one EBB,
3970 * reduce the liveness to TEMP_EBB.
3971 */
3972 for (int i = s->nb_globals; i < nb_temps; ++i) {
3973 TCGTemp *ts = &s->temps[i];
3974 if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3975 ts->kind = TEMP_EBB;
3976 }
3977 }
3978 }
3979
3980 static void assert_carry_dead(TCGContext *s)
3981 {
3982 /*
3983 * Carry operations can be separated by a few insns like mov,
3984 * load or store, but they should always be "close", and
3985 * carry-out operations should always be paired with carry-in.
3986 * At various boundaries, carry must have been consumed.
3987 */
3988 tcg_debug_assert(!s->carry_live);
3989 }
3990
3991 /* Liveness analysis: update the opc_arg_life array to tell if a
3992    given input argument is dead. Instructions updating dead
3993    temporaries are removed. */
3994 static void __attribute__((noinline))
3995 liveness_pass_1(TCGContext *s)
3996 {
3997 int nb_globals = s->nb_globals;
3998 int nb_temps = s->nb_temps;
3999 TCGOp *op, *op_prev;
4000 TCGRegSet *prefs;
4001
4002 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
4003 for (int i = 0; i < nb_temps; ++i) {
4004 s->temps[i].state_ptr = prefs + i;
4005 }
4006
4007 /* ??? Should be redundant with the exit_tb that ends the TB. */
4008 la_func_end(s, nb_globals, nb_temps);
4009
4010 s->carry_live = false;
4011 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
4012 int nb_iargs, nb_oargs;
4013 TCGOpcode opc_new, opc_new2;
4014 TCGLifeData arg_life = 0;
4015 TCGTemp *ts;
4016 TCGOpcode opc = op->opc;
4017 const TCGOpDef *def;
4018 const TCGArgConstraint *args_ct;
4019
4020 switch (opc) {
4021 case INDEX_op_call:
4022 assert_carry_dead(s);
4023 {
4024 const TCGHelperInfo *info = tcg_call_info(op);
4025 int call_flags = tcg_call_flags(op);
4026
4027 nb_oargs = TCGOP_CALLO(op);
4028 nb_iargs = TCGOP_CALLI(op);
4029
4030 /* pure functions can be removed if their result is unused */
4031 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
4032 for (int i = 0; i < nb_oargs; i++) {
4033 ts = arg_temp(op->args[i]);
4034 if (ts->state != TS_DEAD) {
4035 goto do_not_remove_call;
4036 }
4037 }
4038 goto do_remove;
4039 }
4040 do_not_remove_call:
4041
4042 /* Output args are dead. */
4043 for (int i = 0; i < nb_oargs; i++) {
4044 ts = arg_temp(op->args[i]);
4045 if (ts->state & TS_DEAD) {
4046 arg_life |= DEAD_ARG << i;
4047 }
4048 if (ts->state & TS_MEM) {
4049 arg_life |= SYNC_ARG << i;
4050 }
4051 ts->state = TS_DEAD;
4052 la_reset_pref(ts);
4053 }
4054
4055 /* Not used -- it will be tcg_target_call_oarg_reg(). */
4056 memset(op->output_pref, 0, sizeof(op->output_pref));
4057
4058 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
4059 TCG_CALL_NO_READ_GLOBALS))) {
4060 la_global_kill(s, nb_globals);
4061 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
4062 la_global_sync(s, nb_globals);
4063 }
4064
4065 /* Record arguments that die in this helper. */
4066 for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4067 ts = arg_temp(op->args[i]);
4068 if (ts->state & TS_DEAD) {
4069 arg_life |= DEAD_ARG << i;
4070 }
4071 }
4072
4073 /* For all live registers, remove call-clobbered prefs. */
4074 la_cross_call(s, nb_temps);
4075
4076 /*
4077 * Input arguments are live for preceding opcodes.
4078 *
4079 * For those arguments that die, and will be allocated in
4080 * registers, clear the register set for that arg, to be
4081 * filled in below. For args that will be on the stack,
4082 * reset to any available reg. Process arguments in reverse
4083 * order so that if a temp is used more than once, the stack
4084 * reset to max happens before the register reset to 0.
4085 */
4086 for (int i = nb_iargs - 1; i >= 0; i--) {
4087 const TCGCallArgumentLoc *loc = &info->in[i];
4088 ts = arg_temp(op->args[nb_oargs + i]);
4089
4090 if (ts->state & TS_DEAD) {
4091 switch (loc->kind) {
4092 case TCG_CALL_ARG_NORMAL:
4093 case TCG_CALL_ARG_EXTEND_U:
4094 case TCG_CALL_ARG_EXTEND_S:
4095 if (arg_slot_reg_p(loc->arg_slot)) {
4096 *la_temp_pref(ts) = 0;
4097 break;
4098 }
4099 /* fall through */
4100 default:
4101 *la_temp_pref(ts) =
4102 tcg_target_available_regs[ts->type];
4103 break;
4104 }
4105 ts->state &= ~TS_DEAD;
4106 }
4107 }
4108
4109 /*
4110 * For each input argument, add its input register to prefs.
4111 * If a temp is used once, this produces a single set bit;
4112 * if a temp is used multiple times, this produces a set.
4113 */
4114 for (int i = 0; i < nb_iargs; i++) {
4115 const TCGCallArgumentLoc *loc = &info->in[i];
4116 ts = arg_temp(op->args[nb_oargs + i]);
4117
4118 switch (loc->kind) {
4119 case TCG_CALL_ARG_NORMAL:
4120 case TCG_CALL_ARG_EXTEND_U:
4121 case TCG_CALL_ARG_EXTEND_S:
4122 if (arg_slot_reg_p(loc->arg_slot)) {
4123 tcg_regset_set_reg(*la_temp_pref(ts),
4124 tcg_target_call_iarg_regs[loc->arg_slot]);
4125 }
4126 break;
4127 default:
4128 break;
4129 }
4130 }
4131 }
4132 break;
4133 case INDEX_op_insn_start:
4134 assert_carry_dead(s);
4135 break;
4136 case INDEX_op_discard:
4137 /* mark the temporary as dead */
4138 ts = arg_temp(op->args[0]);
4139 ts->state = TS_DEAD;
4140 la_reset_pref(ts);
4141 break;
4142
4143 case INDEX_op_muls2:
4144 opc_new = INDEX_op_mul;
4145 opc_new2 = INDEX_op_mulsh;
4146 goto do_mul2;
4147 case INDEX_op_mulu2:
4148 opc_new = INDEX_op_mul;
4149 opc_new2 = INDEX_op_muluh;
4150 do_mul2:
4151 assert_carry_dead(s);
4152 if (arg_temp(op->args[1])->state == TS_DEAD) {
4153 if (arg_temp(op->args[0])->state == TS_DEAD) {
4154 /* Both parts of the operation are dead. */
4155 goto do_remove;
4156 }
4157 /* The high part of the operation is dead; generate the low. */
4158 op->opc = opc = opc_new;
4159 op->args[1] = op->args[2];
4160 op->args[2] = op->args[3];
4161 } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4162 tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4163 /* The low part of the operation is dead; generate the high. */
4164 op->opc = opc = opc_new2;
4165 op->args[0] = op->args[1];
4166 op->args[1] = op->args[2];
4167 op->args[2] = op->args[3];
4168 } else {
4169 goto do_not_remove;
4170 }
4171 /* Mark the single-word operation live. */
4172 goto do_not_remove;
4173
4174 case INDEX_op_addco:
4175 if (s->carry_live) {
4176 goto do_not_remove;
4177 }
4178 op->opc = opc = INDEX_op_add;
4179 goto do_default;
4180
4181 case INDEX_op_addcio:
4182 if (s->carry_live) {
4183 goto do_not_remove;
4184 }
4185 op->opc = opc = INDEX_op_addci;
4186 goto do_default;
4187
4188 case INDEX_op_subbo:
4189 if (s->carry_live) {
4190 goto do_not_remove;
4191 }
4192 /* Lower to sub, but this may also require canonicalization. */
4193 op->opc = opc = INDEX_op_sub;
4194 ts = arg_temp(op->args[2]);
4195 if (ts->kind == TEMP_CONST) {
4196 ts = tcg_constant_internal(ts->type, -ts->val);
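                /*
                 * The negated constant may be brand new to this pass; if so,
                 * give it a preference set and start it dead like any other
                 * not-yet-seen temp.
                 */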
4197 if (ts->state_ptr == NULL) {
4198 tcg_debug_assert(temp_idx(ts) == nb_temps);
4199 nb_temps++;
4200 ts->state_ptr = tcg_malloc(sizeof(TCGRegSet));
4201 ts->state = TS_DEAD;
4202 la_reset_pref(ts);
4203 }
4204 op->args[2] = temp_arg(ts);
4205 op->opc = opc = INDEX_op_add;
4206 }
4207 goto do_default;
4208
4209 case INDEX_op_subbio:
4210 if (s->carry_live) {
4211 goto do_not_remove;
4212 }
4213 op->opc = opc = INDEX_op_subbi;
4214 goto do_default;
4215
4216 case INDEX_op_addc1o:
4217 if (s->carry_live) {
4218 goto do_not_remove;
4219 }
4220 /* Lower to add, add +1. */
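            /* That is: first emit "dest = a1 + a2", then turn this op
               into "dest = dest + 1". */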
4221 op_prev = tcg_op_insert_before(s, op, INDEX_op_add,
4222 TCGOP_TYPE(op), 3);
4223 op_prev->args[0] = op->args[0];
4224 op_prev->args[1] = op->args[1];
4225 op_prev->args[2] = op->args[2];
4226 op->opc = opc = INDEX_op_add;
4227 op->args[1] = op->args[0];
4228 ts = arg_temp(op->args[0]);
4229 ts = tcg_constant_internal(ts->type, 1);
4230 op->args[2] = temp_arg(ts);
4231 goto do_default;
4232
4233 case INDEX_op_subb1o:
4234 if (s->carry_live) {
4235 goto do_not_remove;
4236 }
4237 /* Lower to sub, add -1. */
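            /* That is: first emit "dest = a1 - a2", then turn this op
               into "dest = dest + (-1)". */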
4238 op_prev = tcg_op_insert_before(s, op, INDEX_op_sub,
4239 TCGOP_TYPE(op), 3);
4240 op_prev->args[0] = op->args[0];
4241 op_prev->args[1] = op->args[1];
4242 op_prev->args[2] = op->args[2];
4243 op->opc = opc = INDEX_op_add;
4244 op->args[1] = op->args[0];
4245 ts = arg_temp(op->args[0]);
4246 ts = tcg_constant_internal(ts->type, -1);
4247 op->args[2] = temp_arg(ts);
4248 goto do_default;
4249
4250 default:
4251 do_default:
4252 /*
4253 * Test if the operation can be removed because all
4254 * its outputs are dead. We assume that nb_oargs == 0
4255 * implies side effects.
4256 */
4257 def = &tcg_op_defs[opc];
4258 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) {
4259 for (int i = def->nb_oargs - 1; i >= 0; i--) {
4260 if (arg_temp(op->args[i])->state != TS_DEAD) {
4261 goto do_not_remove;
4262 }
4263 }
4264 goto do_remove;
4265 }
4266 goto do_not_remove;
4267
4268 do_remove:
4269 tcg_op_remove(s, op);
4270 break;
4271
4272 do_not_remove:
4273 def = &tcg_op_defs[opc];
4274 nb_iargs = def->nb_iargs;
4275 nb_oargs = def->nb_oargs;
4276
4277 for (int i = 0; i < nb_oargs; i++) {
4278 ts = arg_temp(op->args[i]);
4279
4280 /* Remember the preference of the uses that followed. */
4281 if (i < ARRAY_SIZE(op->output_pref)) {
4282 op->output_pref[i] = *la_temp_pref(ts);
4283 }
4284
4285 /* Output args are dead. */
4286 if (ts->state & TS_DEAD) {
4287 arg_life |= DEAD_ARG << i;
4288 }
4289 if (ts->state & TS_MEM) {
4290 arg_life |= SYNC_ARG << i;
4291 }
4292 ts->state = TS_DEAD;
4293 la_reset_pref(ts);
4294 }
4295
4296 /* If end of basic block, update. */
4297 if (def->flags & TCG_OPF_BB_EXIT) {
4298 assert_carry_dead(s);
4299 la_func_end(s, nb_globals, nb_temps);
4300 } else if (def->flags & TCG_OPF_COND_BRANCH) {
4301 assert_carry_dead(s);
4302 la_bb_sync(s, nb_globals, nb_temps);
4303 } else if (def->flags & TCG_OPF_BB_END) {
4304 assert_carry_dead(s);
4305 la_bb_end(s, nb_globals, nb_temps);
4306 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4307 assert_carry_dead(s);
4308 la_global_sync(s, nb_globals);
4309 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4310 la_cross_call(s, nb_temps);
4311 }
4312 }
4313
4314 /* Record arguments that die in this opcode. */
4315 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4316 ts = arg_temp(op->args[i]);
4317 if (ts->state & TS_DEAD) {
4318 arg_life |= DEAD_ARG << i;
4319 }
4320 }
4321 if (def->flags & TCG_OPF_CARRY_OUT) {
4322 s->carry_live = false;
4323 }
4324
4325 /* Input arguments are live for preceding opcodes. */
4326 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4327 ts = arg_temp(op->args[i]);
4328 if (ts->state & TS_DEAD) {
4329 /* For operands that were dead, initially allow
4330 all regs for the type. */
4331 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4332 ts->state &= ~TS_DEAD;
4333 }
4334 }
4335 if (def->flags & TCG_OPF_CARRY_IN) {
4336 s->carry_live = true;
4337 }
4338
4339 /* Incorporate constraints for this operand. */
4340 switch (opc) {
4341 case INDEX_op_mov:
4342 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4343 have proper constraints. That said, special case
4344 moves to propagate preferences backward. */
4345 if (IS_DEAD_ARG(1)) {
4346 *la_temp_pref(arg_temp(op->args[0]))
4347 = *la_temp_pref(arg_temp(op->args[1]));
4348 }
4349 break;
4350
4351 default:
4352 args_ct = opcode_args_ct(op);
4353 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4354 const TCGArgConstraint *ct = &args_ct[i];
4355 TCGRegSet set, *pset;
4356
4357 ts = arg_temp(op->args[i]);
4358 pset = la_temp_pref(ts);
4359 set = *pset;
4360
4361 set &= ct->regs;
4362 if (ct->ialias) {
4363 set &= output_pref(op, ct->alias_index);
4364 }
4365 /* If the combination is not possible, restart. */
4366 if (set == 0) {
4367 set = ct->regs;
4368 }
4369 *pset = set;
4370 }
4371 break;
4372 }
4373 break;
4374 }
4375 op->life = arg_life;
4376 }
4377 assert_carry_dead(s);
4378 }
4379
4380 /* Liveness analysis: Convert indirect regs to direct temporaries. */
4381 static bool __attribute__((noinline))
4382 liveness_pass_2(TCGContext *s)
4383 {
4384 int nb_globals = s->nb_globals;
4385 int nb_temps, i;
4386 bool changes = false;
4387 TCGOp *op, *op_next;
4388
4389 /* Create a temporary for each indirect global. */
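    /*
     * An indirect global lives in memory addressed through another temp
     * rather than a fixed register; its uses are rewritten below into
     * explicit loads/stores of this direct shadow temp (dts).
     */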
4390 for (i = 0; i < nb_globals; ++i) {
4391 TCGTemp *its = &s->temps[i];
4392 if (its->indirect_reg) {
4393 TCGTemp *dts = tcg_temp_alloc(s);
4394 dts->type = its->type;
4395 dts->base_type = its->base_type;
4396 dts->temp_subindex = its->temp_subindex;
4397 dts->kind = TEMP_EBB;
4398 its->state_ptr = dts;
4399 } else {
4400 its->state_ptr = NULL;
4401 }
4402 /* All globals begin dead. */
4403 its->state = TS_DEAD;
4404 }
4405 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4406 TCGTemp *its = &s->temps[i];
4407 its->state_ptr = NULL;
4408 its->state = TS_DEAD;
4409 }
4410
4411 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4412 TCGOpcode opc = op->opc;
4413 const TCGOpDef *def = &tcg_op_defs[opc];
4414 TCGLifeData arg_life = op->life;
4415 int nb_iargs, nb_oargs, call_flags;
4416 TCGTemp *arg_ts, *dir_ts;
4417
4418 if (opc == INDEX_op_call) {
4419 nb_oargs = TCGOP_CALLO(op);
4420 nb_iargs = TCGOP_CALLI(op);
4421 call_flags = tcg_call_flags(op);
4422 } else {
4423 nb_iargs = def->nb_iargs;
4424 nb_oargs = def->nb_oargs;
4425
4426 /* Set flags similar to how calls require. */
4427 if (def->flags & TCG_OPF_COND_BRANCH) {
4428 /* Like reading globals: sync_globals */
4429 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4430 } else if (def->flags & TCG_OPF_BB_END) {
4431 /* Like writing globals: save_globals */
4432 call_flags = 0;
4433 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4434 /* Like reading globals: sync_globals */
4435 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4436 } else {
4437 /* No effect on globals. */
4438 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4439 TCG_CALL_NO_WRITE_GLOBALS);
4440 }
4441 }
4442
4443 /* Make sure that input arguments are available. */
4444 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4445 arg_ts = arg_temp(op->args[i]);
4446 dir_ts = arg_ts->state_ptr;
4447 if (dir_ts && arg_ts->state == TS_DEAD) {
4448 TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld,
4449 arg_ts->type, 3);
4450
4451 lop->args[0] = temp_arg(dir_ts);
4452 lop->args[1] = temp_arg(arg_ts->mem_base);
4453 lop->args[2] = arg_ts->mem_offset;
4454
4455 /* Loaded, but synced with memory. */
4456 arg_ts->state = TS_MEM;
4457 }
4458 }
4459
4460 /* Perform input replacement, and mark inputs that became dead.
4461 No action is required except keeping temp_state up to date
4462 so that we reload when needed. */
4463 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4464 arg_ts = arg_temp(op->args[i]);
4465 dir_ts = arg_ts->state_ptr;
4466 if (dir_ts) {
4467 op->args[i] = temp_arg(dir_ts);
4468 changes = true;
4469 if (IS_DEAD_ARG(i)) {
4470 arg_ts->state = TS_DEAD;
4471 }
4472 }
4473 }
4474
4475 /* Liveness analysis should ensure that the following are
4476 all correct, for call sites and basic block end points. */
4477 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4478 /* Nothing to do */
4479 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4480 for (i = 0; i < nb_globals; ++i) {
4481 /* Liveness should see that globals are synced back,
4482 that is, either TS_DEAD or TS_MEM. */
4483 arg_ts = &s->temps[i];
4484 tcg_debug_assert(arg_ts->state_ptr == 0
4485 || arg_ts->state != 0);
4486 }
4487 } else {
4488 for (i = 0; i < nb_globals; ++i) {
4489 /* Liveness should see that globals are saved back,
4490 that is, TS_DEAD, waiting to be reloaded. */
4491 arg_ts = &s->temps[i];
4492 tcg_debug_assert(arg_ts->state_ptr == 0
4493 || arg_ts->state == TS_DEAD);
4494 }
4495 }
4496
4497 /* Outputs become available. */
4498 if (opc == INDEX_op_mov) {
4499 arg_ts = arg_temp(op->args[0]);
4500 dir_ts = arg_ts->state_ptr;
4501 if (dir_ts) {
4502 op->args[0] = temp_arg(dir_ts);
4503 changes = true;
4504
4505 /* The output is now live and modified. */
4506 arg_ts->state = 0;
4507
4508 if (NEED_SYNC_ARG(0)) {
4509 TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
4510 arg_ts->type, 3);
4511 TCGTemp *out_ts = dir_ts;
4512
4513 if (IS_DEAD_ARG(0)) {
4514 out_ts = arg_temp(op->args[1]);
4515 arg_ts->state = TS_DEAD;
4516 tcg_op_remove(s, op);
4517 } else {
4518 arg_ts->state = TS_MEM;
4519 }
4520
4521 sop->args[0] = temp_arg(out_ts);
4522 sop->args[1] = temp_arg(arg_ts->mem_base);
4523 sop->args[2] = arg_ts->mem_offset;
4524 } else {
4525 tcg_debug_assert(!IS_DEAD_ARG(0));
4526 }
4527 }
4528 } else {
4529 for (i = 0; i < nb_oargs; i++) {
4530 arg_ts = arg_temp(op->args[i]);
4531 dir_ts = arg_ts->state_ptr;
4532 if (!dir_ts) {
4533 continue;
4534 }
4535 op->args[i] = temp_arg(dir_ts);
4536 changes = true;
4537
4538 /* The output is now live and modified. */
4539 arg_ts->state = 0;
4540
4541 /* Sync outputs upon their last write. */
4542 if (NEED_SYNC_ARG(i)) {
4543 TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
4544 arg_ts->type, 3);
4545
4546 sop->args[0] = temp_arg(dir_ts);
4547 sop->args[1] = temp_arg(arg_ts->mem_base);
4548 sop->args[2] = arg_ts->mem_offset;
4549
4550 arg_ts->state = TS_MEM;
4551 }
4552 /* Drop outputs that are dead. */
4553 if (IS_DEAD_ARG(i)) {
4554 arg_ts->state = TS_DEAD;
4555 }
4556 }
4557 }
4558 }
4559
4560 return changes;
4561 }
4562
4563 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4564 {
4565 intptr_t off;
4566 int size, align;
4567
4568 /* When allocating an object, look at the full type. */
4569 size = tcg_type_size(ts->base_type);
4570 switch (ts->base_type) {
4571 case TCG_TYPE_I32:
4572 align = 4;
4573 break;
4574 case TCG_TYPE_I64:
4575 case TCG_TYPE_V64:
4576 align = 8;
4577 break;
4578 case TCG_TYPE_I128:
4579 case TCG_TYPE_V128:
4580 case TCG_TYPE_V256:
4581 /*
4582 * Note that we do not require aligned storage for V256,
4583 * and that we provide alignment for I128 to match V128,
4584 * even if that's above what the host ABI requires.
4585 */
4586 align = 16;
4587 break;
4588 default:
4589 g_assert_not_reached();
4590 }
4591
4592 /*
4593 * Assume the stack is sufficiently aligned.
4594 * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4595 * and do not require 16 byte vector alignment. This seems slightly
4596 * easier than fully parameterizing the above switch statement.
4597 */
4598 align = MIN(TCG_TARGET_STACK_ALIGN, align);
4599 off = ROUND_UP(s->current_frame_offset, align);
4600
4601 /* If we've exhausted the stack frame, restart with a smaller TB. */
4602 if (off + size > s->frame_end) {
4603 tcg_raise_tb_overflow(s);
4604 }
4605 s->current_frame_offset = off + size;
4606 #if defined(__sparc__)
4607 off += TCG_TARGET_STACK_BIAS;
4608 #endif
4609
4610 /* If the object was subdivided, assign memory to all the parts. */
4611 if (ts->base_type != ts->type) {
4612 int part_size = tcg_type_size(ts->type);
4613 int part_count = size / part_size;
4614
4615 /*
4616 * Each part is allocated sequentially in tcg_temp_new_internal.
4617 * Jump back to the first part by subtracting the current index.
4618 */
4619 ts -= ts->temp_subindex;
4620 for (int i = 0; i < part_count; ++i) {
4621 ts[i].mem_offset = off + i * part_size;
4622 ts[i].mem_base = s->frame_temp;
4623 ts[i].mem_allocated = 1;
4624 }
4625 } else {
4626 ts->mem_offset = off;
4627 ts->mem_base = s->frame_temp;
4628 ts->mem_allocated = 1;
4629 }
4630 }
4631
4632 /* Assign @reg to @ts, and update reg_to_temp[]. */
4633 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4634 {
4635 if (ts->val_type == TEMP_VAL_REG) {
4636 TCGReg old = ts->reg;
4637 tcg_debug_assert(s->reg_to_temp[old] == ts);
4638 if (old == reg) {
4639 return;
4640 }
4641 s->reg_to_temp[old] = NULL;
4642 }
4643 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4644 s->reg_to_temp[reg] = ts;
4645 ts->val_type = TEMP_VAL_REG;
4646 ts->reg = reg;
4647 }
4648
4649 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4650 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4651 {
4652 tcg_debug_assert(type != TEMP_VAL_REG);
4653 if (ts->val_type == TEMP_VAL_REG) {
4654 TCGReg reg = ts->reg;
4655 tcg_debug_assert(s->reg_to_temp[reg] == ts);
4656 s->reg_to_temp[reg] = NULL;
4657 }
4658 ts->val_type = type;
4659 }
4660
4661 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4662
4663 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
4664 mark it free; otherwise mark it dead. */
4665 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4666 {
4667 TCGTempVal new_type;
4668
4669 switch (ts->kind) {
4670 case TEMP_FIXED:
4671 return;
4672 case TEMP_GLOBAL:
4673 case TEMP_TB:
4674 new_type = TEMP_VAL_MEM;
4675 break;
4676 case TEMP_EBB:
4677 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4678 break;
4679 case TEMP_CONST:
4680 new_type = TEMP_VAL_CONST;
4681 break;
4682 default:
4683 g_assert_not_reached();
4684 }
4685 set_temp_val_nonreg(s, ts, new_type);
4686 }
4687
4688 /* Mark a temporary as dead. */
4689 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4690 {
4691 temp_free_or_dead(s, ts, 1);
4692 }
4693
4694 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4695    register needs to be allocated to store a constant. If 'free_or_dead'
4696 is non-zero, subsequently release the temporary; if it is positive, the
4697 temp is dead; if it is negative, the temp is free. */
4698 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4699 TCGRegSet preferred_regs, int free_or_dead)
4700 {
4701 if (!temp_readonly(ts) && !ts->mem_coherent) {
4702 if (!ts->mem_allocated) {
4703 temp_allocate_frame(s, ts);
4704 }
4705 switch (ts->val_type) {
4706 case TEMP_VAL_CONST:
4707 /* If we're going to free the temp immediately, then we won't
4708 require it later in a register, so attempt to store the
4709 constant to memory directly. */
4710 if (free_or_dead
4711 && tcg_out_sti(s, ts->type, ts->val,
4712 ts->mem_base->reg, ts->mem_offset)) {
4713 break;
4714 }
4715 temp_load(s, ts, tcg_target_available_regs[ts->type],
4716 allocated_regs, preferred_regs);
4717 /* fallthrough */
4718
4719 case TEMP_VAL_REG:
4720 tcg_out_st(s, ts->type, ts->reg,
4721 ts->mem_base->reg, ts->mem_offset);
4722 break;
4723
4724 case TEMP_VAL_MEM:
4725 break;
4726
4727 case TEMP_VAL_DEAD:
4728 default:
4729 g_assert_not_reached();
4730 }
4731 ts->mem_coherent = 1;
4732 }
4733 if (free_or_dead) {
4734 temp_free_or_dead(s, ts, free_or_dead);
4735 }
4736 }
4737
4738 /* free register 'reg' by spilling the corresponding temporary if necessary */
4739 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4740 {
4741 TCGTemp *ts = s->reg_to_temp[reg];
4742 if (ts != NULL) {
4743 temp_sync(s, ts, allocated_regs, 0, -1);
4744 }
4745 }
4746
4747 /**
4748 * tcg_reg_alloc:
4749 * @required_regs: Set of registers in which we must allocate.
4750 * @allocated_regs: Set of registers which must be avoided.
4751 * @preferred_regs: Set of registers we should prefer.
4752 * @rev: True if we search the registers in "indirect" order.
4753 *
4754 * The allocated register must be in @required_regs & ~@allocated_regs,
4755 * but if we can put it in @preferred_regs we may save a move later.
4756 */
4757 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4758 TCGRegSet allocated_regs,
4759 TCGRegSet preferred_regs, bool rev)
4760 {
4761 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4762 TCGRegSet reg_ct[2];
4763 const int *order;
4764
4765 reg_ct[1] = required_regs & ~allocated_regs;
4766 tcg_debug_assert(reg_ct[1] != 0);
4767 reg_ct[0] = reg_ct[1] & preferred_regs;
4768
4769 /* Skip the preferred_regs option if it cannot be satisfied,
4770 or if the preference made no difference. */
4771 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4772
4773 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4774
4775 /* Try free registers, preferences first. */
4776 for (j = f; j < 2; j++) {
4777 TCGRegSet set = reg_ct[j];
4778
4779 if (tcg_regset_single(set)) {
4780 /* One register in the set. */
4781 TCGReg reg = tcg_regset_first(set);
4782 if (s->reg_to_temp[reg] == NULL) {
4783 return reg;
4784 }
4785 } else {
4786 for (i = 0; i < n; i++) {
4787 TCGReg reg = order[i];
4788 if (s->reg_to_temp[reg] == NULL &&
4789 tcg_regset_test_reg(set, reg)) {
4790 return reg;
4791 }
4792 }
4793 }
4794 }
4795
4796 /* We must spill something. */
4797 for (j = f; j < 2; j++) {
4798 TCGRegSet set = reg_ct[j];
4799
4800 if (tcg_regset_single(set)) {
4801 /* One register in the set. */
4802 TCGReg reg = tcg_regset_first(set);
4803 tcg_reg_free(s, reg, allocated_regs);
4804 return reg;
4805 } else {
4806 for (i = 0; i < n; i++) {
4807 TCGReg reg = order[i];
4808 if (tcg_regset_test_reg(set, reg)) {
4809 tcg_reg_free(s, reg, allocated_regs);
4810 return reg;
4811 }
4812 }
4813 }
4814 }
4815
4816 g_assert_not_reached();
4817 }
4818
4819 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4820 TCGRegSet allocated_regs,
4821 TCGRegSet preferred_regs, bool rev)
4822 {
4823 int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4824 TCGRegSet reg_ct[2];
4825 const int *order;
4826
4827 /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4828 reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4829 tcg_debug_assert(reg_ct[1] != 0);
4830 reg_ct[0] = reg_ct[1] & preferred_regs;
4831
4832 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4833
4834 /*
4835 * Skip the preferred_regs option if it cannot be satisfied,
4836 * or if the preference made no difference.
4837 */
4838 k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4839
4840 /*
4841 * Minimize the number of flushes by looking for 2 free registers first,
4842 * then a single flush, then two flushes.
4843 */
4844 for (fmin = 2; fmin >= 0; fmin--) {
4845 for (j = k; j < 2; j++) {
4846 TCGRegSet set = reg_ct[j];
4847
4848 for (i = 0; i < n; i++) {
4849 TCGReg reg = order[i];
4850
4851 if (tcg_regset_test_reg(set, reg)) {
4852 int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4853 if (f >= fmin) {
4854 tcg_reg_free(s, reg, allocated_regs);
4855 tcg_reg_free(s, reg + 1, allocated_regs);
4856 return reg;
4857 }
4858 }
4859 }
4860 }
4861 }
4862 g_assert_not_reached();
4863 }
4864
4865 /* Make sure the temporary is in a register. If needed, allocate the register
4866 from DESIRED while avoiding ALLOCATED. */
4867 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4868 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4869 {
4870 TCGReg reg;
4871
4872 switch (ts->val_type) {
4873 case TEMP_VAL_REG:
4874 return;
4875 case TEMP_VAL_CONST:
4876 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4877 preferred_regs, ts->indirect_base);
4878 if (ts->type <= TCG_TYPE_I64) {
4879 tcg_out_movi(s, ts->type, reg, ts->val);
4880 } else {
4881 uint64_t val = ts->val;
4882 MemOp vece = MO_64;
4883
4884 /*
4885 * Find the minimal vector element that matches the constant.
4886              * The targets will, in general, have to do this search anyway,
4887              * so do it generically here.
4888 */
4889 if (val == dup_const(MO_8, val)) {
4890 vece = MO_8;
4891 } else if (val == dup_const(MO_16, val)) {
4892 vece = MO_16;
4893 } else if (val == dup_const(MO_32, val)) {
4894 vece = MO_32;
4895 }
4896
4897 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4898 }
4899 ts->mem_coherent = 0;
4900 break;
4901 case TEMP_VAL_MEM:
4902 if (!ts->mem_allocated) {
4903 temp_allocate_frame(s, ts);
4904 }
4905 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4906 preferred_regs, ts->indirect_base);
4907 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4908 ts->mem_coherent = 1;
4909 break;
4910 case TEMP_VAL_DEAD:
4911 default:
4912 g_assert_not_reached();
4913 }
4914 set_temp_val_reg(s, ts, reg);
4915 }
4916
4917 /* Save a temporary to memory. 'allocated_regs' is used in case a
4918    temporary register needs to be allocated to store a constant. */
4919 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4920 {
4921 /* The liveness analysis already ensures that globals are back
4922        in memory. Keep a tcg_debug_assert for safety. */
4923 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4924 }
4925
4926 /* save globals to their canonical location and assume they can be
4927    modified by the following code. 'allocated_regs' is used in case a
4928    temporary register needs to be allocated to store a constant. */
4929 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4930 {
4931 int i, n;
4932
4933 for (i = 0, n = s->nb_globals; i < n; i++) {
4934 temp_save(s, &s->temps[i], allocated_regs);
4935 }
4936 }
4937
4938 /* sync globals to their canonical location and assume they can be
4939 read by the following code. 'allocated_regs' is used in case a
4940    temporary register needs to be allocated to store a constant. */
4941 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4942 {
4943 int i, n;
4944
4945 for (i = 0, n = s->nb_globals; i < n; i++) {
4946 TCGTemp *ts = &s->temps[i];
4947 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4948 || ts->kind == TEMP_FIXED
4949 || ts->mem_coherent);
4950 }
4951 }
4952
4953 /* at the end of a basic block, we assume all temporaries are dead and
4954 all globals are stored at their canonical location. */
4955 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4956 {
4957 assert_carry_dead(s);
4958 for (int i = s->nb_globals; i < s->nb_temps; i++) {
4959 TCGTemp *ts = &s->temps[i];
4960
4961 switch (ts->kind) {
4962 case TEMP_TB:
4963 temp_save(s, ts, allocated_regs);
4964 break;
4965 case TEMP_EBB:
4966 /* The liveness analysis already ensures that temps are dead.
4967                Keep a tcg_debug_assert for safety. */
4968 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4969 break;
4970 case TEMP_CONST:
4971 /* Similarly, we should have freed any allocated register. */
4972 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4973 break;
4974 default:
4975 g_assert_not_reached();
4976 }
4977 }
4978
4979 save_globals(s, allocated_regs);
4980 }
4981
4982 /*
4983 * At a conditional branch, we assume all temporaries are dead unless
4984 * explicitly live-across-conditional-branch; all globals and local
4985 * temps are synced to their location.
4986 */
4987 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4988 {
4989 assert_carry_dead(s);
4990 sync_globals(s, allocated_regs);
4991
4992 for (int i = s->nb_globals; i < s->nb_temps; i++) {
4993 TCGTemp *ts = &s->temps[i];
4994 /*
4995 * The liveness analysis already ensures that temps are dead.
4996 * Keep tcg_debug_asserts for safety.
4997 */
4998 switch (ts->kind) {
4999 case TEMP_TB:
5000 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
5001 break;
5002 case TEMP_EBB:
5003 case TEMP_CONST:
5004 break;
5005 default:
5006 g_assert_not_reached();
5007 }
5008 }
5009 }
5010
5011 /*
5012 * Specialized code generation for INDEX_op_mov_* with a constant.
5013 */
5014 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
5015 tcg_target_ulong val, TCGLifeData arg_life,
5016 TCGRegSet preferred_regs)
5017 {
5018 /* ENV should not be modified. */
5019 tcg_debug_assert(!temp_readonly(ots));
5020
5021 /* The movi is not explicitly generated here. */
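    /* The constant is merely recorded on the temp; it is materialized
       lazily by temp_load or temp_sync only if a later op needs it. */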
5022 set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
5023 ots->val = val;
5024 ots->mem_coherent = 0;
5025 if (NEED_SYNC_ARG(0)) {
5026 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
5027 } else if (IS_DEAD_ARG(0)) {
5028 temp_dead(s, ots);
5029 }
5030 }
5031
5032 /*
5033 * Specialized code generation for INDEX_op_mov_*.
5034 */
5035 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
5036 {
5037 const TCGLifeData arg_life = op->life;
5038 TCGRegSet allocated_regs, preferred_regs;
5039 TCGTemp *ts, *ots;
5040 TCGType otype, itype;
5041 TCGReg oreg, ireg;
5042
5043 allocated_regs = s->reserved_regs;
5044 preferred_regs = output_pref(op, 0);
5045 ots = arg_temp(op->args[0]);
5046 ts = arg_temp(op->args[1]);
5047
5048 /* ENV should not be modified. */
5049 tcg_debug_assert(!temp_readonly(ots));
5050
5051 /* Note that otype != itype for no-op truncation. */
5052 otype = ots->type;
5053 itype = ts->type;
5054
5055 if (ts->val_type == TEMP_VAL_CONST) {
5056 /* propagate constant or generate sti */
5057 tcg_target_ulong val = ts->val;
5058 if (IS_DEAD_ARG(1)) {
5059 temp_dead(s, ts);
5060 }
5061 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
5062 return;
5063 }
5064
5065 /* If the source value is in memory we're going to be forced
5066 to have it in a register in order to perform the copy. Copy
5067        the SOURCE value into its own register first; that way we
5068 don't have to reload SOURCE the next time it is used. */
5069 if (ts->val_type == TEMP_VAL_MEM) {
5070 temp_load(s, ts, tcg_target_available_regs[itype],
5071 allocated_regs, preferred_regs);
5072 }
5073 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
5074 ireg = ts->reg;
5075
5076 if (IS_DEAD_ARG(0)) {
5077 /* mov to a non-saved dead register makes no sense (even with
5078 liveness analysis disabled). */
5079 tcg_debug_assert(NEED_SYNC_ARG(0));
5080 if (!ots->mem_allocated) {
5081 temp_allocate_frame(s, ots);
5082 }
5083 tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
5084 if (IS_DEAD_ARG(1)) {
5085 temp_dead(s, ts);
5086 }
5087 temp_dead(s, ots);
5088 return;
5089 }
5090
5091 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
5092 /*
5093 * The mov can be suppressed. Kill input first, so that it
5094 * is unlinked from reg_to_temp, then set the output to the
5095 * reg that we saved from the input.
5096 */
5097 temp_dead(s, ts);
5098 oreg = ireg;
5099 } else {
5100 if (ots->val_type == TEMP_VAL_REG) {
5101 oreg = ots->reg;
5102 } else {
5103 /* Make sure to not spill the input register during allocation. */
5104 oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
5105 allocated_regs | ((TCGRegSet)1 << ireg),
5106 preferred_regs, ots->indirect_base);
5107 }
5108 if (!tcg_out_mov(s, otype, oreg, ireg)) {
5109 /*
5110 * Cross register class move not supported.
5111 * Store the source register into the destination slot
5112 * and leave the destination temp as TEMP_VAL_MEM.
5113 */
5114 assert(!temp_readonly(ots));
5115 if (!ts->mem_allocated) {
5116 temp_allocate_frame(s, ots);
5117 }
5118 tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
5119 set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
5120 ots->mem_coherent = 1;
5121 return;
5122 }
5123 }
5124 set_temp_val_reg(s, ots, oreg);
5125 ots->mem_coherent = 0;
5126
5127 if (NEED_SYNC_ARG(0)) {
5128 temp_sync(s, ots, allocated_regs, 0, 0);
5129 }
5130 }
5131
5132 /*
5133 * Specialized code generation for INDEX_op_dup_vec.
5134 */
5135 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
5136 {
5137 const TCGLifeData arg_life = op->life;
5138 TCGRegSet dup_out_regs, dup_in_regs;
5139 const TCGArgConstraint *dup_args_ct;
5140 TCGTemp *its, *ots;
5141 TCGType itype, vtype;
5142 unsigned vece;
5143 int lowpart_ofs;
5144 bool ok;
5145
5146 ots = arg_temp(op->args[0]);
5147 its = arg_temp(op->args[1]);
5148
5149 /* ENV should not be modified. */
5150 tcg_debug_assert(!temp_readonly(ots));
5151
5152 itype = its->type;
5153 vece = TCGOP_VECE(op);
5154 vtype = TCGOP_TYPE(op);
5155
5156 if (its->val_type == TEMP_VAL_CONST) {
5157 /* Propagate constant via movi -> dupi. */
5158 tcg_target_ulong val = dup_const(vece, its->val);
5159 if (IS_DEAD_ARG(1)) {
5160 temp_dead(s, its);
5161 }
5162 tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
5163 return;
5164 }
5165
5166 dup_args_ct = opcode_args_ct(op);
5167 dup_out_regs = dup_args_ct[0].regs;
5168 dup_in_regs = dup_args_ct[1].regs;
5169
5170 /* Allocate the output register now. */
5171 if (ots->val_type != TEMP_VAL_REG) {
5172 TCGRegSet allocated_regs = s->reserved_regs;
5173 TCGReg oreg;
5174
5175 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5176 /* Make sure to not spill the input register. */
5177 tcg_regset_set_reg(allocated_regs, its->reg);
5178 }
5179 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5180 output_pref(op, 0), ots->indirect_base);
5181 set_temp_val_reg(s, ots, oreg);
5182 }
5183
5184 switch (its->val_type) {
5185 case TEMP_VAL_REG:
5186 /*
5187      * The dup constraints must be broad, covering all possible VECE.
5188      * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5189 * to fail, indicating that extra moves are required for that case.
5190 */
5191 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5192 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5193 goto done;
5194 }
5195 /* Try again from memory or a vector input register. */
5196 }
5197 if (!its->mem_coherent) {
5198 /*
5199 * The input register is not synced, and so an extra store
5200 * would be required to use memory. Attempt an integer-vector
5201 * register move first. We do not have a TCGRegSet for this.
5202 */
5203 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5204 break;
5205 }
5206 /* Sync the temp back to its slot and load from there. */
5207 temp_sync(s, its, s->reserved_regs, 0, 0);
5208 }
5209 /* fall through */
5210
5211 case TEMP_VAL_MEM:
5212 lowpart_ofs = 0;
5213 if (HOST_BIG_ENDIAN) {
5214 lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5215 }
5216 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5217 its->mem_offset + lowpart_ofs)) {
5218 goto done;
5219 }
5220 /* Load the input into the destination vector register. */
5221 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5222 break;
5223
5224 default:
5225 g_assert_not_reached();
5226 }
5227
5228 /* We now have a vector input register, so dup must succeed. */
5229 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5230 tcg_debug_assert(ok);
5231
5232 done:
5233 ots->mem_coherent = 0;
5234 if (IS_DEAD_ARG(1)) {
5235 temp_dead(s, its);
5236 }
5237 if (NEED_SYNC_ARG(0)) {
5238 temp_sync(s, ots, s->reserved_regs, 0, 0);
5239 }
5240 if (IS_DEAD_ARG(0)) {
5241 temp_dead(s, ots);
5242 }
5243 }
5244
5245 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5246 {
5247 const TCGLifeData arg_life = op->life;
5248 const TCGOpDef * const def = &tcg_op_defs[op->opc];
5249 TCGRegSet i_allocated_regs;
5250 TCGRegSet o_allocated_regs;
5251 int i, k, nb_iargs, nb_oargs;
5252 TCGReg reg;
5253 TCGArg arg;
5254 const TCGArgConstraint *args_ct;
5255 const TCGArgConstraint *arg_ct;
5256 TCGTemp *ts;
5257 TCGArg new_args[TCG_MAX_OP_ARGS];
5258 int const_args[TCG_MAX_OP_ARGS];
5259 TCGCond op_cond;
5260
5261 if (def->flags & TCG_OPF_CARRY_IN) {
5262 tcg_debug_assert(s->carry_live);
5263 }
5264
5265 nb_oargs = def->nb_oargs;
5266 nb_iargs = def->nb_iargs;
5267
5268 /* copy constants */
5269 memcpy(new_args + nb_oargs + nb_iargs,
5270 op->args + nb_oargs + nb_iargs,
5271 sizeof(TCGArg) * def->nb_cargs);
5272
5273 i_allocated_regs = s->reserved_regs;
5274 o_allocated_regs = s->reserved_regs;
5275
5276 switch (op->opc) {
5277 case INDEX_op_brcond:
5278 op_cond = op->args[2];
5279 break;
5280 case INDEX_op_setcond:
5281 case INDEX_op_negsetcond:
5282 case INDEX_op_cmp_vec:
5283 op_cond = op->args[3];
5284 break;
5285 case INDEX_op_brcond2_i32:
5286 op_cond = op->args[4];
5287 break;
5288 case INDEX_op_movcond:
5289 case INDEX_op_setcond2_i32:
5290 case INDEX_op_cmpsel_vec:
5291 op_cond = op->args[5];
5292 break;
5293 default:
5294 /* No condition within opcode. */
5295 op_cond = TCG_COND_ALWAYS;
5296 break;
5297 }
5298
5299 args_ct = opcode_args_ct(op);
5300
5301 /* satisfy input constraints */
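    /* sort_index visits operands from most to least constrained, so the
       tightest register requirements are satisfied first. */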
5302 for (k = 0; k < nb_iargs; k++) {
5303 TCGRegSet i_preferred_regs, i_required_regs;
5304 bool allocate_new_reg, copyto_new_reg;
5305 TCGTemp *ts2;
5306 int i1, i2;
5307
5308 i = args_ct[nb_oargs + k].sort_index;
5309 arg = op->args[i];
5310 arg_ct = &args_ct[i];
5311 ts = arg_temp(arg);
5312
5313 if (ts->val_type == TEMP_VAL_CONST) {
5314 #ifdef TCG_REG_ZERO
5315 if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5316 /* Hardware zero register: indicate register via non-const. */
5317 const_args[i] = 0;
5318 new_args[i] = TCG_REG_ZERO;
5319 continue;
5320 }
5321 #endif
5322
5323 if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5324 op_cond, TCGOP_VECE(op))) {
5325 /* constant is OK for instruction */
5326 const_args[i] = 1;
5327 new_args[i] = ts->val;
5328 continue;
5329 }
5330 }
5331
5332 reg = ts->reg;
5333 i_preferred_regs = 0;
5334 i_required_regs = arg_ct->regs;
5335 allocate_new_reg = false;
5336 copyto_new_reg = false;
5337
5338 switch (arg_ct->pair) {
5339 case 0: /* not paired */
5340 if (arg_ct->ialias) {
5341 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5342
5343 /*
5344 * If the input is readonly, then it cannot also be an
5345 * output and aliased to itself. If the input is not
5346 * dead after the instruction, we must allocate a new
5347 * register and move it.
5348 */
5349 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5350 || args_ct[arg_ct->alias_index].newreg) {
5351 allocate_new_reg = true;
5352 } else if (ts->val_type == TEMP_VAL_REG) {
5353 /*
5354 * Check if the current register has already been
5355 * allocated for another input.
5356 */
5357 allocate_new_reg =
5358 tcg_regset_test_reg(i_allocated_regs, reg);
5359 }
5360 }
5361 if (!allocate_new_reg) {
5362 temp_load(s, ts, i_required_regs, i_allocated_regs,
5363 i_preferred_regs);
5364 reg = ts->reg;
5365 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5366 }
5367 if (allocate_new_reg) {
5368 /*
5369 * Allocate a new register matching the constraint
5370 * and move the temporary register into it.
5371 */
5372 temp_load(s, ts, tcg_target_available_regs[ts->type],
5373 i_allocated_regs, 0);
5374 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5375 i_preferred_regs, ts->indirect_base);
5376 copyto_new_reg = true;
5377 }
5378 break;
5379
5380 case 1:
5381 /* First of an input pair; if i1 == i2, the second is an output. */
5382 i1 = i;
5383 i2 = arg_ct->pair_index;
5384 ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5385
5386 /*
5387 * It is easier to default to allocating a new pair
5388 * and to identify a few cases where it's not required.
5389 */
5390 if (arg_ct->ialias) {
5391 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5392 if (IS_DEAD_ARG(i1) &&
5393 IS_DEAD_ARG(i2) &&
5394 !temp_readonly(ts) &&
5395 ts->val_type == TEMP_VAL_REG &&
5396 ts->reg < TCG_TARGET_NB_REGS - 1 &&
5397 tcg_regset_test_reg(i_required_regs, reg) &&
5398 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5399 !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5400 (ts2
5401 ? ts2->val_type == TEMP_VAL_REG &&
5402 ts2->reg == reg + 1 &&
5403 !temp_readonly(ts2)
5404 : s->reg_to_temp[reg + 1] == NULL)) {
5405 break;
5406 }
5407 } else {
5408 /* Without aliasing, the pair must also be an input. */
5409 tcg_debug_assert(ts2);
5410 if (ts->val_type == TEMP_VAL_REG &&
5411 ts2->val_type == TEMP_VAL_REG &&
5412 ts2->reg == reg + 1 &&
5413 tcg_regset_test_reg(i_required_regs, reg)) {
5414 break;
5415 }
5416 }
5417 reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5418 0, ts->indirect_base);
5419 goto do_pair;
5420
5421 case 2: /* pair second */
5422 reg = new_args[arg_ct->pair_index] + 1;
5423 goto do_pair;
5424
5425 case 3: /* ialias with second output, no first input */
5426 tcg_debug_assert(arg_ct->ialias);
5427 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5428
5429 if (IS_DEAD_ARG(i) &&
5430 !temp_readonly(ts) &&
5431 ts->val_type == TEMP_VAL_REG &&
5432 reg > 0 &&
5433 s->reg_to_temp[reg - 1] == NULL &&
5434 tcg_regset_test_reg(i_required_regs, reg) &&
5435 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5436 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5437 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5438 break;
5439 }
5440 reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5441 i_allocated_regs, 0,
5442 ts->indirect_base);
5443 tcg_regset_set_reg(i_allocated_regs, reg);
5444 reg += 1;
5445 goto do_pair;
5446
5447 do_pair:
5448 /*
5449 * If an aliased input is not dead after the instruction,
5450 * we must allocate a new register and move it.
5451 */
5452 if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5453 TCGRegSet t_allocated_regs = i_allocated_regs;
5454
5455 /*
5456 * Because of the alias, and the continued life, make sure
5457 * that the temp is somewhere *other* than the reg pair,
5458 * and we get a copy in reg.
5459 */
5460 tcg_regset_set_reg(t_allocated_regs, reg);
5461 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5462 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5463 /* If ts was already in reg, copy it somewhere else. */
5464 TCGReg nr;
5465 bool ok;
5466
5467 tcg_debug_assert(ts->kind != TEMP_FIXED);
5468 nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5469 t_allocated_regs, 0, ts->indirect_base);
5470 ok = tcg_out_mov(s, ts->type, nr, reg);
5471 tcg_debug_assert(ok);
5472
5473 set_temp_val_reg(s, ts, nr);
5474 } else {
5475 temp_load(s, ts, tcg_target_available_regs[ts->type],
5476 t_allocated_regs, 0);
5477 copyto_new_reg = true;
5478 }
5479 } else {
5480 /* Preferably allocate to reg, otherwise copy. */
5481 i_required_regs = (TCGRegSet)1 << reg;
5482 temp_load(s, ts, i_required_regs, i_allocated_regs,
5483 i_preferred_regs);
5484 copyto_new_reg = ts->reg != reg;
5485 }
5486 break;
5487
5488 default:
5489 g_assert_not_reached();
5490 }
5491
5492 if (copyto_new_reg) {
5493 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5494 /*
5495 * Cross register class move not supported. Sync the
5496 * temp back to its slot and load from there.
5497 */
5498 temp_sync(s, ts, i_allocated_regs, 0, 0);
5499 tcg_out_ld(s, ts->type, reg,
5500 ts->mem_base->reg, ts->mem_offset);
5501 }
5502 }
5503 new_args[i] = reg;
5504 const_args[i] = 0;
5505 tcg_regset_set_reg(i_allocated_regs, reg);
5506 }
5507
5508 /* mark dead temporaries and free the associated registers */
5509 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5510 if (IS_DEAD_ARG(i)) {
5511 temp_dead(s, arg_temp(op->args[i]));
5512 }
5513 }
5514
5515 if (def->flags & TCG_OPF_COND_BRANCH) {
5516 tcg_reg_alloc_cbranch(s, i_allocated_regs);
5517 } else if (def->flags & TCG_OPF_BB_END) {
5518 tcg_reg_alloc_bb_end(s, i_allocated_regs);
5519 } else {
5520 if (def->flags & TCG_OPF_CALL_CLOBBER) {
5521 assert_carry_dead(s);
5522 /* XXX: permit generic clobber register list ? */
5523 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5524 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5525 tcg_reg_free(s, i, i_allocated_regs);
5526 }
5527 }
5528 }
5529 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5530 /* sync globals if the op has side effects and might trigger
5531 an exception. */
5532 sync_globals(s, i_allocated_regs);
5533 }
5534
5535 /* satisfy the output constraints */
5536 for (k = 0; k < nb_oargs; k++) {
5537 i = args_ct[k].sort_index;
5538 arg = op->args[i];
5539 arg_ct = &args_ct[i];
5540 ts = arg_temp(arg);
5541
5542 /* ENV should not be modified. */
5543 tcg_debug_assert(!temp_readonly(ts));
5544
5545 switch (arg_ct->pair) {
5546 case 0: /* not paired */
5547 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5548 reg = new_args[arg_ct->alias_index];
5549 } else if (arg_ct->newreg) {
5550 reg = tcg_reg_alloc(s, arg_ct->regs,
5551 i_allocated_regs | o_allocated_regs,
5552 output_pref(op, k), ts->indirect_base);
5553 } else {
5554 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5555 output_pref(op, k), ts->indirect_base);
5556 }
5557 break;
5558
5559 case 1: /* first of pair */
5560 if (arg_ct->oalias) {
5561 reg = new_args[arg_ct->alias_index];
5562 } else if (arg_ct->newreg) {
5563 reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5564 i_allocated_regs | o_allocated_regs,
5565 output_pref(op, k),
5566 ts->indirect_base);
5567 } else {
5568 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5569 output_pref(op, k),
5570 ts->indirect_base);
5571 }
5572 break;
5573
5574 case 2: /* second of pair */
5575 if (arg_ct->oalias) {
5576 reg = new_args[arg_ct->alias_index];
5577 } else {
5578 reg = new_args[arg_ct->pair_index] + 1;
5579 }
5580 break;
5581
5582 case 3: /* first of pair, aliasing with a second input */
5583 tcg_debug_assert(!arg_ct->newreg);
5584 reg = new_args[arg_ct->pair_index] - 1;
5585 break;
5586
5587 default:
5588 g_assert_not_reached();
5589 }
5590 tcg_regset_set_reg(o_allocated_regs, reg);
5591 set_temp_val_reg(s, ts, reg);
5592 ts->mem_coherent = 0;
5593 new_args[i] = reg;
5594 }
5595 }
5596
5597 /* emit instruction */
5598 TCGType type = TCGOP_TYPE(op);
5599 switch (op->opc) {
5600 case INDEX_op_addc1o:
5601 tcg_out_set_carry(s);
5602 /* fall through */
5603 case INDEX_op_add:
5604 case INDEX_op_addcio:
5605 case INDEX_op_addco:
5606 case INDEX_op_and:
5607 case INDEX_op_andc:
5608 case INDEX_op_clz:
5609 case INDEX_op_ctz:
5610 case INDEX_op_divs:
5611 case INDEX_op_divu:
5612 case INDEX_op_eqv:
5613 case INDEX_op_mul:
5614 case INDEX_op_mulsh:
5615 case INDEX_op_muluh:
5616 case INDEX_op_nand:
5617 case INDEX_op_nor:
5618 case INDEX_op_or:
5619 case INDEX_op_orc:
5620 case INDEX_op_rems:
5621 case INDEX_op_remu:
5622 case INDEX_op_rotl:
5623 case INDEX_op_rotr:
5624 case INDEX_op_sar:
5625 case INDEX_op_shl:
5626 case INDEX_op_shr:
5627 case INDEX_op_xor:
5628 {
5629 const TCGOutOpBinary *out =
5630 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5631
5632 /* Constants should never appear in the first source operand. */
5633 tcg_debug_assert(!const_args[1]);
5634 if (const_args[2]) {
5635 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5636 } else {
5637 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5638 }
5639 }
5640 break;
5641
5642 case INDEX_op_sub:
5643 {
5644 const TCGOutOpSubtract *out = &outop_sub;
5645
5646 /*
5647 * Constants should never appear in the second source operand.
5648 * These are folded to add with negative constant.
5649 */
5650 tcg_debug_assert(!const_args[2]);
5651 if (const_args[1]) {
5652 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5653 } else {
5654 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5655 }
5656 }
5657 break;
5658
5659 case INDEX_op_subb1o:
5660 tcg_out_set_borrow(s);
5661 /* fall through */
5662 case INDEX_op_addci:
5663 case INDEX_op_subbi:
5664 case INDEX_op_subbio:
5665 case INDEX_op_subbo:
5666 {
5667 const TCGOutOpAddSubCarry *out =
5668 container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base);
5669
5670 if (const_args[2]) {
5671 if (const_args[1]) {
5672 out->out_rii(s, type, new_args[0],
5673 new_args[1], new_args[2]);
5674 } else {
5675 out->out_rri(s, type, new_args[0],
5676 new_args[1], new_args[2]);
5677 }
5678 } else if (const_args[1]) {
5679 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5680 } else {
5681 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5682 }
5683 }
5684 break;
5685
5686 case INDEX_op_bswap64:
5687 case INDEX_op_ext_i32_i64:
5688 case INDEX_op_extu_i32_i64:
5689 case INDEX_op_extrl_i64_i32:
5690 case INDEX_op_extrh_i64_i32:
5691 assert(TCG_TARGET_REG_BITS == 64);
5692 /* fall through */
5693 case INDEX_op_ctpop:
5694 case INDEX_op_neg:
5695 case INDEX_op_not:
5696 {
5697 const TCGOutOpUnary *out =
5698 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5699
5700 /* Constants should have been folded. */
5701 tcg_debug_assert(!const_args[1]);
5702 out->out_rr(s, type, new_args[0], new_args[1]);
5703 }
5704 break;
5705
5706 case INDEX_op_bswap16:
5707 case INDEX_op_bswap32:
5708 {
5709 const TCGOutOpBswap *out =
5710 container_of(all_outop[op->opc], TCGOutOpBswap, base);
5711
5712 tcg_debug_assert(!const_args[1]);
5713 out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
5714 }
5715 break;
5716
5717 case INDEX_op_deposit:
5718 {
5719 const TCGOutOpDeposit *out = &outop_deposit;
5720
5721 if (const_args[2]) {
5722 tcg_debug_assert(!const_args[1]);
5723 out->out_rri(s, type, new_args[0], new_args[1],
5724 new_args[2], new_args[3], new_args[4]);
5725 } else if (const_args[1]) {
5726 tcg_debug_assert(new_args[1] == 0);
5727 tcg_debug_assert(!const_args[2]);
5728 out->out_rzr(s, type, new_args[0], new_args[2],
5729 new_args[3], new_args[4]);
5730 } else {
5731 out->out_rrr(s, type, new_args[0], new_args[1],
5732 new_args[2], new_args[3], new_args[4]);
5733 }
5734 }
5735 break;
5736
5737 case INDEX_op_divs2:
5738 case INDEX_op_divu2:
5739 {
5740 const TCGOutOpDivRem *out =
5741 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5742
5743 /* Only used by x86 and s390x, which use matching constraints. */
5744 tcg_debug_assert(new_args[0] == new_args[2]);
5745 tcg_debug_assert(new_args[1] == new_args[3]);
5746 tcg_debug_assert(!const_args[4]);
5747 out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5748 }
5749 break;
5750
5751 case INDEX_op_extract:
5752 case INDEX_op_sextract:
5753 {
5754 const TCGOutOpExtract *out =
5755 container_of(all_outop[op->opc], TCGOutOpExtract, base);
5756
5757 tcg_debug_assert(!const_args[1]);
5758 out->out_rr(s, type, new_args[0], new_args[1],
5759 new_args[2], new_args[3]);
5760 }
5761 break;
5762
5763 case INDEX_op_extract2:
5764 {
5765 const TCGOutOpExtract2 *out = &outop_extract2;
5766
5767 tcg_debug_assert(!const_args[1]);
5768 tcg_debug_assert(!const_args[2]);
5769 out->out_rrr(s, type, new_args[0], new_args[1],
5770 new_args[2], new_args[3]);
5771 }
5772 break;
5773
5774 case INDEX_op_ld8u:
5775 case INDEX_op_ld8s:
5776 case INDEX_op_ld16u:
5777 case INDEX_op_ld16s:
5778 case INDEX_op_ld32u:
5779 case INDEX_op_ld32s:
5780 case INDEX_op_ld:
5781 {
5782 const TCGOutOpLoad *out =
5783 container_of(all_outop[op->opc], TCGOutOpLoad, base);
5784
5785 tcg_debug_assert(!const_args[1]);
5786 out->out(s, type, new_args[0], new_args[1], new_args[2]);
5787 }
5788 break;
5789
5790 case INDEX_op_muls2:
5791 case INDEX_op_mulu2:
5792 {
5793 const TCGOutOpMul2 *out =
5794 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5795
5796 tcg_debug_assert(!const_args[2]);
5797 tcg_debug_assert(!const_args[3]);
5798 out->out_rrrr(s, type, new_args[0], new_args[1],
5799 new_args[2], new_args[3]);
5800 }
5801 break;
5802
5803 case INDEX_op_st32:
5804 /* Handle as INDEX_op_st with TCG_TYPE_I32. */
5805 type = TCG_TYPE_I32;
5806 /* fall through */
5807 case INDEX_op_st:
5808 case INDEX_op_st8:
5809 case INDEX_op_st16:
5810 {
5811 const TCGOutOpStore *out =
5812 container_of(all_outop[op->opc], TCGOutOpStore, base);
5813
5814 if (const_args[0]) {
5815 out->out_i(s, type, new_args[0], new_args[1], new_args[2]);
5816 } else {
5817 out->out_r(s, type, new_args[0], new_args[1], new_args[2]);
5818 }
5819 }
5820 break;
5821
5822 case INDEX_op_qemu_ld:
5823 case INDEX_op_qemu_st:
5824 {
5825 const TCGOutOpQemuLdSt *out =
5826 container_of(all_outop[op->opc], TCGOutOpQemuLdSt, base);
5827
5828 out->out(s, type, new_args[0], new_args[1], new_args[2]);
5829 }
5830 break;
5831
5832 case INDEX_op_qemu_ld2:
5833 case INDEX_op_qemu_st2:
5834 {
5835 const TCGOutOpQemuLdSt2 *out =
5836 container_of(all_outop[op->opc], TCGOutOpQemuLdSt2, base);
5837
5838 out->out(s, type, new_args[0], new_args[1],
5839 new_args[2], new_args[3]);
5840 }
5841 break;
5842
5843 case INDEX_op_brcond:
5844 {
5845 const TCGOutOpBrcond *out = &outop_brcond;
5846 TCGCond cond = new_args[2];
5847 TCGLabel *label = arg_label(new_args[3]);
5848
5849 tcg_debug_assert(!const_args[0]);
5850 if (const_args[1]) {
5851 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5852 } else {
5853 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5854 }
5855 }
5856 break;
5857
5858 case INDEX_op_movcond:
5859 {
5860 const TCGOutOpMovcond *out = &outop_movcond;
5861 TCGCond cond = new_args[5];
5862
5863 tcg_debug_assert(!const_args[1]);
5864 out->out(s, type, cond, new_args[0],
5865 new_args[1], new_args[2], const_args[2],
5866 new_args[3], const_args[3],
5867 new_args[4], const_args[4]);
5868 }
5869 break;
5870
5871 case INDEX_op_setcond:
5872 case INDEX_op_negsetcond:
5873 {
5874 const TCGOutOpSetcond *out =
5875 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5876 TCGCond cond = new_args[3];
5877
5878 tcg_debug_assert(!const_args[1]);
5879 if (const_args[2]) {
5880 out->out_rri(s, type, cond,
5881 new_args[0], new_args[1], new_args[2]);
5882 } else {
5883 out->out_rrr(s, type, cond,
5884 new_args[0], new_args[1], new_args[2]);
5885 }
5886 }
5887 break;
5888
5889 #if TCG_TARGET_REG_BITS == 32
5890 case INDEX_op_brcond2_i32:
5891 {
5892 const TCGOutOpBrcond2 *out = &outop_brcond2;
5893 TCGCond cond = new_args[4];
5894 TCGLabel *label = arg_label(new_args[5]);
5895
5896 tcg_debug_assert(!const_args[0]);
5897 tcg_debug_assert(!const_args[1]);
5898 out->out(s, cond, new_args[0], new_args[1],
5899 new_args[2], const_args[2],
5900 new_args[3], const_args[3], label);
5901 }
5902 break;
5903 case INDEX_op_setcond2_i32:
5904 {
5905 const TCGOutOpSetcond2 *out = &outop_setcond2;
5906 TCGCond cond = new_args[5];
5907
5908 tcg_debug_assert(!const_args[1]);
5909 tcg_debug_assert(!const_args[2]);
5910 out->out(s, cond, new_args[0], new_args[1], new_args[2],
5911 new_args[3], const_args[3], new_args[4], const_args[4]);
5912 }
5913 break;
5914 #else
5915 case INDEX_op_brcond2_i32:
5916 case INDEX_op_setcond2_i32:
5917 g_assert_not_reached();
5918 #endif
5919
5920 case INDEX_op_goto_ptr:
5921 tcg_debug_assert(!const_args[0]);
5922 tcg_out_goto_ptr(s, new_args[0]);
5923 break;
5924
5925 default:
5926 tcg_debug_assert(def->flags & TCG_OPF_VECTOR);
5927 tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5928 TCGOP_VECE(op), new_args, const_args);
5929 break;
5930 }
5931
5932 if (def->flags & TCG_OPF_CARRY_IN) {
5933 s->carry_live = false;
5934 }
5935 if (def->flags & TCG_OPF_CARRY_OUT) {
5936 s->carry_live = true;
5937 }
5938
5939 /* move the outputs in the correct register if needed */
5940 for (i = 0; i < nb_oargs; i++) {
5941 ts = arg_temp(op->args[i]);
5942
5943 /* ENV should not be modified. */
5944 tcg_debug_assert(!temp_readonly(ts));
5945
5946 if (NEED_SYNC_ARG(i)) {
5947 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5948 } else if (IS_DEAD_ARG(i)) {
5949 temp_dead(s, ts);
5950 }
5951 }
5952 }
5953
5954 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5955 {
5956 const TCGLifeData arg_life = op->life;
5957 TCGTemp *ots, *itsl, *itsh;
5958 TCGType vtype = TCGOP_TYPE(op);
5959
5960 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5961 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5962 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5963
5964 ots = arg_temp(op->args[0]);
5965 itsl = arg_temp(op->args[1]);
5966 itsh = arg_temp(op->args[2]);
5967
5968 /* ENV should not be modified. */
5969 tcg_debug_assert(!temp_readonly(ots));
5970
5971 /* Allocate the output register now. */
5972 if (ots->val_type != TEMP_VAL_REG) {
5973 TCGRegSet allocated_regs = s->reserved_regs;
5974 TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5975 TCGReg oreg;
5976
5977 /* Make sure to not spill the input registers. */
5978 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5979 tcg_regset_set_reg(allocated_regs, itsl->reg);
5980 }
5981 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5982 tcg_regset_set_reg(allocated_regs, itsh->reg);
5983 }
5984
5985 oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5986 output_pref(op, 0), ots->indirect_base);
5987 set_temp_val_reg(s, ots, oreg);
5988 }
5989
5990 /* Promote dup2 of immediates to dupi_vec. */
5991 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5992 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5993 MemOp vece = MO_64;
5994
5995 if (val == dup_const(MO_8, val)) {
5996 vece = MO_8;
5997 } else if (val == dup_const(MO_16, val)) {
5998 vece = MO_16;
5999 } else if (val == dup_const(MO_32, val)) {
6000 vece = MO_32;
6001 }
6002
6003 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
6004 goto done;
6005 }
6006
6007 /* If the two inputs form one 64-bit value, try dupm_vec. */
6008 if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
6009 itsh->temp_subindex == !HOST_BIG_ENDIAN &&
6010 itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
6011 TCGTemp *its = itsl - HOST_BIG_ENDIAN;
6012
6013 temp_sync(s, its + 0, s->reserved_regs, 0, 0);
6014 temp_sync(s, its + 1, s->reserved_regs, 0, 0);
6015
6016 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
6017 its->mem_base->reg, its->mem_offset)) {
6018 goto done;
6019 }
6020 }
6021
6022 /* Fall back to generic expansion. */
6023 return false;
6024
6025 done:
6026 ots->mem_coherent = 0;
6027 if (IS_DEAD_ARG(1)) {
6028 temp_dead(s, itsl);
6029 }
6030 if (IS_DEAD_ARG(2)) {
6031 temp_dead(s, itsh);
6032 }
6033 if (NEED_SYNC_ARG(0)) {
6034 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
6035 } else if (IS_DEAD_ARG(0)) {
6036 temp_dead(s, ots);
6037 }
6038 return true;
6039 }
6040
6041 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
6042 TCGRegSet allocated_regs)
6043 {
6044 if (ts->val_type == TEMP_VAL_REG) {
6045 if (ts->reg != reg) {
6046 tcg_reg_free(s, reg, allocated_regs);
6047 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
6048 /*
6049 * Cross register class move not supported. Sync the
6050 * temp back to its slot and load from there.
6051 */
6052 temp_sync(s, ts, allocated_regs, 0, 0);
6053 tcg_out_ld(s, ts->type, reg,
6054 ts->mem_base->reg, ts->mem_offset);
6055 }
6056 }
6057 } else {
6058 TCGRegSet arg_set = 0;
6059
6060 tcg_reg_free(s, reg, allocated_regs);
6061 tcg_regset_set_reg(arg_set, reg);
6062 temp_load(s, ts, arg_set, allocated_regs, 0);
6063 }
6064 }
6065
6066 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
6067 TCGRegSet allocated_regs)
6068 {
6069 /*
6070 * When the destination is on the stack, load up the temp and store.
6071 * If there are many call-saved registers, the temp might live to
6072 * see another use; otherwise it'll be discarded.
6073 */
6074 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
6075 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
6076 arg_slot_stk_ofs(arg_slot));
6077 }
6078
6079 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
6080 TCGTemp *ts, TCGRegSet *allocated_regs)
6081 {
6082 if (arg_slot_reg_p(l->arg_slot)) {
6083 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
6084 load_arg_reg(s, reg, ts, *allocated_regs);
6085 tcg_regset_set_reg(*allocated_regs, reg);
6086 } else {
6087 load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
6088 }
6089 }
6090
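/*
 * Materialize the address ref_base + ref_off into argument slot arg_slot:
 * either directly into the corresponding argument register, or via a
 * scratch register stored to the slot's stack location.
 */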
6091 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
6092 intptr_t ref_off, TCGRegSet *allocated_regs)
6093 {
6094 TCGReg reg;
6095
6096 if (arg_slot_reg_p(arg_slot)) {
6097 reg = tcg_target_call_iarg_regs[arg_slot];
6098 tcg_reg_free(s, reg, *allocated_regs);
6099 tcg_out_addi_ptr(s, reg, ref_base, ref_off);
6100 tcg_regset_set_reg(*allocated_regs, reg);
6101 } else {
6102 reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
6103 *allocated_regs, 0, false);
6104 tcg_out_addi_ptr(s, reg, ref_base, ref_off);
6105 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
6106 arg_slot_stk_ofs(arg_slot));
6107 }
6108 }
6109
6110 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
6111 {
6112 const int nb_oargs = TCGOP_CALLO(op);
6113 const int nb_iargs = TCGOP_CALLI(op);
6114 const TCGLifeData arg_life = op->life;
6115 const TCGHelperInfo *info = tcg_call_info(op);
6116 TCGRegSet allocated_regs = s->reserved_regs;
6117 int i;
6118
6119 /*
6120 * Move inputs into place in reverse order,
6121 * so that we place stacked arguments first.
6122 */
6123 for (i = nb_iargs - 1; i >= 0; --i) {
6124 const TCGCallArgumentLoc *loc = &info->in[i];
6125 TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
6126
6127 switch (loc->kind) {
6128 case TCG_CALL_ARG_NORMAL:
6129 case TCG_CALL_ARG_EXTEND_U:
6130 case TCG_CALL_ARG_EXTEND_S:
6131 load_arg_normal(s, loc, ts, &allocated_regs);
6132 break;
6133 case TCG_CALL_ARG_BY_REF:
6134 load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
6135 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
6136 arg_slot_stk_ofs(loc->ref_slot),
6137 &allocated_regs);
6138 break;
6139 case TCG_CALL_ARG_BY_REF_N:
6140 load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
6141 break;
6142 default:
6143 g_assert_not_reached();
6144 }
6145 }
6146
6147 /* Mark dead temporaries and free the associated registers. */
6148 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
6149 if (IS_DEAD_ARG(i)) {
6150 temp_dead(s, arg_temp(op->args[i]));
6151 }
6152 }
6153
6154 /* Clobber call registers. */
6155 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
6156 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
6157 tcg_reg_free(s, i, allocated_regs);
6158 }
6159 }
6160
6161 /*
6162 * Save globals if they might be written by the helper,
6163 * sync them if they might be read.
6164 */
6165 if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
6166 /* Nothing to do */
6167 } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
6168 sync_globals(s, allocated_regs);
6169 } else {
6170 save_globals(s, allocated_regs);
6171 }
6172
6173 /*
6174 * If the ABI passes a pointer to the returned struct as the first
6175 * argument, load that now. Pass a pointer to the output home slot.
6176 */
6177 if (info->out_kind == TCG_CALL_RET_BY_REF) {
6178 TCGTemp *ts = arg_temp(op->args[0]);
6179
6180 if (!ts->mem_allocated) {
6181 temp_allocate_frame(s, ts);
6182 }
6183 load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
6184 }
6185
6186 tcg_out_call(s, tcg_call_func(op), info);
6187
6188 /* Assign output registers and emit moves if needed. */
6189 switch (info->out_kind) {
6190 case TCG_CALL_RET_NORMAL:
6191 for (i = 0; i < nb_oargs; i++) {
6192 TCGTemp *ts = arg_temp(op->args[i]);
6193 TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
6194
6195 /* ENV should not be modified. */
6196 tcg_debug_assert(!temp_readonly(ts));
6197
6198 set_temp_val_reg(s, ts, reg);
6199 ts->mem_coherent = 0;
6200 }
6201 break;
6202
6203 case TCG_CALL_RET_BY_VEC:
6204 {
6205 TCGTemp *ts = arg_temp(op->args[0]);
6206
6207 tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
6208 tcg_debug_assert(ts->temp_subindex == 0);
6209 if (!ts->mem_allocated) {
6210 temp_allocate_frame(s, ts);
6211 }
6212 tcg_out_st(s, TCG_TYPE_V128,
6213 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6214 ts->mem_base->reg, ts->mem_offset);
6215 }
6216 /* fall through to mark all parts in memory */
6217
6218 case TCG_CALL_RET_BY_REF:
6219 /* The callee has performed a write through the reference. */
6220 for (i = 0; i < nb_oargs; i++) {
6221 TCGTemp *ts = arg_temp(op->args[i]);
6222 ts->val_type = TEMP_VAL_MEM;
6223 }
6224 break;
6225
6226 default:
6227 g_assert_not_reached();
6228 }
6229
6230 /* Flush or discard output registers as needed. */
6231 for (i = 0; i < nb_oargs; i++) {
6232 TCGTemp *ts = arg_temp(op->args[i]);
6233 if (NEED_SYNC_ARG(i)) {
6234 temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
6235 } else if (IS_DEAD_ARG(i)) {
6236 temp_dead(s, ts);
6237 }
6238 }
6239 }
6240
6241 /**
6242 * atom_and_align_for_opc:
6243 * @s: tcg context
6244 * @opc: memory operation code
6245 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
6246 * @allow_two_ops: true if we are prepared to issue two operations
6247 *
6248 * Return the alignment and atomicity to use for the inline fast path
6249 * for the given memory operation. The alignment may be larger than
6250 * that specified in @opc, and the correct alignment will be diagnosed
6251 * by the slow path helper.
6252 *
6253 * If @allow_two_ops, the host is prepared to test for 2x alignment,
6254 * and issue two loads or stores for subalignment.
6255 */
6256 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
6257 MemOp host_atom, bool allow_two_ops)
6258 {
6259 MemOp align = memop_alignment_bits(opc);
6260 MemOp size = opc & MO_SIZE;
6261 MemOp half = size ? size - 1 : 0;
6262 MemOp atom = opc & MO_ATOM_MASK;
6263 MemOp atmax;
6264
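/*
 * Example, following the cases below: an MO_64 access with
 * MO_ATOM_IFALIGN_PAIR requires only half-size (MO_32) atomicity,
 * while MO_ATOM_WITHIN16 on a host without within-16 support forces
 * the alignment up to the full access size.
 */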
6265 switch (atom) {
6266 case MO_ATOM_NONE:
6267 /* The operation requires no specific atomicity. */
6268 atmax = MO_8;
6269 break;
6270
6271 case MO_ATOM_IFALIGN:
6272 atmax = size;
6273 break;
6274
6275 case MO_ATOM_IFALIGN_PAIR:
6276 atmax = half;
6277 break;
6278
6279 case MO_ATOM_WITHIN16:
6280 atmax = size;
6281 if (size == MO_128) {
6282 /* Misalignment implies !within16, and therefore no atomicity. */
6283 } else if (host_atom != MO_ATOM_WITHIN16) {
6284 /* The host does not implement within16, so require alignment. */
6285 align = MAX(align, size);
6286 }
6287 break;
6288
6289 case MO_ATOM_WITHIN16_PAIR:
6290 atmax = size;
6291 /*
6292 * Misalignment implies !within16, and therefore half atomicity.
6293 * Any host prepared for two operations can implement this with
6294 * half alignment.
6295 */
6296 if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
6297 align = MAX(align, half);
6298 }
6299 break;
6300
6301 case MO_ATOM_SUBALIGN:
6302 atmax = size;
6303 if (host_atom != MO_ATOM_SUBALIGN) {
6304 /* If unaligned but not odd, there are subobjects up to half. */
6305 if (allow_two_ops) {
6306 align = MAX(align, half);
6307 } else {
6308 align = MAX(align, size);
6309 }
6310 }
6311 break;
6312
6313 default:
6314 g_assert_not_reached();
6315 }
6316
6317 return (TCGAtomAlign){ .atom = atmax, .align = align };
6318 }
6319
6320 /*
6321 * Similarly for qemu_ld/st slow path helpers.
6322 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
6323 * using only the provided backend tcg_out_* functions.
6324 */
6325
6326 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
6327 {
6328 int ofs = arg_slot_stk_ofs(slot);
6329
6330 /*
6331 * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not
6332 * require extension to uint64_t, adjust the address for uint32_t.
6333 */
6334 if (HOST_BIG_ENDIAN &&
6335 TCG_TARGET_REG_BITS == 64 &&
6336 type == TCG_TYPE_I32) {
6337 ofs += 4;
6338 }
6339 return ofs;
6340 }
6341
6342 static void tcg_out_helper_load_slots(TCGContext *s,
6343 unsigned nmov, TCGMovExtend *mov,
6344 const TCGLdstHelperParam *parm)
6345 {
6346 unsigned i;
6347 TCGReg dst3;
6348
6349 /*
6350 * Start from the end, storing to the stack first.
6351 * This frees those registers, so we need not consider overlap.
6352 */
6353 for (i = nmov; i-- > 0; ) {
6354 unsigned slot = mov[i].dst;
6355
6356 if (arg_slot_reg_p(slot)) {
6357 goto found_reg;
6358 }
6359
6360 TCGReg src = mov[i].src;
6361 TCGType dst_type = mov[i].dst_type;
6362 MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6363
6364 /* The argument is going onto the stack; extend into scratch. */
6365 if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6366 tcg_debug_assert(parm->ntmp != 0);
6367 mov[i].dst = src = parm->tmp[0];
6368 tcg_out_movext1(s, &mov[i]);
6369 }
6370
6371 tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6372 tcg_out_helper_stk_ofs(dst_type, slot));
6373 }
6374 return;
6375
6376 found_reg:
6377 /*
6378 * The remaining arguments are in registers.
6379 * Convert slot numbers to argument registers.
6380 */
6381 nmov = i + 1;
6382 for (i = 0; i < nmov; ++i) {
6383 mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6384 }
6385
6386 switch (nmov) {
6387 case 4:
6388 /* The backend must have provided enough temps for the worst case. */
6389 tcg_debug_assert(parm->ntmp >= 2);
6390
6391 dst3 = mov[3].dst;
6392 for (unsigned j = 0; j < 3; ++j) {
6393 if (dst3 == mov[j].src) {
6394 /*
6395 * Conflict. Copy the source to a temporary, perform the
6396 * remaining moves, then the extension from our scratch
6397 * on the way out.
6398 */
6399 TCGReg scratch = parm->tmp[1];
6400
6401 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6402 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6403 tcg_out_movext1_new_src(s, &mov[3], scratch);
6404 break;
6405 }
6406 }
6407
6408 /* No conflicts: perform this move and continue. */
6409 tcg_out_movext1(s, &mov[3]);
6410 /* fall through */
6411
6412 case 3:
6413 tcg_out_movext3(s, mov, mov + 1, mov + 2,
6414 parm->ntmp ? parm->tmp[0] : -1);
6415 break;
6416 case 2:
6417 tcg_out_movext2(s, mov, mov + 1,
6418 parm->ntmp ? parm->tmp[0] : -1);
6419 break;
6420 case 1:
6421 tcg_out_movext1(s, mov);
6422 break;
6423 default:
6424 g_assert_not_reached();
6425 }
6426 }
6427
6428 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6429 TCGType type, tcg_target_long imm,
6430 const TCGLdstHelperParam *parm)
6431 {
6432 if (arg_slot_reg_p(slot)) {
6433 tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6434 } else {
6435 int ofs = tcg_out_helper_stk_ofs(type, slot);
6436 if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6437 tcg_debug_assert(parm->ntmp != 0);
6438 tcg_out_movi(s, type, parm->tmp[0], imm);
6439 tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6440 }
6441 }
6442 }
6443
6444 static void tcg_out_helper_load_common_args(TCGContext *s,
6445 const TCGLabelQemuLdst *ldst,
6446 const TCGLdstHelperParam *parm,
6447 const TCGHelperInfo *info,
6448 unsigned next_arg)
6449 {
6450 TCGMovExtend ptr_mov = {
6451 .dst_type = TCG_TYPE_PTR,
6452 .src_type = TCG_TYPE_PTR,
6453 .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6454 };
6455 const TCGCallArgumentLoc *loc = &info->in[0];
6456 TCGType type;
6457 unsigned slot;
6458 tcg_target_ulong imm;
6459
6460 /*
6461 * Handle env, which is always first.
6462 */
6463 ptr_mov.dst = loc->arg_slot;
6464 ptr_mov.src = TCG_AREG0;
6465 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6466
6467 /*
6468 * Handle oi.
6469 */
6470 imm = ldst->oi;
6471 loc = &info->in[next_arg];
6472 type = TCG_TYPE_I32;
6473 switch (loc->kind) {
6474 case TCG_CALL_ARG_NORMAL:
6475 break;
6476 case TCG_CALL_ARG_EXTEND_U:
6477 case TCG_CALL_ARG_EXTEND_S:
6478 /* No extension required for MemOpIdx. */
6479 tcg_debug_assert(imm <= INT32_MAX);
6480 type = TCG_TYPE_REG;
6481 break;
6482 default:
6483 g_assert_not_reached();
6484 }
6485 tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6486 next_arg++;
6487
6488 /*
6489 * Handle ra.
6490 */
6491 loc = &info->in[next_arg];
6492 slot = loc->arg_slot;
6493 if (parm->ra_gen) {
6494 int arg_reg = -1;
6495 TCGReg ra_reg;
6496
6497 if (arg_slot_reg_p(slot)) {
6498 arg_reg = tcg_target_call_iarg_regs[slot];
6499 }
6500 ra_reg = parm->ra_gen(s, ldst, arg_reg);
6501
6502 ptr_mov.dst = slot;
6503 ptr_mov.src = ra_reg;
6504 tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6505 } else {
6506 imm = (uintptr_t)ldst->raddr;
6507 tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6508 }
6509 }
6510
6511 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6512 const TCGCallArgumentLoc *loc,
6513 TCGType dst_type, TCGType src_type,
6514 TCGReg lo, TCGReg hi)
6515 {
6516 MemOp reg_mo;
6517
6518 if (dst_type <= TCG_TYPE_REG) {
6519 MemOp src_ext;
6520
6521 switch (loc->kind) {
6522 case TCG_CALL_ARG_NORMAL:
6523 src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6524 break;
6525 case TCG_CALL_ARG_EXTEND_U:
6526 dst_type = TCG_TYPE_REG;
6527 src_ext = MO_UL;
6528 break;
6529 case TCG_CALL_ARG_EXTEND_S:
6530 dst_type = TCG_TYPE_REG;
6531 src_ext = MO_SL;
6532 break;
6533 default:
6534 g_assert_not_reached();
6535 }
6536
6537 mov[0].dst = loc->arg_slot;
6538 mov[0].dst_type = dst_type;
6539 mov[0].src = lo;
6540 mov[0].src_type = src_type;
6541 mov[0].src_ext = src_ext;
6542 return 1;
6543 }
6544
6545 if (TCG_TARGET_REG_BITS == 32) {
6546 assert(dst_type == TCG_TYPE_I64);
6547 reg_mo = MO_32;
6548 } else {
6549 assert(dst_type == TCG_TYPE_I128);
6550 reg_mo = MO_64;
6551 }
6552
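/*
 * Split the value across two adjacent argument slots, placing the low
 * and high halves according to host endianness.
 */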
6553 mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6554 mov[0].src = lo;
6555 mov[0].dst_type = TCG_TYPE_REG;
6556 mov[0].src_type = TCG_TYPE_REG;
6557 mov[0].src_ext = reg_mo;
6558
6559 mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6560 mov[1].src = hi;
6561 mov[1].dst_type = TCG_TYPE_REG;
6562 mov[1].src_type = TCG_TYPE_REG;
6563 mov[1].src_ext = reg_mo;
6564
6565 return 2;
6566 }
6567
6568 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6569 const TCGLdstHelperParam *parm)
6570 {
6571 const TCGHelperInfo *info;
6572 const TCGCallArgumentLoc *loc;
6573 TCGMovExtend mov[2];
6574 unsigned next_arg, nmov;
6575 MemOp mop = get_memop(ldst->oi);
6576
6577 switch (mop & MO_SIZE) {
6578 case MO_8:
6579 case MO_16:
6580 case MO_32:
6581 info = &info_helper_ld32_mmu;
6582 break;
6583 case MO_64:
6584 info = &info_helper_ld64_mmu;
6585 break;
6586 case MO_128:
6587 info = &info_helper_ld128_mmu;
6588 break;
6589 default:
6590 g_assert_not_reached();
6591 }
6592
6593 /* Defer env argument. */
6594 next_arg = 1;
6595
6596 loc = &info->in[next_arg];
6597 if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6598 /*
6599 * 32-bit host with 32-bit guest: zero-extend the guest address
6600 * to 64-bits for the helper by storing the low part, then
6601 * load a zero for the high part.
6602 */
6603 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6604 TCG_TYPE_I32, TCG_TYPE_I32,
6605 ldst->addr_reg, -1);
6606 tcg_out_helper_load_slots(s, 1, mov, parm);
6607
6608 tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6609 TCG_TYPE_I32, 0, parm);
6610 next_arg += 2;
6611 } else {
6612 nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6613 ldst->addr_reg, -1);
6614 tcg_out_helper_load_slots(s, nmov, mov, parm);
6615 next_arg += nmov;
6616 }
6617
6618 switch (info->out_kind) {
6619 case TCG_CALL_RET_NORMAL:
6620 case TCG_CALL_RET_BY_VEC:
6621 break;
6622 case TCG_CALL_RET_BY_REF:
6623 /*
6624 * The return reference is in the first argument slot.
6625 * We need memory in which to return: re-use the top of stack.
6626 */
6627 {
6628 int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6629
6630 if (arg_slot_reg_p(0)) {
6631 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6632 TCG_REG_CALL_STACK, ofs_slot0);
6633 } else {
6634 tcg_debug_assert(parm->ntmp != 0);
6635 tcg_out_addi_ptr(s, parm->tmp[0],
6636 TCG_REG_CALL_STACK, ofs_slot0);
6637 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6638 TCG_REG_CALL_STACK, ofs_slot0);
6639 }
6640 }
6641 break;
6642 default:
6643 g_assert_not_reached();
6644 }
6645
6646 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6647 }
6648
6649 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6650 bool load_sign,
6651 const TCGLdstHelperParam *parm)
6652 {
6653 MemOp mop = get_memop(ldst->oi);
6654 TCGMovExtend mov[2];
6655 int ofs_slot0;
6656
6657 switch (ldst->type) {
6658 case TCG_TYPE_I64:
6659 if (TCG_TARGET_REG_BITS == 32) {
6660 break;
6661 }
6662 /* fall through */
6663
6664 case TCG_TYPE_I32:
6665 mov[0].dst = ldst->datalo_reg;
6666 mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6667 mov[0].dst_type = ldst->type;
6668 mov[0].src_type = TCG_TYPE_REG;
6669
6670 /*
6671 * If load_sign, then we allowed the helper to perform the
6672 * appropriate sign extension to tcg_target_ulong, and all
6673 * we need now is a plain move.
6674 *
6675 * If they do not, then we expect the relevant extension
6676 * instruction to be no more expensive than a move, and
6677 * we thus save the icache etc by only using one of two
6678 * helper functions.
6679 */
6680 if (load_sign || !(mop & MO_SIGN)) {
6681 if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6682 mov[0].src_ext = MO_32;
6683 } else {
6684 mov[0].src_ext = MO_64;
6685 }
6686 } else {
6687 mov[0].src_ext = mop & MO_SSIZE;
6688 }
6689 tcg_out_movext1(s, mov);
6690 return;
6691
6692 case TCG_TYPE_I128:
6693 tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6694 ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6695 switch (TCG_TARGET_CALL_RET_I128) {
6696 case TCG_CALL_RET_NORMAL:
6697 break;
6698 case TCG_CALL_RET_BY_VEC:
6699 tcg_out_st(s, TCG_TYPE_V128,
6700 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6701 TCG_REG_CALL_STACK, ofs_slot0);
6702 /* fall through */
6703 case TCG_CALL_RET_BY_REF:
6704 tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6705 TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6706 tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6707 TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6708 return;
6709 default:
6710 g_assert_not_reached();
6711 }
6712 break;
6713
6714 default:
6715 g_assert_not_reached();
6716 }
6717
6718 mov[0].dst = ldst->datalo_reg;
6719 mov[0].src =
6720 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6721 mov[0].dst_type = TCG_TYPE_REG;
6722 mov[0].src_type = TCG_TYPE_REG;
6723 mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6724
6725 mov[1].dst = ldst->datahi_reg;
6726 mov[1].src =
6727 tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6728 mov[1].dst_type = TCG_TYPE_REG;
6729 mov[1].src_type = TCG_TYPE_REG;
6730 mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6731
6732 tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6733 }
6734
6735 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6736 const TCGLdstHelperParam *parm)
6737 {
6738 const TCGHelperInfo *info;
6739 const TCGCallArgumentLoc *loc;
6740 TCGMovExtend mov[4];
6741 TCGType data_type;
6742 unsigned next_arg, nmov, n;
6743 MemOp mop = get_memop(ldst->oi);
6744
6745 switch (mop & MO_SIZE) {
6746 case MO_8:
6747 case MO_16:
6748 case MO_32:
6749 info = &info_helper_st32_mmu;
6750 data_type = TCG_TYPE_I32;
6751 break;
6752 case MO_64:
6753 info = &info_helper_st64_mmu;
6754 data_type = TCG_TYPE_I64;
6755 break;
6756 case MO_128:
6757 info = &info_helper_st128_mmu;
6758 data_type = TCG_TYPE_I128;
6759 break;
6760 default:
6761 g_assert_not_reached();
6762 }
6763
6764 /* Defer env argument. */
6765 next_arg = 1;
6766 nmov = 0;
6767
6768 /* Handle addr argument. */
6769 loc = &info->in[next_arg];
6770 tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6771 if (TCG_TARGET_REG_BITS == 32) {
6772 /*
6773 * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6774 * to 64-bits for the helper by storing the low part. Later,
6775 * after we have processed the register inputs, we will load a
6776 * zero for the high part.
6777 */
6778 tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6779 TCG_TYPE_I32, TCG_TYPE_I32,
6780 ldst->addr_reg, -1);
6781 next_arg += 2;
6782 nmov += 1;
6783 } else {
6784 n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6785 ldst->addr_reg, -1);
6786 next_arg += n;
6787 nmov += n;
6788 }
6789
6790 /* Handle data argument. */
6791 loc = &info->in[next_arg];
6792 switch (loc->kind) {
6793 case TCG_CALL_ARG_NORMAL:
6794 case TCG_CALL_ARG_EXTEND_U:
6795 case TCG_CALL_ARG_EXTEND_S:
6796 n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6797 ldst->datalo_reg, ldst->datahi_reg);
6798 next_arg += n;
6799 nmov += n;
6800 tcg_out_helper_load_slots(s, nmov, mov, parm);
6801 break;
6802
6803 case TCG_CALL_ARG_BY_REF:
6804 tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6805 tcg_debug_assert(data_type == TCG_TYPE_I128);
6806 tcg_out_st(s, TCG_TYPE_I64,
6807 HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6808 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6809 tcg_out_st(s, TCG_TYPE_I64,
6810 HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6811 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6812
6813 tcg_out_helper_load_slots(s, nmov, mov, parm);
6814
6815 if (arg_slot_reg_p(loc->arg_slot)) {
6816 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6817 TCG_REG_CALL_STACK,
6818 arg_slot_stk_ofs(loc->ref_slot));
6819 } else {
6820 tcg_debug_assert(parm->ntmp != 0);
6821 tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6822 arg_slot_stk_ofs(loc->ref_slot));
6823 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6824 TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6825 }
6826 next_arg += 2;
6827 break;
6828
6829 default:
6830 g_assert_not_reached();
6831 }
6832
6833 if (TCG_TARGET_REG_BITS == 32) {
6834 /* Zero extend the address by loading a zero for the high part. */
6835 loc = &info->in[1 + !HOST_BIG_ENDIAN];
6836 tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6837 }
6838
6839 tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6840 }
6841
6842 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6843 {
6844 int i, num_insns;
6845 TCGOp *op;
6846
6847 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6848 && qemu_log_in_addr_range(pc_start))) {
6849 FILE *logfile = qemu_log_trylock();
6850 if (logfile) {
6851 fprintf(logfile, "OP:\n");
6852 tcg_dump_ops(s, logfile, false);
6853 fprintf(logfile, "\n");
6854 qemu_log_unlock(logfile);
6855 }
6856 }
6857
6858 #ifdef CONFIG_DEBUG_TCG
6859 /* Ensure all labels referenced have been emitted. */
6860 {
6861 TCGLabel *l;
6862 bool error = false;
6863
6864 QSIMPLEQ_FOREACH(l, &s->labels, next) {
6865 if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6866 qemu_log_mask(CPU_LOG_TB_OP,
6867 "$L%d referenced but not present.\n", l->id);
6868 error = true;
6869 }
6870 }
6871 assert(!error);
6872 }
6873 #endif
6874
6875 /* Do not reuse any EBB that may be allocated within the TB. */
6876 tcg_temp_ebb_reset_freed(s);
6877
6878 tcg_optimize(s);
6879
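/*
 * Dead-code and liveness passes: the per-op life data recorded here
 * (dead/sync flags) is what IS_DEAD_ARG and NEED_SYNC_ARG consult
 * during register allocation below.
 */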
6880 reachable_code_pass(s);
6881 liveness_pass_0(s);
6882 liveness_pass_1(s);
6883
6884 if (s->nb_indirects > 0) {
6885 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6886 && qemu_log_in_addr_range(pc_start))) {
6887 FILE *logfile = qemu_log_trylock();
6888 if (logfile) {
6889 fprintf(logfile, "OP before indirect lowering:\n");
6890 tcg_dump_ops(s, logfile, false);
6891 fprintf(logfile, "\n");
6892 qemu_log_unlock(logfile);
6893 }
6894 }
6895
6896 /* Replace indirect temps with direct temps. */
6897 if (liveness_pass_2(s)) {
6898 /* If changes were made, re-run liveness. */
6899 liveness_pass_1(s);
6900 }
6901 }
6902
6903 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6904 && qemu_log_in_addr_range(pc_start))) {
6905 FILE *logfile = qemu_log_trylock();
6906 if (logfile) {
6907 fprintf(logfile, "OP after optimization and liveness analysis:\n");
6908 tcg_dump_ops(s, logfile, true);
6909 fprintf(logfile, "\n");
6910 qemu_log_unlock(logfile);
6911 }
6912 }
6913
6914 /* Initialize goto_tb jump offsets. */
6915 tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6916 tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6917 tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6918 tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6919
6920 tcg_reg_alloc_start(s);
6921
6922 /*
6923 * Reset the buffer pointers when restarting after overflow.
6924 * TODO: Move this into translate-all.c with the rest of the
6925 * buffer management. Having only this done here is confusing.
6926 */
6927 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6928 s->code_ptr = s->code_buf;
6929 s->data_gen_ptr = NULL;
6930
6931 QSIMPLEQ_INIT(&s->ldst_labels);
6932 s->pool_labels = NULL;
6933
6934 s->gen_insn_data =
6935 tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * INSN_START_WORDS);
6936
6937 tcg_out_tb_start(s);
6938
6939 num_insns = -1;
6940 s->carry_live = false;
6941 QTAILQ_FOREACH(op, &s->ops, link) {
6942 TCGOpcode opc = op->opc;
6943
6944 switch (opc) {
6945 case INDEX_op_extrl_i64_i32:
6946 assert(TCG_TARGET_REG_BITS == 64);
6947 /*
6948 * If TCG_TYPE_I32 is represented in some canonical form,
6949 * e.g. zero or sign-extended, then emit as a unary op.
6950 * Otherwise we can treat this as a plain move.
6951 * If the output dies, treat this as a plain move, because
6952 * this will be implemented with a store.
6953 */
6954 if (TCG_TARGET_HAS_extr_i64_i32) {
6955 TCGLifeData arg_life = op->life;
6956 if (!IS_DEAD_ARG(0)) {
6957 goto do_default;
6958 }
6959 }
6960 /* fall through */
6961 case INDEX_op_mov:
6962 case INDEX_op_mov_vec:
6963 tcg_reg_alloc_mov(s, op);
6964 break;
6965 case INDEX_op_dup_vec:
6966 tcg_reg_alloc_dup(s, op);
6967 break;
6968 case INDEX_op_insn_start:
6969 assert_carry_dead(s);
6970 if (num_insns >= 0) {
6971 size_t off = tcg_current_code_size(s);
6972 s->gen_insn_end_off[num_insns] = off;
6973 /* Assert that we do not overflow our stored offset. */
6974 assert(s->gen_insn_end_off[num_insns] == off);
6975 }
6976 num_insns++;
6977 for (i = 0; i < INSN_START_WORDS; ++i) {
6978 s->gen_insn_data[num_insns * INSN_START_WORDS + i] =
6979 tcg_get_insn_start_param(op, i);
6980 }
6981 break;
6982 case INDEX_op_discard:
6983 temp_dead(s, arg_temp(op->args[0]));
6984 break;
6985 case INDEX_op_set_label:
6986 tcg_reg_alloc_bb_end(s, s->reserved_regs);
6987 tcg_out_label(s, arg_label(op->args[0]));
6988 break;
6989 case INDEX_op_call:
6990 assert_carry_dead(s);
6991 tcg_reg_alloc_call(s, op);
6992 break;
6993 case INDEX_op_exit_tb:
6994 tcg_out_exit_tb(s, op->args[0]);
6995 break;
6996 case INDEX_op_goto_tb:
6997 tcg_out_goto_tb(s, op->args[0]);
6998 break;
6999 case INDEX_op_br:
7000 tcg_out_br(s, arg_label(op->args[0]));
7001 break;
7002 case INDEX_op_mb:
7003 tcg_out_mb(s, op->args[0]);
7004 break;
7005 case INDEX_op_dup2_vec:
7006 if (tcg_reg_alloc_dup2(s, op)) {
7007 break;
7008 }
7009 /* fall through */
7010 default:
7011 do_default:
7012 /* Sanity check that we've not introduced any unhandled opcodes. */
7013 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
7014 TCGOP_FLAGS(op)));
7015 /* Note: in order to speed up the code, it would be much
7016 faster to have specialized register allocator functions for
7017 some common argument patterns */
7018 tcg_reg_alloc_op(s, op);
7019 break;
7020 }
7021 /* Test for (pending) buffer overflow. The assumption is that any
7022 one operation beginning below the high water mark cannot overrun
7023 the buffer completely. Thus we can test for overflow after
7024 generating code without having to check during generation. */
7025 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
7026 return -1;
7027 }
7028 /* Test for TB overflow, as seen by gen_insn_end_off. */
7029 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
7030 return -2;
7031 }
7032 }
7033 assert_carry_dead(s);
7034
7035 tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
7036 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
7037
7038 /* Generate TB finalization at the end of block */
7039 i = tcg_out_ldst_finalize(s);
7040 if (i < 0) {
7041 return i;
7042 }
7043 i = tcg_out_pool_finalize(s);
7044 if (i < 0) {
7045 return i;
7046 }
7047 if (!tcg_resolve_relocs(s)) {
7048 return -2;
7049 }
7050
7051 #ifndef CONFIG_TCG_INTERPRETER
7052 /* flush instruction cache */
7053 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
7054 (uintptr_t)s->code_buf,
7055 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
7056 #endif
7057
7058 return tcg_current_code_size(s);
7059 }
7060
7061 #ifdef ELF_HOST_MACHINE
7062 /* In order to use this feature, the backend needs to do three things:
7063
7064 (1) Define ELF_HOST_MACHINE to indicate both what value to
7065 put into the ELF image and to indicate support for the feature.
7066
7067 (2) Define tcg_register_jit. This should create a buffer containing
7068 the contents of a .debug_frame section that describes the post-
7069 prologue unwind info for the tcg machine.
7070
7071 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
7072 */
7073
7074 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
7075 typedef enum {
7076 JIT_NOACTION = 0,
7077 JIT_REGISTER_FN,
7078 JIT_UNREGISTER_FN
7079 } jit_actions_t;
7080
7081 struct jit_code_entry {
7082 struct jit_code_entry *next_entry;
7083 struct jit_code_entry *prev_entry;
7084 const void *symfile_addr;
7085 uint64_t symfile_size;
7086 };
7087
7088 struct jit_descriptor {
7089 uint32_t version;
7090 uint32_t action_flag;
7091 struct jit_code_entry *relevant_entry;
7092 struct jit_code_entry *first_entry;
7093 };
7094
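/*
 * GDB sets a breakpoint on this function; the noinline attribute and
 * the empty asm statement keep the call from being optimized away.
 */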
7095 void __jit_debug_register_code(void) __attribute__((noinline));
7096 void __jit_debug_register_code(void)
7097 {
7098 asm("");
7099 }
7100
7101 /* Must statically initialize the version, because GDB may check
7102 the version before we can set it. */
7103 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
7104
7105 /* End GDB interface. */
7106
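/* Return the offset of str within strtab; str must be present. */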
7107 static int find_string(const char *strtab, const char *str)
7108 {
7109 const char *p = strtab + 1;
7110
7111 while (1) {
7112 if (strcmp(p, str) == 0) {
7113 return p - strtab;
7114 }
7115 p += strlen(p) + 1;
7116 }
7117 }
7118
7119 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
7120 const void *debug_frame,
7121 size_t debug_frame_size)
7122 {
7123 struct __attribute__((packed)) DebugInfo {
7124 uint32_t len;
7125 uint16_t version;
7126 uint32_t abbrev;
7127 uint8_t ptr_size;
7128 uint8_t cu_die;
7129 uint16_t cu_lang;
7130 uintptr_t cu_low_pc;
7131 uintptr_t cu_high_pc;
7132 uint8_t fn_die;
7133 char fn_name[16];
7134 uintptr_t fn_low_pc;
7135 uintptr_t fn_high_pc;
7136 uint8_t cu_eoc;
7137 };
7138
7139 struct ElfImage {
7140 ElfW(Ehdr) ehdr;
7141 ElfW(Phdr) phdr;
7142 ElfW(Shdr) shdr[7];
7143 ElfW(Sym) sym[2];
7144 struct DebugInfo di;
7145 uint8_t da[24];
7146 char str[80];
7147 };
7148
7149 struct ElfImage *img;
7150
7151 static const struct ElfImage img_template = {
7152 .ehdr = {
7153 .e_ident[EI_MAG0] = ELFMAG0,
7154 .e_ident[EI_MAG1] = ELFMAG1,
7155 .e_ident[EI_MAG2] = ELFMAG2,
7156 .e_ident[EI_MAG3] = ELFMAG3,
7157 .e_ident[EI_CLASS] = ELF_CLASS,
7158 .e_ident[EI_DATA] = ELF_DATA,
7159 .e_ident[EI_VERSION] = EV_CURRENT,
7160 .e_type = ET_EXEC,
7161 .e_machine = ELF_HOST_MACHINE,
7162 .e_version = EV_CURRENT,
7163 .e_phoff = offsetof(struct ElfImage, phdr),
7164 .e_shoff = offsetof(struct ElfImage, shdr),
7165 .e_ehsize = sizeof(ElfW(Shdr)),
7166 .e_phentsize = sizeof(ElfW(Phdr)),
7167 .e_phnum = 1,
7168 .e_shentsize = sizeof(ElfW(Shdr)),
7169 .e_shnum = ARRAY_SIZE(img->shdr),
7170 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
7171 #ifdef ELF_HOST_FLAGS
7172 .e_flags = ELF_HOST_FLAGS,
7173 #endif
7174 #ifdef ELF_OSABI
7175 .e_ident[EI_OSABI] = ELF_OSABI,
7176 #endif
7177 },
7178 .phdr = {
7179 .p_type = PT_LOAD,
7180 .p_flags = PF_X,
7181 },
7182 .shdr = {
7183 [0] = { .sh_type = SHT_NULL },
7184 /* Trick: The contents of code_gen_buffer are not present in
7185 this fake ELF file; that got allocated elsewhere. Therefore
7186 we mark .text as SHT_NOBITS (similar to .bss) so that readers
7187 will not look for contents. We can record any address. */
7188 [1] = { /* .text */
7189 .sh_type = SHT_NOBITS,
7190 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
7191 },
7192 [2] = { /* .debug_info */
7193 .sh_type = SHT_PROGBITS,
7194 .sh_offset = offsetof(struct ElfImage, di),
7195 .sh_size = sizeof(struct DebugInfo),
7196 },
7197 [3] = { /* .debug_abbrev */
7198 .sh_type = SHT_PROGBITS,
7199 .sh_offset = offsetof(struct ElfImage, da),
7200 .sh_size = sizeof(img->da),
7201 },
7202 [4] = { /* .debug_frame */
7203 .sh_type = SHT_PROGBITS,
7204 .sh_offset = sizeof(struct ElfImage),
7205 },
7206 [5] = { /* .symtab */
7207 .sh_type = SHT_SYMTAB,
7208 .sh_offset = offsetof(struct ElfImage, sym),
7209 .sh_size = sizeof(img->sym),
7210 .sh_info = 1,
7211 .sh_link = ARRAY_SIZE(img->shdr) - 1,
7212 .sh_entsize = sizeof(ElfW(Sym)),
7213 },
7214 [6] = { /* .strtab */
7215 .sh_type = SHT_STRTAB,
7216 .sh_offset = offsetof(struct ElfImage, str),
7217 .sh_size = sizeof(img->str),
7218 }
7219 },
7220 .sym = {
7221 [1] = { /* code_gen_buffer */
7222 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
7223 .st_shndx = 1,
7224 }
7225 },
7226 .di = {
7227 .len = sizeof(struct DebugInfo) - 4,
7228 .version = 2,
7229 .ptr_size = sizeof(void *),
7230 .cu_die = 1,
7231 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
7232 .fn_die = 2,
7233 .fn_name = "code_gen_buffer"
7234 },
7235 .da = {
7236 1, /* abbrev number (the cu) */
7237 0x11, 1, /* DW_TAG_compile_unit, has children */
7238 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
7239 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
7240 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
7241 0, 0, /* end of abbrev */
7242 2, /* abbrev number (the fn) */
7243 0x2e, 0, /* DW_TAG_subprogram, no children */
7244 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
7245 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
7246 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
7247 0, 0, /* end of abbrev */
7248 0 /* no more abbrev */
7249 },
7250 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
7251 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
7252 };
7253
7254 /* We only need a single jit entry; statically allocate it. */
7255 static struct jit_code_entry one_entry;
7256
7257 uintptr_t buf = (uintptr_t)buf_ptr;
7258 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
7259 DebugFrameHeader *dfh;
7260
7261 img = g_malloc(img_size);
7262 *img = img_template;
7263
7264 img->phdr.p_vaddr = buf;
7265 img->phdr.p_paddr = buf;
7266 img->phdr.p_memsz = buf_size;
7267
7268 img->shdr[1].sh_name = find_string(img->str, ".text");
7269 img->shdr[1].sh_addr = buf;
7270 img->shdr[1].sh_size = buf_size;
7271
7272 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
7273 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
7274
7275 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
7276 img->shdr[4].sh_size = debug_frame_size;
7277
7278 img->shdr[5].sh_name = find_string(img->str, ".symtab");
7279 img->shdr[6].sh_name = find_string(img->str, ".strtab");
7280
7281 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
7282 img->sym[1].st_value = buf;
7283 img->sym[1].st_size = buf_size;
7284
7285 img->di.cu_low_pc = buf;
7286 img->di.cu_high_pc = buf + buf_size;
7287 img->di.fn_low_pc = buf;
7288 img->di.fn_high_pc = buf + buf_size;
7289
7290 dfh = (DebugFrameHeader *)(img + 1);
7291 memcpy(dfh, debug_frame, debug_frame_size);
7292 dfh->fde.func_start = buf;
7293 dfh->fde.func_len = buf_size;
7294
7295 #ifdef DEBUG_JIT
7296 /* Enable this block to be able to debug the ELF image file creation.
7297 One can use readelf, objdump, or other inspection utilities. */
7298 {
7299 g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
7300 FILE *f = fopen(jit, "w+b");
7301 if (f) {
7302 if (fwrite(img, img_size, 1, f) != img_size) {
7303 /* Avoid stupid unused return value warning for fwrite. */
7304 }
7305 fclose(f);
7306 }
7307 }
7308 #endif
7309
7310 one_entry.symfile_addr = img;
7311 one_entry.symfile_size = img_size;
7312
7313 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
7314 __jit_debug_descriptor.relevant_entry = &one_entry;
7315 __jit_debug_descriptor.first_entry = &one_entry;
7316 __jit_debug_register_code();
7317 }
7318 #else
7319 /* No support for the feature. Provide the entry point expected by exec.c,
7320 and implement the internal function we declared earlier. */
7321
7322 static void tcg_register_jit_int(const void *buf, size_t size,
7323 const void *debug_frame,
7324 size_t debug_frame_size)
7325 {
7326 }
7327
7328 void tcg_register_jit(const void *buf, size_t buf_size)
7329 {
7330 }
7331 #endif /* ELF_HOST_MACHINE */
7332
7333 #if !TCG_TARGET_MAYBE_vec
7334 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
7335 {
7336 g_assert_not_reached();
7337 }
7338 #endif
7339