xref: /openbmc/qemu/tcg/tcg.c (revision 51e47cf8)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 #include "qemu/timer.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg/tcg-temp-internal.h"
64 #include "tcg-internal.h"
65 #include "accel/tcg/perf.h"
66 
67 /* Forward declarations for functions declared in tcg-target.c.inc and
68    used here. */
69 static void tcg_target_init(TCGContext *s);
70 static void tcg_target_qemu_prologue(TCGContext *s);
71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
72                         intptr_t value, intptr_t addend);
73 
74 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Fixed-size leading part of a debug-frame CIE record.
 * The first field is forced to pointer-size alignment.
 * NOTE(review): field meanings presumably follow the DWARF Common
 * Information Entry layout -- confirm against the per-host tables that
 * initialize this structure.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
84 
/*
 * Packed header of a debug-frame FDE record: its length, the offset of the
 * CIE it refers to, and the start/length of the function it covers.
 * NOTE(review): presumably func_start/func_len describe the generated-code
 * buffer -- confirm against the code that fills them in.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
91 
/* A CIE immediately followed by one FDE header, packed together. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
96 
/*
 * Bookkeeping for one qemu_ld/qemu_st operation.  Records can be chained
 * on a simple queue through @next.
 * NOTE(review): presumably consumed when out-of-line load/store paths are
 * emitted (see tcg_out_ldst_finalize) -- confirm in the backend.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
109 
110 static void tcg_register_jit_int(const void *buf, size_t size,
111                                  const void *debug_frame,
112                                  size_t debug_frame_size)
113     __attribute__((unused));
114 
115 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
116 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
117                        intptr_t arg2);
118 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_movi(TCGContext *s, TCGType type,
120                          TCGReg ret, tcg_target_long arg);
121 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
131 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
132 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
133 static void tcg_out_goto_tb(TCGContext *s, int which);
134 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
135                        const TCGArg args[TCG_MAX_OP_ARGS],
136                        const int const_args[TCG_MAX_OP_ARGS]);
137 #if TCG_TARGET_MAYBE_vec
138 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
139                             TCGReg dst, TCGReg src);
140 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
141                              TCGReg dst, TCGReg base, intptr_t offset);
142 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, int64_t arg);
144 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
145                            unsigned vecl, unsigned vece,
146                            const TCGArg args[TCG_MAX_OP_ARGS],
147                            const int const_args[TCG_MAX_OP_ARGS]);
148 #else
/*
 * TCG_TARGET_MAYBE_vec is false for this build: supply stubs with the
 * same signatures as the real vector emitters so that callers still
 * compile.  Each stub aborts through g_assert_not_reached() if executed.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub for the duplicate-from-memory vector emitter; must not be called. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub for the duplicate-immediate vector emitter; must not be called. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
/* Stub for the generic vector opcode emitter; must not be called. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
171 #endif
172 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
173                        intptr_t arg2);
174 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
175                         TCGReg base, intptr_t ofs);
176 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
177                          const TCGHelperInfo *info);
178 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
179 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
180 #ifdef TCG_TARGET_NEED_LDST_LABELS
181 static int tcg_out_ldst_finalize(TCGContext *s);
182 #endif
183 
/*
 * Initial context.  tcg_register_thread() either points tcg_ctx at it
 * (user-mode) or copies it into a freshly allocated context (softmmu).
 */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context used by the current thread. */
__thread TCGContext *tcg_ctx;

/* Array of registered contexts; entries are claimed in tcg_register_thread(). */
TCGContext **tcg_ctxs;
/* Number of tcg_ctxs[] entries claimed so far (incremented atomically). */
unsigned int tcg_cur_ctxs;
/* Capacity of tcg_ctxs[]; claiming an entry asserts the index is below it. */
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
/* NOTE(review): presumably set by the backend prologue code -- confirm. */
const void *tcg_code_gen_epilogue;
/* NOTE(review): presumably the RW-to-RX mapping offset for split-wx -- confirm. */
uintptr_t tcg_splitwx_diff;
193 
/* Only defined when TCG is not built as the interpreter. */
#ifndef CONFIG_TCG_INTERPRETER
/* NOTE(review): presumably the entry point into generated code -- confirm. */
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* One register set per TCGType value (indexed up to TCG_TYPE_COUNT). */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
/* NOTE(review): presumably the registers clobbered by a helper call -- confirm. */
static TCGRegSet tcg_target_call_clobber_regs;
200 
201 #if TCG_TARGET_INSN_UNIT_SIZE == 1
202 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
203 {
204     *s->code_ptr++ = v;
205 }
206 
207 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
208                                                       uint8_t v)
209 {
210     *p = v;
211 }
212 #endif
213 
214 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
215 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
216 {
217     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
218         *s->code_ptr++ = v;
219     } else {
220         tcg_insn_unit *p = s->code_ptr;
221         memcpy(p, &v, sizeof(v));
222         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
223     }
224 }
225 
226 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
227                                                        uint16_t v)
228 {
229     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
230         *p = v;
231     } else {
232         memcpy(p, &v, sizeof(v));
233     }
234 }
235 #endif
236 
237 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
238 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
239 {
240     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
241         *s->code_ptr++ = v;
242     } else {
243         tcg_insn_unit *p = s->code_ptr;
244         memcpy(p, &v, sizeof(v));
245         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
246     }
247 }
248 
249 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
250                                                        uint32_t v)
251 {
252     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
253         *p = v;
254     } else {
255         memcpy(p, &v, sizeof(v));
256     }
257 }
258 #endif
259 
260 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
261 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
262 {
263     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
264         *s->code_ptr++ = v;
265     } else {
266         tcg_insn_unit *p = s->code_ptr;
267         memcpy(p, &v, sizeof(v));
268         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
269     }
270 }
271 
272 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
273                                                        uint64_t v)
274 {
275     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
276         *p = v;
277     } else {
278         memcpy(p, &v, sizeof(v));
279     }
280 }
281 #endif
282 
283 /* label relocation processing */
284 
285 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
286                           TCGLabel *l, intptr_t addend)
287 {
288     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
289 
290     r->type = type;
291     r->ptr = code_ptr;
292     r->addend = addend;
293     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
294 }
295 
296 static void tcg_out_label(TCGContext *s, TCGLabel *l)
297 {
298     tcg_debug_assert(!l->has_value);
299     l->has_value = 1;
300     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
301 }
302 
303 TCGLabel *gen_new_label(void)
304 {
305     TCGContext *s = tcg_ctx;
306     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
307 
308     memset(l, 0, sizeof(TCGLabel));
309     l->id = s->nb_labels++;
310     QSIMPLEQ_INIT(&l->branches);
311     QSIMPLEQ_INIT(&l->relocs);
312 
313     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
314 
315     return l;
316 }
317 
318 static bool tcg_resolve_relocs(TCGContext *s)
319 {
320     TCGLabel *l;
321 
322     QSIMPLEQ_FOREACH(l, &s->labels, next) {
323         TCGRelocation *r;
324         uintptr_t value = l->u.value;
325 
326         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
327             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
328                 return false;
329             }
330         }
331     }
332     return true;
333 }
334 
/*
 * Record the current generated-code size as the reset offset of jump
 * slot @which in the translation block being generated.
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
343 
/*
 * Record the current generated-code size as the jump-instruction offset
 * of jump slot @which in the translation block being generated.
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
352 
/*
 * Return, as an integer, the address of the jmp_target_addr[@which] field
 * of the translation block being generated.
 */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
361 
/*
 * Signal overflow, starting over with fewer guest insns.
 * Does not return: control is transferred to the sigsetjmp point saved
 * in s->jmp_trans, which observes the value -2.
 */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
368 
/*
 * Description of one register move-with-extension, as consumed by
 * tcg_out_movext1() and tcg_out_movext2().
 */
typedef struct TCGMovExtend {
    TCGReg dst;             /* destination register */
    TCGReg src;             /* source register */
    TCGType dst_type;       /* integral type of the destination */
    TCGType src_type;       /* integral type of the source */
    MemOp src_ext;          /* extension to apply to the source */
} TCGMovExtend;
376 
377 /**
378  * tcg_out_movext -- move and extend
379  * @s: tcg context
380  * @dst_type: integral type for destination
381  * @dst: destination register
382  * @src_type: integral type for source
383  * @src_ext: extension to apply to source
384  * @src: source register
385  *
386  * Move or extend @src into @dst, depending on @src_ext and the types.
387  */
388 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
389                            TCGType src_type, MemOp src_ext, TCGReg src)
390 {
391     switch (src_ext) {
392     case MO_UB:
393         tcg_out_ext8u(s, dst, src);
394         break;
395     case MO_SB:
396         tcg_out_ext8s(s, dst_type, dst, src);
397         break;
398     case MO_UW:
399         tcg_out_ext16u(s, dst, src);
400         break;
401     case MO_SW:
402         tcg_out_ext16s(s, dst_type, dst, src);
403         break;
404     case MO_UL:
405     case MO_SL:
406         if (dst_type == TCG_TYPE_I32) {
407             if (src_type == TCG_TYPE_I32) {
408                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
409             } else {
410                 tcg_out_extrl_i64_i32(s, dst, src);
411             }
412         } else if (src_type == TCG_TYPE_I32) {
413             if (src_ext & MO_SIGN) {
414                 tcg_out_exts_i32_i64(s, dst, src);
415             } else {
416                 tcg_out_extu_i32_i64(s, dst, src);
417             }
418         } else {
419             if (src_ext & MO_SIGN) {
420                 tcg_out_ext32s(s, dst, src);
421             } else {
422                 tcg_out_ext32u(s, dst, src);
423             }
424         }
425         break;
426     case MO_UQ:
427         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
428         if (dst_type == TCG_TYPE_I32) {
429             tcg_out_extrl_i64_i32(s, dst, src);
430         } else {
431             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
432         }
433         break;
434     default:
435         g_assert_not_reached();
436     }
437 }
438 
/*
 * Minor variations on a theme, using a structure.
 *
 * As tcg_out_movext() with every parameter taken from @i, except that the
 * source register is @src rather than i->src.
 */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
445 
446 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
447 {
448     tcg_out_movext1_new_src(s, i, i->src);
449 }
450 
451 /**
452  * tcg_out_movext2 -- move and extend two pair
453  * @s: tcg context
454  * @i1: first move description
455  * @i2: second move description
456  * @scratch: temporary register, or -1 for none
457  *
458  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
459  * between the sources and destinations.
460  */
461 
462 static void __attribute__((unused))
463 tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
464                 const TCGMovExtend *i2, int scratch)
465 {
466     TCGReg src1 = i1->src;
467     TCGReg src2 = i2->src;
468 
469     if (i1->dst != src2) {
470         tcg_out_movext1(s, i1);
471         tcg_out_movext1(s, i2);
472         return;
473     }
474     if (i2->dst == src1) {
475         TCGType src1_type = i1->src_type;
476         TCGType src2_type = i2->src_type;
477 
478         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
479             /* The data is now in the correct registers, now extend. */
480             src1 = i2->src;
481             src2 = i1->src;
482         } else {
483             tcg_debug_assert(scratch >= 0);
484             tcg_out_mov(s, src1_type, scratch, src1);
485             src1 = scratch;
486         }
487     }
488     tcg_out_movext1_new_src(s, i2, src2);
489     tcg_out_movext1_new_src(s, i1, src1);
490 }
491 
/*
 * Token-pasting helpers: glue prefix P and 1..6 constraint names into a
 * single identifier, separating the names with underscores.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
498 
/*
 * Define an enumeration for the various combinations.
 *
 * In this pass each C_Ox_Iy()/C_N1_I2() invocation expands to one
 * enumerator followed by a comma, so including tcg-target-con-set.h
 * inside the enum body yields one enumerator per listed combination.
 * NOTE(review): assumes tcg-target-con-set.h consists solely of such
 * invocations -- confirm in the backend headers.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

/* Index type for constraint_sets[], one value per combination. */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
521 
522 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
523 
524 #undef C_O0_I1
525 #undef C_O0_I2
526 #undef C_O0_I3
527 #undef C_O0_I4
528 #undef C_O1_I1
529 #undef C_O1_I2
530 #undef C_O1_I3
531 #undef C_O1_I4
532 #undef C_N1_I2
533 #undef C_O2_I1
534 #undef C_O2_I2
535 #undef C_O2_I3
536 #undef C_O2_I4
537 
/*
 * Put all of the constraint sets into an array, indexed by the enum.
 *
 * In this pass each invocation expands to one TCGTargetOpDef initializer
 * whose args_ct_str holds the stringified constraint names, outputs first.
 * C_N1_I2 additionally prefixes its output constraint with "&".
 * Including the same header as for the enum keeps both in the same order.
 */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
560 
561 
562 #undef C_O0_I1
563 #undef C_O0_I2
564 #undef C_O0_I3
565 #undef C_O0_I4
566 #undef C_O1_I1
567 #undef C_O1_I2
568 #undef C_O1_I3
569 #undef C_O1_I4
570 #undef C_N1_I2
571 #undef C_O2_I1
572 #undef C_O2_I2
573 #undef C_O2_I3
574 #undef C_O2_I4
575 
/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 *
 * Same identifiers as in the enum pass, but without the trailing comma,
 * so each invocation can be used as an expression of type
 * TCGConstraintSetIndex.  These definitions stay in effect for the
 * tcg-target.c.inc include that follows.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
594 
595 #include "tcg-target.c.inc"
596 
597 static void alloc_tcg_plugin_context(TCGContext *s)
598 {
599 #ifdef CONFIG_PLUGIN
600     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
601     s->plugin_tb->insns =
602         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
603 #endif
604 }
605 
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
621 #ifdef CONFIG_USER_ONLY
/* User-mode variant: the calling thread simply uses the initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
626 #else
627 void tcg_register_thread(void)
628 {
629     TCGContext *s = g_malloc(sizeof(*s));
630     unsigned int i, n;
631 
632     *s = tcg_init_ctx;
633 
634     /* Relink mem_base.  */
635     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
636         if (tcg_init_ctx.temps[i].mem_base) {
637             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
638             tcg_debug_assert(b >= 0 && b < n);
639             s->temps[i].mem_base = &s->temps[b];
640         }
641     }
642 
643     /* Claim an entry in tcg_ctxs */
644     n = qatomic_fetch_inc(&tcg_cur_ctxs);
645     g_assert(n < tcg_max_ctxs);
646     qatomic_set(&tcg_ctxs[n], s);
647 
648     if (n > 0) {
649         alloc_tcg_plugin_context(s);
650         tcg_region_initial_alloc(s);
651     }
652 
653     tcg_ctx = s;
654 }
655 #endif /* !CONFIG_USER_ONLY */
656 
657 /* pool based memory allocation */
658 void *tcg_malloc_internal(TCGContext *s, int size)
659 {
660     TCGPool *p;
661     int pool_size;
662 
663     if (size > TCG_POOL_CHUNK_SIZE) {
664         /* big malloc: insert a new pool (XXX: could optimize) */
665         p = g_malloc(sizeof(TCGPool) + size);
666         p->size = size;
667         p->next = s->pool_first_large;
668         s->pool_first_large = p;
669         return p->data;
670     } else {
671         p = s->pool_current;
672         if (!p) {
673             p = s->pool_first;
674             if (!p)
675                 goto new_pool;
676         } else {
677             if (!p->next) {
678             new_pool:
679                 pool_size = TCG_POOL_CHUNK_SIZE;
680                 p = g_malloc(sizeof(TCGPool) + pool_size);
681                 p->size = pool_size;
682                 p->next = NULL;
683                 if (s->pool_current) {
684                     s->pool_current->next = p;
685                 } else {
686                     s->pool_first = p;
687                 }
688             } else {
689                 p = p->next;
690             }
691         }
692     }
693     s->pool_current = p;
694     s->pool_cur = p->data + size;
695     s->pool_end = p->data + p->size;
696     return p->data;
697 }
698 
699 void tcg_pool_reset(TCGContext *s)
700 {
701     TCGPool *p, *t;
702     for (p = s->pool_first_large; p; p = t) {
703         t = p->next;
704         g_free(p);
705     }
706     s->pool_first_large = NULL;
707     s->pool_cur = s->pool_end = NULL;
708     s->pool_current = NULL;
709 }
710 
711 #include "exec/helper-proto.h"
712 
/*
 * Table of helper descriptors; the entries come from including
 * exec/helper-tcg.h inside the initializer.
 */
static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* NOTE(review): presumably maps helper function pointers to entries above -- confirm. */
static GHashTable *helper_table;
717 
718 #ifdef CONFIG_TCG_INTERPRETER
719 static ffi_type *typecode_to_ffi(int argmask)
720 {
721     /*
722      * libffi does not support __int128_t, so we have forced Int128
723      * to use the structure definition instead of the builtin type.
724      */
725     static ffi_type *ffi_type_i128_elements[3] = {
726         &ffi_type_uint64,
727         &ffi_type_uint64,
728         NULL
729     };
730     static ffi_type ffi_type_i128 = {
731         .size = 16,
732         .alignment = __alignof__(Int128),
733         .type = FFI_TYPE_STRUCT,
734         .elements = ffi_type_i128_elements,
735     };
736 
737     switch (argmask) {
738     case dh_typecode_void:
739         return &ffi_type_void;
740     case dh_typecode_i32:
741         return &ffi_type_uint32;
742     case dh_typecode_s32:
743         return &ffi_type_sint32;
744     case dh_typecode_i64:
745         return &ffi_type_uint64;
746     case dh_typecode_s64:
747         return &ffi_type_sint64;
748     case dh_typecode_ptr:
749         return &ffi_type_pointer;
750     case dh_typecode_i128:
751         return &ffi_type_i128;
752     }
753     g_assert_not_reached();
754 }
755 
756 static void init_ffi_layouts(void)
757 {
758     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
759     GHashTable *ffi_table = g_hash_table_new(NULL, NULL);
760 
761     for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
762         TCGHelperInfo *info = &all_helpers[i];
763         unsigned typemask = info->typemask;
764         gpointer hash = (gpointer)(uintptr_t)typemask;
765         struct {
766             ffi_cif cif;
767             ffi_type *args[];
768         } *ca;
769         ffi_status status;
770         int nargs;
771         ffi_cif *cif;
772 
773         cif = g_hash_table_lookup(ffi_table, hash);
774         if (cif) {
775             info->cif = cif;
776             continue;
777         }
778 
779         /* Ignoring the return type, find the last non-zero field. */
780         nargs = 32 - clz32(typemask >> 3);
781         nargs = DIV_ROUND_UP(nargs, 3);
782         assert(nargs <= MAX_CALL_IARGS);
783 
784         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
785         ca->cif.rtype = typecode_to_ffi(typemask & 7);
786         ca->cif.nargs = nargs;
787 
788         if (nargs != 0) {
789             ca->cif.arg_types = ca->args;
790             for (int j = 0; j < nargs; ++j) {
791                 int typecode = extract32(typemask, (j + 1) * 3, 3);
792                 ca->args[j] = typecode_to_ffi(typecode);
793             }
794         }
795 
796         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
797                               ca->cif.rtype, ca->cif.arg_types);
798         assert(status == FFI_OK);
799 
800         cif = &ca->cif;
801         info->cif = cif;
802         g_hash_table_insert(ffi_table, hash, (gpointer)cif);
803     }
804 
805     g_hash_table_destroy(ffi_table);
806 }
807 #endif /* CONFIG_TCG_INTERPRETER */
808 
/*
 * Return true if argument slot @arg_slot is one of the slots backed by
 * tcg_target_call_iarg_regs[], i.e. its index is below the array size.
 */
static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}
818 
819 static inline int arg_slot_stk_ofs(unsigned arg_slot)
820 {
821     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
822     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
823 
824     tcg_debug_assert(stk_slot < max);
825     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
826 }
827 
/*
 * Running cursors used by the layout_arg_*() helpers while the argument
 * locations of one helper call are being assigned.
 */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
834 
835 static void layout_arg_even(TCGCumulativeArgs *cum)
836 {
837     cum->arg_slot += cum->arg_slot & 1;
838 }
839 
840 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
841                          TCGCallArgumentKind kind)
842 {
843     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
844 
845     *loc = (TCGCallArgumentLoc){
846         .kind = kind,
847         .arg_idx = cum->arg_idx,
848         .arg_slot = cum->arg_slot,
849     };
850     cum->info_in_idx++;
851     cum->arg_slot++;
852 }
853 
854 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
855                                 TCGHelperInfo *info, int n)
856 {
857     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
858 
859     for (int i = 0; i < n; ++i) {
860         /* Layout all using the same arg_idx, adjusting the subindex. */
861         loc[i] = (TCGCallArgumentLoc){
862             .kind = TCG_CALL_ARG_NORMAL,
863             .arg_idx = cum->arg_idx,
864             .tmp_subindex = i,
865             .arg_slot = cum->arg_slot + i,
866         };
867     }
868     cum->info_in_idx += n;
869     cum->arg_slot += n;
870 }
871 
872 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
873 {
874     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
875     int n = 128 / TCG_TARGET_REG_BITS;
876 
877     /* The first subindex carries the pointer. */
878     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
879 
880     /*
881      * The callee is allowed to clobber memory associated with
882      * structure pass by-reference.  Therefore we must make copies.
883      * Allocate space from "ref_slot", which will be adjusted to
884      * follow the parameters on the stack.
885      */
886     loc[0].ref_slot = cum->ref_slot;
887 
888     /*
889      * Subsequent words also go into the reference slot, but
890      * do not accumulate into the regular arguments.
891      */
892     for (int i = 1; i < n; ++i) {
893         loc[i] = (TCGCallArgumentLoc){
894             .kind = TCG_CALL_ARG_BY_REF_N,
895             .arg_idx = cum->arg_idx,
896             .tmp_subindex = i,
897             .ref_slot = cum->ref_slot + i,
898         };
899     }
900     cum->info_in_idx += n;
901     cum->ref_slot += n;
902 }
903 
/*
 * Compute the call layout for one helper and store it in @info.
 *
 * @info->typemask is decoded three bits at a time: the low three bits
 * give the return type, and each following 3-bit field gives the type
 * of one argument.  On return:
 *   - @info->nr_out (and, for non-void returns, @info->out_kind)
 *     describe the return value;
 *   - @info->in[0 .. nr_in-1] describe where each argument piece goes;
 *   - the ref_slot of every by-reference entry has been relocated to
 *     follow the regular arguments.
 *
 * Asserts if the layout overruns @info->in[] or the available
 * register + stack argument slots.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One output per host register needed to hold 64 bits. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * Each iteration consumes one 3-bit type field; the loop stops as
     * soon as the remaining typemask is zero.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* Map the helper typecode onto a TCG type. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Place the argument according to the backend's convention. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* The low bit of the typecode selects the entry after EXTEND_U. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    /* Two host words are needed for a 64-bit value. */
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Int128 alignment expressed in units of argument slots. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            /* Number of stack slots already used by regular arguments. */
            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        /* The copies must fit in the remaining stack slots. */
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        /* Convert from a stack-relative to an overall slot number. */
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            /* Rebase every by-reference entry; other kinds are untouched. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1084 
/*
 * Variant of tcg_target_reg_alloc_order in which the leading run of
 * registers that are not call-clobbered is reversed; populated by
 * tcg_context_init().
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations for functions used before their definition. */
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
1089 
/*
 * One-time initialization of the initial TCG context (tcg_init_ctx).
 *
 * Zeroes the context, allocates the per-opcode argument constraint
 * storage, registers all helpers and computes their call layouts,
 * initializes the backend, builds indirect_reg_alloc_order, sets up the
 * context bookkeeping (tcg_ctx / tcg_ctxs) and finally creates the
 * "env" global bound to TCG_AREG0.
 *
 * @max_cpus: in the non-user-only build, the number of entries allocated
 *            for tcg_ctxs (stored in tcg_max_ctxs).
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* A single zeroed array is shared out between all opcodes below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Hand each opcode its slice of the constraint array. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    /* Compute each helper's call layout, then index it by function pointer. */
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* Find n: the index of the first call-clobbered register. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* Entries [0, n) are copied in reverse order... */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* ...and the remainder is copied unchanged. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* TCG_AREG0 must still be unreserved; creating "env" reserves it. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1173 
/*
 * Initialize TCG: set up the initial context with tcg_context_init(),
 * then the code regions with tcg_region_init().
 *
 * @tb_size, @splitwx: passed through unchanged to tcg_region_init().
 * @max_cpus: passed to both initialization steps.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1179 
1180 /*
1181  * Allocate TBs right before their corresponding translated code, making
1182  * sure that TBs and code are on different cache lines.
1183  */
1184 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1185 {
1186     uintptr_t align = qemu_icache_linesize;
1187     TranslationBlock *tb;
1188     void *next;
1189 
1190  retry:
1191     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1192     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1193 
1194     if (unlikely(next > s->code_gen_highwater)) {
1195         if (tcg_region_alloc(s)) {
1196             return NULL;
1197         }
1198         goto retry;
1199     }
1200     qatomic_set(&s->code_gen_ptr, next);
1201     s->data_gen_ptr = NULL;
1202     return tb;
1203 }
1204 
/*
 * Generate the prologue at the current code_gen_ptr of @s.
 *
 * Emits the backend prologue via tcg_target_qemu_prologue(), finalizes
 * the constant pool when the backend uses one, reports the prologue to
 * perf, flushes the caches for the generated range (non-interpreter
 * builds), optionally logs a disassembly, and finally calls
 * tcg_region_prologue_set().
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    /* Start emitting at the current allocation pointer. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue entry point is the tcg_splitwx_to_rx() view of its start. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /*
                 * data_gen_ptr is non-NULL: disassemble only the bytes
                 * before it and dump the rest word by word as data.
                 */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                /* No data section recorded: everything is code. */
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1286 
1287 void tcg_func_start(TCGContext *s)
1288 {
1289     tcg_pool_reset(s);
1290     s->nb_temps = s->nb_globals;
1291 
1292     /* No temps have been previously allocated for size or locality.  */
1293     memset(s->free_temps, 0, sizeof(s->free_temps));
1294 
1295     /* No constant temps have been previously allocated. */
1296     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1297         if (s->const_table[i]) {
1298             g_hash_table_remove_all(s->const_table[i]);
1299         }
1300     }
1301 
1302     s->nb_ops = 0;
1303     s->nb_labels = 0;
1304     s->current_frame_offset = s->frame_start;
1305 
1306 #ifdef CONFIG_DEBUG_TCG
1307     s->goto_tb_issue_mask = 0;
1308 #endif
1309 
1310     QTAILQ_INIT(&s->ops);
1311     QTAILQ_INIT(&s->free_ops);
1312     QSIMPLEQ_INIT(&s->labels);
1313 }
1314 
1315 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1316 {
1317     int n = s->nb_temps++;
1318 
1319     if (n >= TCG_MAX_TEMPS) {
1320         tcg_raise_tb_overflow(s);
1321     }
1322     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1323 }
1324 
1325 static TCGTemp *tcg_global_alloc(TCGContext *s)
1326 {
1327     TCGTemp *ts;
1328 
1329     tcg_debug_assert(s->nb_globals == s->nb_temps);
1330     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1331     s->nb_globals++;
1332     ts = tcg_temp_alloc(s);
1333     ts->kind = TEMP_GLOBAL;
1334 
1335     return ts;
1336 }
1337 
1338 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1339                                             TCGReg reg, const char *name)
1340 {
1341     TCGTemp *ts;
1342 
1343     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1344 
1345     ts = tcg_global_alloc(s);
1346     ts->base_type = type;
1347     ts->type = type;
1348     ts->kind = TEMP_FIXED;
1349     ts->reg = reg;
1350     ts->name = name;
1351     tcg_regset_set_reg(s->reserved_regs, reg);
1352 
1353     return ts;
1354 }
1355 
1356 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1357 {
1358     s->frame_start = start;
1359     s->frame_end = start + size;
1360     s->frame_temp
1361         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1362 }
1363 
1364 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1365                                      intptr_t offset, const char *name)
1366 {
1367     TCGContext *s = tcg_ctx;
1368     TCGTemp *base_ts = tcgv_ptr_temp(base);
1369     TCGTemp *ts = tcg_global_alloc(s);
1370     int indirect_reg = 0;
1371 
1372     switch (base_ts->kind) {
1373     case TEMP_FIXED:
1374         break;
1375     case TEMP_GLOBAL:
1376         /* We do not support double-indirect registers.  */
1377         tcg_debug_assert(!base_ts->indirect_reg);
1378         base_ts->indirect_base = 1;
1379         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1380                             ? 2 : 1);
1381         indirect_reg = 1;
1382         break;
1383     default:
1384         g_assert_not_reached();
1385     }
1386 
1387     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1388         TCGTemp *ts2 = tcg_global_alloc(s);
1389         char buf[64];
1390 
1391         ts->base_type = TCG_TYPE_I64;
1392         ts->type = TCG_TYPE_I32;
1393         ts->indirect_reg = indirect_reg;
1394         ts->mem_allocated = 1;
1395         ts->mem_base = base_ts;
1396         ts->mem_offset = offset;
1397         pstrcpy(buf, sizeof(buf), name);
1398         pstrcat(buf, sizeof(buf), "_0");
1399         ts->name = strdup(buf);
1400 
1401         tcg_debug_assert(ts2 == ts + 1);
1402         ts2->base_type = TCG_TYPE_I64;
1403         ts2->type = TCG_TYPE_I32;
1404         ts2->indirect_reg = indirect_reg;
1405         ts2->mem_allocated = 1;
1406         ts2->mem_base = base_ts;
1407         ts2->mem_offset = offset + 4;
1408         ts2->temp_subindex = 1;
1409         pstrcpy(buf, sizeof(buf), name);
1410         pstrcat(buf, sizeof(buf), "_1");
1411         ts2->name = strdup(buf);
1412     } else {
1413         ts->base_type = type;
1414         ts->type = type;
1415         ts->indirect_reg = indirect_reg;
1416         ts->mem_allocated = 1;
1417         ts->mem_base = base_ts;
1418         ts->mem_offset = offset;
1419         ts->name = name;
1420     }
1421     return ts;
1422 }
1423 
1424 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1425 {
1426     TCGContext *s = tcg_ctx;
1427     TCGTemp *ts;
1428     int n;
1429 
1430     if (kind == TEMP_EBB) {
1431         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1432 
1433         if (idx < TCG_MAX_TEMPS) {
1434             /* There is already an available temp with the right type.  */
1435             clear_bit(idx, s->free_temps[type].l);
1436 
1437             ts = &s->temps[idx];
1438             ts->temp_allocated = 1;
1439             tcg_debug_assert(ts->base_type == type);
1440             tcg_debug_assert(ts->kind == kind);
1441             return ts;
1442         }
1443     } else {
1444         tcg_debug_assert(kind == TEMP_TB);
1445     }
1446 
1447     switch (type) {
1448     case TCG_TYPE_I32:
1449     case TCG_TYPE_V64:
1450     case TCG_TYPE_V128:
1451     case TCG_TYPE_V256:
1452         n = 1;
1453         break;
1454     case TCG_TYPE_I64:
1455         n = 64 / TCG_TARGET_REG_BITS;
1456         break;
1457     case TCG_TYPE_I128:
1458         n = 128 / TCG_TARGET_REG_BITS;
1459         break;
1460     default:
1461         g_assert_not_reached();
1462     }
1463 
1464     ts = tcg_temp_alloc(s);
1465     ts->base_type = type;
1466     ts->temp_allocated = 1;
1467     ts->kind = kind;
1468 
1469     if (n == 1) {
1470         ts->type = type;
1471     } else {
1472         ts->type = TCG_TYPE_REG;
1473 
1474         for (int i = 1; i < n; ++i) {
1475             TCGTemp *ts2 = tcg_temp_alloc(s);
1476 
1477             tcg_debug_assert(ts2 == ts + i);
1478             ts2->base_type = type;
1479             ts2->type = TCG_TYPE_REG;
1480             ts2->temp_allocated = 1;
1481             ts2->temp_subindex = i;
1482             ts2->kind = kind;
1483         }
1484     }
1485     return ts;
1486 }
1487 
1488 TCGv_vec tcg_temp_new_vec(TCGType type)
1489 {
1490     TCGTemp *t;
1491 
1492 #ifdef CONFIG_DEBUG_TCG
1493     switch (type) {
1494     case TCG_TYPE_V64:
1495         assert(TCG_TARGET_HAS_v64);
1496         break;
1497     case TCG_TYPE_V128:
1498         assert(TCG_TARGET_HAS_v128);
1499         break;
1500     case TCG_TYPE_V256:
1501         assert(TCG_TARGET_HAS_v256);
1502         break;
1503     default:
1504         g_assert_not_reached();
1505     }
1506 #endif
1507 
1508     t = tcg_temp_new_internal(type, TEMP_EBB);
1509     return temp_tcgv_vec(t);
1510 }
1511 
1512 /* Create a new temp of the same type as an existing temp.  */
1513 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1514 {
1515     TCGTemp *t = tcgv_vec_temp(match);
1516 
1517     tcg_debug_assert(t->temp_allocated != 0);
1518 
1519     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1520     return temp_tcgv_vec(t);
1521 }
1522 
1523 void tcg_temp_free_internal(TCGTemp *ts)
1524 {
1525     TCGContext *s = tcg_ctx;
1526 
1527     switch (ts->kind) {
1528     case TEMP_CONST:
1529     case TEMP_TB:
1530         /* Silently ignore free. */
1531         break;
1532     case TEMP_EBB:
1533         tcg_debug_assert(ts->temp_allocated != 0);
1534         ts->temp_allocated = 0;
1535         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1536         break;
1537     default:
1538         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1539         g_assert_not_reached();
1540     }
1541 }
1542 
1543 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1544 {
1545     TCGContext *s = tcg_ctx;
1546     GHashTable *h = s->const_table[type];
1547     TCGTemp *ts;
1548 
1549     if (h == NULL) {
1550         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1551         s->const_table[type] = h;
1552     }
1553 
1554     ts = g_hash_table_lookup(h, &val);
1555     if (ts == NULL) {
1556         int64_t *val_ptr;
1557 
1558         ts = tcg_temp_alloc(s);
1559 
1560         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1561             TCGTemp *ts2 = tcg_temp_alloc(s);
1562 
1563             tcg_debug_assert(ts2 == ts + 1);
1564 
1565             ts->base_type = TCG_TYPE_I64;
1566             ts->type = TCG_TYPE_I32;
1567             ts->kind = TEMP_CONST;
1568             ts->temp_allocated = 1;
1569 
1570             ts2->base_type = TCG_TYPE_I64;
1571             ts2->type = TCG_TYPE_I32;
1572             ts2->kind = TEMP_CONST;
1573             ts2->temp_allocated = 1;
1574             ts2->temp_subindex = 1;
1575 
1576             /*
1577              * Retain the full value of the 64-bit constant in the low
1578              * part, so that the hash table works.  Actual uses will
1579              * truncate the value to the low part.
1580              */
1581             ts[HOST_BIG_ENDIAN].val = val;
1582             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1583             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1584         } else {
1585             ts->base_type = type;
1586             ts->type = type;
1587             ts->kind = TEMP_CONST;
1588             ts->temp_allocated = 1;
1589             ts->val = val;
1590             val_ptr = &ts->val;
1591         }
1592         g_hash_table_insert(h, val_ptr, ts);
1593     }
1594 
1595     return ts;
1596 }
1597 
1598 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1599 {
1600     val = dup_const(vece, val);
1601     return temp_tcgv_vec(tcg_constant_internal(type, val));
1602 }
1603 
1604 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1605 {
1606     TCGTemp *t = tcgv_vec_temp(match);
1607 
1608     tcg_debug_assert(t->temp_allocated != 0);
1609     return tcg_constant_vec(t->base_type, vece, val);
1610 }
1611 
1612 /* Return true if OP may appear in the opcode stream.
1613    Test the runtime variable that controls each opcode.  */
1614 bool tcg_op_supported(TCGOpcode op)
1615 {
1616     const bool have_vec
1617         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1618 
1619     switch (op) {
1620     case INDEX_op_discard:
1621     case INDEX_op_set_label:
1622     case INDEX_op_call:
1623     case INDEX_op_br:
1624     case INDEX_op_mb:
1625     case INDEX_op_insn_start:
1626     case INDEX_op_exit_tb:
1627     case INDEX_op_goto_tb:
1628     case INDEX_op_goto_ptr:
1629     case INDEX_op_qemu_ld_i32:
1630     case INDEX_op_qemu_st_i32:
1631     case INDEX_op_qemu_ld_i64:
1632     case INDEX_op_qemu_st_i64:
1633         return true;
1634 
1635     case INDEX_op_qemu_st8_i32:
1636         return TCG_TARGET_HAS_qemu_st8_i32;
1637 
1638     case INDEX_op_mov_i32:
1639     case INDEX_op_setcond_i32:
1640     case INDEX_op_brcond_i32:
1641     case INDEX_op_ld8u_i32:
1642     case INDEX_op_ld8s_i32:
1643     case INDEX_op_ld16u_i32:
1644     case INDEX_op_ld16s_i32:
1645     case INDEX_op_ld_i32:
1646     case INDEX_op_st8_i32:
1647     case INDEX_op_st16_i32:
1648     case INDEX_op_st_i32:
1649     case INDEX_op_add_i32:
1650     case INDEX_op_sub_i32:
1651     case INDEX_op_mul_i32:
1652     case INDEX_op_and_i32:
1653     case INDEX_op_or_i32:
1654     case INDEX_op_xor_i32:
1655     case INDEX_op_shl_i32:
1656     case INDEX_op_shr_i32:
1657     case INDEX_op_sar_i32:
1658         return true;
1659 
1660     case INDEX_op_movcond_i32:
1661         return TCG_TARGET_HAS_movcond_i32;
1662     case INDEX_op_div_i32:
1663     case INDEX_op_divu_i32:
1664         return TCG_TARGET_HAS_div_i32;
1665     case INDEX_op_rem_i32:
1666     case INDEX_op_remu_i32:
1667         return TCG_TARGET_HAS_rem_i32;
1668     case INDEX_op_div2_i32:
1669     case INDEX_op_divu2_i32:
1670         return TCG_TARGET_HAS_div2_i32;
1671     case INDEX_op_rotl_i32:
1672     case INDEX_op_rotr_i32:
1673         return TCG_TARGET_HAS_rot_i32;
1674     case INDEX_op_deposit_i32:
1675         return TCG_TARGET_HAS_deposit_i32;
1676     case INDEX_op_extract_i32:
1677         return TCG_TARGET_HAS_extract_i32;
1678     case INDEX_op_sextract_i32:
1679         return TCG_TARGET_HAS_sextract_i32;
1680     case INDEX_op_extract2_i32:
1681         return TCG_TARGET_HAS_extract2_i32;
1682     case INDEX_op_add2_i32:
1683         return TCG_TARGET_HAS_add2_i32;
1684     case INDEX_op_sub2_i32:
1685         return TCG_TARGET_HAS_sub2_i32;
1686     case INDEX_op_mulu2_i32:
1687         return TCG_TARGET_HAS_mulu2_i32;
1688     case INDEX_op_muls2_i32:
1689         return TCG_TARGET_HAS_muls2_i32;
1690     case INDEX_op_muluh_i32:
1691         return TCG_TARGET_HAS_muluh_i32;
1692     case INDEX_op_mulsh_i32:
1693         return TCG_TARGET_HAS_mulsh_i32;
1694     case INDEX_op_ext8s_i32:
1695         return TCG_TARGET_HAS_ext8s_i32;
1696     case INDEX_op_ext16s_i32:
1697         return TCG_TARGET_HAS_ext16s_i32;
1698     case INDEX_op_ext8u_i32:
1699         return TCG_TARGET_HAS_ext8u_i32;
1700     case INDEX_op_ext16u_i32:
1701         return TCG_TARGET_HAS_ext16u_i32;
1702     case INDEX_op_bswap16_i32:
1703         return TCG_TARGET_HAS_bswap16_i32;
1704     case INDEX_op_bswap32_i32:
1705         return TCG_TARGET_HAS_bswap32_i32;
1706     case INDEX_op_not_i32:
1707         return TCG_TARGET_HAS_not_i32;
1708     case INDEX_op_neg_i32:
1709         return TCG_TARGET_HAS_neg_i32;
1710     case INDEX_op_andc_i32:
1711         return TCG_TARGET_HAS_andc_i32;
1712     case INDEX_op_orc_i32:
1713         return TCG_TARGET_HAS_orc_i32;
1714     case INDEX_op_eqv_i32:
1715         return TCG_TARGET_HAS_eqv_i32;
1716     case INDEX_op_nand_i32:
1717         return TCG_TARGET_HAS_nand_i32;
1718     case INDEX_op_nor_i32:
1719         return TCG_TARGET_HAS_nor_i32;
1720     case INDEX_op_clz_i32:
1721         return TCG_TARGET_HAS_clz_i32;
1722     case INDEX_op_ctz_i32:
1723         return TCG_TARGET_HAS_ctz_i32;
1724     case INDEX_op_ctpop_i32:
1725         return TCG_TARGET_HAS_ctpop_i32;
1726 
1727     case INDEX_op_brcond2_i32:
1728     case INDEX_op_setcond2_i32:
1729         return TCG_TARGET_REG_BITS == 32;
1730 
1731     case INDEX_op_mov_i64:
1732     case INDEX_op_setcond_i64:
1733     case INDEX_op_brcond_i64:
1734     case INDEX_op_ld8u_i64:
1735     case INDEX_op_ld8s_i64:
1736     case INDEX_op_ld16u_i64:
1737     case INDEX_op_ld16s_i64:
1738     case INDEX_op_ld32u_i64:
1739     case INDEX_op_ld32s_i64:
1740     case INDEX_op_ld_i64:
1741     case INDEX_op_st8_i64:
1742     case INDEX_op_st16_i64:
1743     case INDEX_op_st32_i64:
1744     case INDEX_op_st_i64:
1745     case INDEX_op_add_i64:
1746     case INDEX_op_sub_i64:
1747     case INDEX_op_mul_i64:
1748     case INDEX_op_and_i64:
1749     case INDEX_op_or_i64:
1750     case INDEX_op_xor_i64:
1751     case INDEX_op_shl_i64:
1752     case INDEX_op_shr_i64:
1753     case INDEX_op_sar_i64:
1754     case INDEX_op_ext_i32_i64:
1755     case INDEX_op_extu_i32_i64:
1756         return TCG_TARGET_REG_BITS == 64;
1757 
1758     case INDEX_op_movcond_i64:
1759         return TCG_TARGET_HAS_movcond_i64;
1760     case INDEX_op_div_i64:
1761     case INDEX_op_divu_i64:
1762         return TCG_TARGET_HAS_div_i64;
1763     case INDEX_op_rem_i64:
1764     case INDEX_op_remu_i64:
1765         return TCG_TARGET_HAS_rem_i64;
1766     case INDEX_op_div2_i64:
1767     case INDEX_op_divu2_i64:
1768         return TCG_TARGET_HAS_div2_i64;
1769     case INDEX_op_rotl_i64:
1770     case INDEX_op_rotr_i64:
1771         return TCG_TARGET_HAS_rot_i64;
1772     case INDEX_op_deposit_i64:
1773         return TCG_TARGET_HAS_deposit_i64;
1774     case INDEX_op_extract_i64:
1775         return TCG_TARGET_HAS_extract_i64;
1776     case INDEX_op_sextract_i64:
1777         return TCG_TARGET_HAS_sextract_i64;
1778     case INDEX_op_extract2_i64:
1779         return TCG_TARGET_HAS_extract2_i64;
1780     case INDEX_op_extrl_i64_i32:
1781         return TCG_TARGET_HAS_extrl_i64_i32;
1782     case INDEX_op_extrh_i64_i32:
1783         return TCG_TARGET_HAS_extrh_i64_i32;
1784     case INDEX_op_ext8s_i64:
1785         return TCG_TARGET_HAS_ext8s_i64;
1786     case INDEX_op_ext16s_i64:
1787         return TCG_TARGET_HAS_ext16s_i64;
1788     case INDEX_op_ext32s_i64:
1789         return TCG_TARGET_HAS_ext32s_i64;
1790     case INDEX_op_ext8u_i64:
1791         return TCG_TARGET_HAS_ext8u_i64;
1792     case INDEX_op_ext16u_i64:
1793         return TCG_TARGET_HAS_ext16u_i64;
1794     case INDEX_op_ext32u_i64:
1795         return TCG_TARGET_HAS_ext32u_i64;
1796     case INDEX_op_bswap16_i64:
1797         return TCG_TARGET_HAS_bswap16_i64;
1798     case INDEX_op_bswap32_i64:
1799         return TCG_TARGET_HAS_bswap32_i64;
1800     case INDEX_op_bswap64_i64:
1801         return TCG_TARGET_HAS_bswap64_i64;
1802     case INDEX_op_not_i64:
1803         return TCG_TARGET_HAS_not_i64;
1804     case INDEX_op_neg_i64:
1805         return TCG_TARGET_HAS_neg_i64;
1806     case INDEX_op_andc_i64:
1807         return TCG_TARGET_HAS_andc_i64;
1808     case INDEX_op_orc_i64:
1809         return TCG_TARGET_HAS_orc_i64;
1810     case INDEX_op_eqv_i64:
1811         return TCG_TARGET_HAS_eqv_i64;
1812     case INDEX_op_nand_i64:
1813         return TCG_TARGET_HAS_nand_i64;
1814     case INDEX_op_nor_i64:
1815         return TCG_TARGET_HAS_nor_i64;
1816     case INDEX_op_clz_i64:
1817         return TCG_TARGET_HAS_clz_i64;
1818     case INDEX_op_ctz_i64:
1819         return TCG_TARGET_HAS_ctz_i64;
1820     case INDEX_op_ctpop_i64:
1821         return TCG_TARGET_HAS_ctpop_i64;
1822     case INDEX_op_add2_i64:
1823         return TCG_TARGET_HAS_add2_i64;
1824     case INDEX_op_sub2_i64:
1825         return TCG_TARGET_HAS_sub2_i64;
1826     case INDEX_op_mulu2_i64:
1827         return TCG_TARGET_HAS_mulu2_i64;
1828     case INDEX_op_muls2_i64:
1829         return TCG_TARGET_HAS_muls2_i64;
1830     case INDEX_op_muluh_i64:
1831         return TCG_TARGET_HAS_muluh_i64;
1832     case INDEX_op_mulsh_i64:
1833         return TCG_TARGET_HAS_mulsh_i64;
1834 
1835     case INDEX_op_mov_vec:
1836     case INDEX_op_dup_vec:
1837     case INDEX_op_dupm_vec:
1838     case INDEX_op_ld_vec:
1839     case INDEX_op_st_vec:
1840     case INDEX_op_add_vec:
1841     case INDEX_op_sub_vec:
1842     case INDEX_op_and_vec:
1843     case INDEX_op_or_vec:
1844     case INDEX_op_xor_vec:
1845     case INDEX_op_cmp_vec:
1846         return have_vec;
1847     case INDEX_op_dup2_vec:
1848         return have_vec && TCG_TARGET_REG_BITS == 32;
1849     case INDEX_op_not_vec:
1850         return have_vec && TCG_TARGET_HAS_not_vec;
1851     case INDEX_op_neg_vec:
1852         return have_vec && TCG_TARGET_HAS_neg_vec;
1853     case INDEX_op_abs_vec:
1854         return have_vec && TCG_TARGET_HAS_abs_vec;
1855     case INDEX_op_andc_vec:
1856         return have_vec && TCG_TARGET_HAS_andc_vec;
1857     case INDEX_op_orc_vec:
1858         return have_vec && TCG_TARGET_HAS_orc_vec;
1859     case INDEX_op_nand_vec:
1860         return have_vec && TCG_TARGET_HAS_nand_vec;
1861     case INDEX_op_nor_vec:
1862         return have_vec && TCG_TARGET_HAS_nor_vec;
1863     case INDEX_op_eqv_vec:
1864         return have_vec && TCG_TARGET_HAS_eqv_vec;
1865     case INDEX_op_mul_vec:
1866         return have_vec && TCG_TARGET_HAS_mul_vec;
1867     case INDEX_op_shli_vec:
1868     case INDEX_op_shri_vec:
1869     case INDEX_op_sari_vec:
1870         return have_vec && TCG_TARGET_HAS_shi_vec;
1871     case INDEX_op_shls_vec:
1872     case INDEX_op_shrs_vec:
1873     case INDEX_op_sars_vec:
1874         return have_vec && TCG_TARGET_HAS_shs_vec;
1875     case INDEX_op_shlv_vec:
1876     case INDEX_op_shrv_vec:
1877     case INDEX_op_sarv_vec:
1878         return have_vec && TCG_TARGET_HAS_shv_vec;
1879     case INDEX_op_rotli_vec:
1880         return have_vec && TCG_TARGET_HAS_roti_vec;
1881     case INDEX_op_rotls_vec:
1882         return have_vec && TCG_TARGET_HAS_rots_vec;
1883     case INDEX_op_rotlv_vec:
1884     case INDEX_op_rotrv_vec:
1885         return have_vec && TCG_TARGET_HAS_rotv_vec;
1886     case INDEX_op_ssadd_vec:
1887     case INDEX_op_usadd_vec:
1888     case INDEX_op_sssub_vec:
1889     case INDEX_op_ussub_vec:
1890         return have_vec && TCG_TARGET_HAS_sat_vec;
1891     case INDEX_op_smin_vec:
1892     case INDEX_op_umin_vec:
1893     case INDEX_op_smax_vec:
1894     case INDEX_op_umax_vec:
1895         return have_vec && TCG_TARGET_HAS_minmax_vec;
1896     case INDEX_op_bitsel_vec:
1897         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1898     case INDEX_op_cmpsel_vec:
1899         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1900 
1901     default:
1902         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1903         return true;
1904     }
1905 }
1906 
1907 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1908 
1909 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1910 {
1911     const TCGHelperInfo *info;
1912     TCGv_i64 extend_free[MAX_CALL_IARGS];
1913     int n_extend = 0;
1914     TCGOp *op;
1915     int i, n, pi = 0, total_args;
1916 
1917     info = g_hash_table_lookup(helper_table, (gpointer)func);
1918     total_args = info->nr_out + info->nr_in + 2;
1919     op = tcg_op_alloc(INDEX_op_call, total_args);
1920 
1921 #ifdef CONFIG_PLUGIN
1922     /* Flag helpers that may affect guest state */
1923     if (tcg_ctx->plugin_insn &&
1924         !(info->flags & TCG_CALL_PLUGIN) &&
1925         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1926         tcg_ctx->plugin_insn->calls_helpers = true;
1927     }
1928 #endif
1929 
1930     TCGOP_CALLO(op) = n = info->nr_out;
1931     switch (n) {
1932     case 0:
1933         tcg_debug_assert(ret == NULL);
1934         break;
1935     case 1:
1936         tcg_debug_assert(ret != NULL);
1937         op->args[pi++] = temp_arg(ret);
1938         break;
1939     case 2:
1940     case 4:
1941         tcg_debug_assert(ret != NULL);
1942         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
1943         tcg_debug_assert(ret->temp_subindex == 0);
1944         for (i = 0; i < n; ++i) {
1945             op->args[pi++] = temp_arg(ret + i);
1946         }
1947         break;
1948     default:
1949         g_assert_not_reached();
1950     }
1951 
1952     TCGOP_CALLI(op) = n = info->nr_in;
1953     for (i = 0; i < n; i++) {
1954         const TCGCallArgumentLoc *loc = &info->in[i];
1955         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
1956 
1957         switch (loc->kind) {
1958         case TCG_CALL_ARG_NORMAL:
1959         case TCG_CALL_ARG_BY_REF:
1960         case TCG_CALL_ARG_BY_REF_N:
1961             op->args[pi++] = temp_arg(ts);
1962             break;
1963 
1964         case TCG_CALL_ARG_EXTEND_U:
1965         case TCG_CALL_ARG_EXTEND_S:
1966             {
1967                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
1968                 TCGv_i32 orig = temp_tcgv_i32(ts);
1969 
1970                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
1971                     tcg_gen_ext_i32_i64(temp, orig);
1972                 } else {
1973                     tcg_gen_extu_i32_i64(temp, orig);
1974                 }
1975                 op->args[pi++] = tcgv_i64_arg(temp);
1976                 extend_free[n_extend++] = temp;
1977             }
1978             break;
1979 
1980         default:
1981             g_assert_not_reached();
1982         }
1983     }
1984     op->args[pi++] = (uintptr_t)func;
1985     op->args[pi++] = (uintptr_t)info;
1986     tcg_debug_assert(pi == total_args);
1987 
1988     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
1989 
1990     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
1991     for (i = 0; i < n_extend; ++i) {
1992         tcg_temp_free_i64(extend_free[i]);
1993     }
1994 }
1995 
1996 static void tcg_reg_alloc_start(TCGContext *s)
1997 {
1998     int i, n;
1999 
2000     for (i = 0, n = s->nb_temps; i < n; i++) {
2001         TCGTemp *ts = &s->temps[i];
2002         TCGTempVal val = TEMP_VAL_MEM;
2003 
2004         switch (ts->kind) {
2005         case TEMP_CONST:
2006             val = TEMP_VAL_CONST;
2007             break;
2008         case TEMP_FIXED:
2009             val = TEMP_VAL_REG;
2010             break;
2011         case TEMP_GLOBAL:
2012             break;
2013         case TEMP_EBB:
2014             val = TEMP_VAL_DEAD;
2015             /* fall through */
2016         case TEMP_TB:
2017             ts->mem_allocated = 0;
2018             break;
2019         default:
2020             g_assert_not_reached();
2021         }
2022         ts->val_type = val;
2023     }
2024 
2025     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2026 }
2027 
2028 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2029                                  TCGTemp *ts)
2030 {
2031     int idx = temp_idx(ts);
2032 
2033     switch (ts->kind) {
2034     case TEMP_FIXED:
2035     case TEMP_GLOBAL:
2036         pstrcpy(buf, buf_size, ts->name);
2037         break;
2038     case TEMP_TB:
2039         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2040         break;
2041     case TEMP_EBB:
2042         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2043         break;
2044     case TEMP_CONST:
2045         switch (ts->type) {
2046         case TCG_TYPE_I32:
2047             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2048             break;
2049 #if TCG_TARGET_REG_BITS > 32
2050         case TCG_TYPE_I64:
2051             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2052             break;
2053 #endif
2054         case TCG_TYPE_V64:
2055         case TCG_TYPE_V128:
2056         case TCG_TYPE_V256:
2057             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2058                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2059             break;
2060         default:
2061             g_assert_not_reached();
2062         }
2063         break;
2064     }
2065     return buf;
2066 }
2067 
2068 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2069                              int buf_size, TCGArg arg)
2070 {
2071     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2072 }
2073 
2074 static const char * const cond_name[] =
2075 {
2076     [TCG_COND_NEVER] = "never",
2077     [TCG_COND_ALWAYS] = "always",
2078     [TCG_COND_EQ] = "eq",
2079     [TCG_COND_NE] = "ne",
2080     [TCG_COND_LT] = "lt",
2081     [TCG_COND_GE] = "ge",
2082     [TCG_COND_LE] = "le",
2083     [TCG_COND_GT] = "gt",
2084     [TCG_COND_LTU] = "ltu",
2085     [TCG_COND_GEU] = "geu",
2086     [TCG_COND_LEU] = "leu",
2087     [TCG_COND_GTU] = "gtu"
2088 };
2089 
2090 static const char * const ldst_name[] =
2091 {
2092     [MO_UB]   = "ub",
2093     [MO_SB]   = "sb",
2094     [MO_LEUW] = "leuw",
2095     [MO_LESW] = "lesw",
2096     [MO_LEUL] = "leul",
2097     [MO_LESL] = "lesl",
2098     [MO_LEUQ] = "leq",
2099     [MO_BEUW] = "beuw",
2100     [MO_BESW] = "besw",
2101     [MO_BEUL] = "beul",
2102     [MO_BESL] = "besl",
2103     [MO_BEUQ] = "beq",
2104 };
2105 
2106 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2107 #ifdef TARGET_ALIGNED_ONLY
2108     [MO_UNALN >> MO_ASHIFT]    = "un+",
2109     [MO_ALIGN >> MO_ASHIFT]    = "",
2110 #else
2111     [MO_UNALN >> MO_ASHIFT]    = "",
2112     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2113 #endif
2114     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2115     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2116     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2117     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2118     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2119     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2120 };
2121 
2122 static const char bswap_flag_name[][6] = {
2123     [TCG_BSWAP_IZ] = "iz",
2124     [TCG_BSWAP_OZ] = "oz",
2125     [TCG_BSWAP_OS] = "os",
2126     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2127     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2128 };
2129 
2130 static inline bool tcg_regset_single(TCGRegSet d)
2131 {
2132     return (d & (d - 1)) == 0;
2133 }
2134 
2135 static inline TCGReg tcg_regset_first(TCGRegSet d)
2136 {
2137     if (TCG_TARGET_NB_REGS <= 32) {
2138         return ctz32(d);
2139     } else {
2140         return ctz64(d);
2141     }
2142 }
2143 
/*
 * "No-error" fprintf: evaluates to the number of characters written, with
 * a failed (negative) fprintf result reported as 0 so that callers can
 * accumulate a column count without checking for errors.
 */
#define ne_fprintf(...) \
    ({ int nout_ = fprintf(__VA_ARGS__); nout_ < 0 ? 0 : nout_; })
2147 
2148 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2149 {
2150     char buf[128];
2151     TCGOp *op;
2152 
2153     QTAILQ_FOREACH(op, &s->ops, link) {
2154         int i, k, nb_oargs, nb_iargs, nb_cargs;
2155         const TCGOpDef *def;
2156         TCGOpcode c;
2157         int col = 0;
2158 
2159         c = op->opc;
2160         def = &tcg_op_defs[c];
2161 
2162         if (c == INDEX_op_insn_start) {
2163             nb_oargs = 0;
2164             col += ne_fprintf(f, "\n ----");
2165 
2166             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2167                 target_ulong a;
2168 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2169                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2170 #else
2171                 a = op->args[i];
2172 #endif
2173                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
2174             }
2175         } else if (c == INDEX_op_call) {
2176             const TCGHelperInfo *info = tcg_call_info(op);
2177             void *func = tcg_call_func(op);
2178 
2179             /* variable number of arguments */
2180             nb_oargs = TCGOP_CALLO(op);
2181             nb_iargs = TCGOP_CALLI(op);
2182             nb_cargs = def->nb_cargs;
2183 
2184             col += ne_fprintf(f, " %s ", def->name);
2185 
2186             /*
2187              * Print the function name from TCGHelperInfo, if available.
2188              * Note that plugins have a template function for the info,
2189              * but the actual function pointer comes from the plugin.
2190              */
2191             if (func == info->func) {
2192                 col += ne_fprintf(f, "%s", info->name);
2193             } else {
2194                 col += ne_fprintf(f, "plugin(%p)", func);
2195             }
2196 
2197             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2198             for (i = 0; i < nb_oargs; i++) {
2199                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2200                                                             op->args[i]));
2201             }
2202             for (i = 0; i < nb_iargs; i++) {
2203                 TCGArg arg = op->args[nb_oargs + i];
2204                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2205                 col += ne_fprintf(f, ",%s", t);
2206             }
2207         } else {
2208             col += ne_fprintf(f, " %s ", def->name);
2209 
2210             nb_oargs = def->nb_oargs;
2211             nb_iargs = def->nb_iargs;
2212             nb_cargs = def->nb_cargs;
2213 
2214             if (def->flags & TCG_OPF_VECTOR) {
2215                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2216                                   8 << TCGOP_VECE(op));
2217             }
2218 
2219             k = 0;
2220             for (i = 0; i < nb_oargs; i++) {
2221                 const char *sep =  k ? "," : "";
2222                 col += ne_fprintf(f, "%s%s", sep,
2223                                   tcg_get_arg_str(s, buf, sizeof(buf),
2224                                                   op->args[k++]));
2225             }
2226             for (i = 0; i < nb_iargs; i++) {
2227                 const char *sep =  k ? "," : "";
2228                 col += ne_fprintf(f, "%s%s", sep,
2229                                   tcg_get_arg_str(s, buf, sizeof(buf),
2230                                                   op->args[k++]));
2231             }
2232             switch (c) {
2233             case INDEX_op_brcond_i32:
2234             case INDEX_op_setcond_i32:
2235             case INDEX_op_movcond_i32:
2236             case INDEX_op_brcond2_i32:
2237             case INDEX_op_setcond2_i32:
2238             case INDEX_op_brcond_i64:
2239             case INDEX_op_setcond_i64:
2240             case INDEX_op_movcond_i64:
2241             case INDEX_op_cmp_vec:
2242             case INDEX_op_cmpsel_vec:
2243                 if (op->args[k] < ARRAY_SIZE(cond_name)
2244                     && cond_name[op->args[k]]) {
2245                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2246                 } else {
2247                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2248                 }
2249                 i = 1;
2250                 break;
2251             case INDEX_op_qemu_ld_i32:
2252             case INDEX_op_qemu_st_i32:
2253             case INDEX_op_qemu_st8_i32:
2254             case INDEX_op_qemu_ld_i64:
2255             case INDEX_op_qemu_st_i64:
2256                 {
2257                     MemOpIdx oi = op->args[k++];
2258                     MemOp op = get_memop(oi);
2259                     unsigned ix = get_mmuidx(oi);
2260 
2261                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2262                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2263                     } else {
2264                         const char *s_al, *s_op;
2265                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2266                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2267                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
2268                     }
2269                     i = 1;
2270                 }
2271                 break;
2272             case INDEX_op_bswap16_i32:
2273             case INDEX_op_bswap16_i64:
2274             case INDEX_op_bswap32_i32:
2275             case INDEX_op_bswap32_i64:
2276             case INDEX_op_bswap64_i64:
2277                 {
2278                     TCGArg flags = op->args[k];
2279                     const char *name = NULL;
2280 
2281                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2282                         name = bswap_flag_name[flags];
2283                     }
2284                     if (name) {
2285                         col += ne_fprintf(f, ",%s", name);
2286                     } else {
2287                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2288                     }
2289                     i = k = 1;
2290                 }
2291                 break;
2292             default:
2293                 i = 0;
2294                 break;
2295             }
2296             switch (c) {
2297             case INDEX_op_set_label:
2298             case INDEX_op_br:
2299             case INDEX_op_brcond_i32:
2300             case INDEX_op_brcond_i64:
2301             case INDEX_op_brcond2_i32:
2302                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2303                                   arg_label(op->args[k])->id);
2304                 i++, k++;
2305                 break;
2306             case INDEX_op_mb:
2307                 {
2308                     TCGBar membar = op->args[k];
2309                     const char *b_op, *m_op;
2310 
2311                     switch (membar & TCG_BAR_SC) {
2312                     case 0:
2313                         b_op = "none";
2314                         break;
2315                     case TCG_BAR_LDAQ:
2316                         b_op = "acq";
2317                         break;
2318                     case TCG_BAR_STRL:
2319                         b_op = "rel";
2320                         break;
2321                     case TCG_BAR_SC:
2322                         b_op = "seq";
2323                         break;
2324                     default:
2325                         g_assert_not_reached();
2326                     }
2327 
2328                     switch (membar & TCG_MO_ALL) {
2329                     case 0:
2330                         m_op = "none";
2331                         break;
2332                     case TCG_MO_LD_LD:
2333                         m_op = "rr";
2334                         break;
2335                     case TCG_MO_LD_ST:
2336                         m_op = "rw";
2337                         break;
2338                     case TCG_MO_ST_LD:
2339                         m_op = "wr";
2340                         break;
2341                     case TCG_MO_ST_ST:
2342                         m_op = "ww";
2343                         break;
2344                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2345                         m_op = "rr+rw";
2346                         break;
2347                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2348                         m_op = "rr+wr";
2349                         break;
2350                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2351                         m_op = "rr+ww";
2352                         break;
2353                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2354                         m_op = "rw+wr";
2355                         break;
2356                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2357                         m_op = "rw+ww";
2358                         break;
2359                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2360                         m_op = "wr+ww";
2361                         break;
2362                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2363                         m_op = "rr+rw+wr";
2364                         break;
2365                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2366                         m_op = "rr+rw+ww";
2367                         break;
2368                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2369                         m_op = "rr+wr+ww";
2370                         break;
2371                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2372                         m_op = "rw+wr+ww";
2373                         break;
2374                     case TCG_MO_ALL:
2375                         m_op = "all";
2376                         break;
2377                     default:
2378                         g_assert_not_reached();
2379                     }
2380 
2381                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2382                     i++, k++;
2383                 }
2384                 break;
2385             default:
2386                 break;
2387             }
2388             for (; i < nb_cargs; i++, k++) {
2389                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2390                                   op->args[k]);
2391             }
2392         }
2393 
2394         if (have_prefs || op->life) {
2395             for (; col < 40; ++col) {
2396                 putc(' ', f);
2397             }
2398         }
2399 
2400         if (op->life) {
2401             unsigned life = op->life;
2402 
2403             if (life & (SYNC_ARG * 3)) {
2404                 ne_fprintf(f, "  sync:");
2405                 for (i = 0; i < 2; ++i) {
2406                     if (life & (SYNC_ARG << i)) {
2407                         ne_fprintf(f, " %d", i);
2408                     }
2409                 }
2410             }
2411             life /= DEAD_ARG;
2412             if (life) {
2413                 ne_fprintf(f, "  dead:");
2414                 for (i = 0; life; ++i, life >>= 1) {
2415                     if (life & 1) {
2416                         ne_fprintf(f, " %d", i);
2417                     }
2418                 }
2419             }
2420         }
2421 
2422         if (have_prefs) {
2423             for (i = 0; i < nb_oargs; ++i) {
2424                 TCGRegSet set = output_pref(op, i);
2425 
2426                 if (i == 0) {
2427                     ne_fprintf(f, "  pref=");
2428                 } else {
2429                     ne_fprintf(f, ",");
2430                 }
2431                 if (set == 0) {
2432                     ne_fprintf(f, "none");
2433                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2434                     ne_fprintf(f, "all");
2435 #ifdef CONFIG_DEBUG_TCG
2436                 } else if (tcg_regset_single(set)) {
2437                     TCGReg reg = tcg_regset_first(set);
2438                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2439 #endif
2440                 } else if (TCG_TARGET_NB_REGS <= 32) {
2441                     ne_fprintf(f, "0x%x", (uint32_t)set);
2442                 } else {
2443                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2444                 }
2445             }
2446         }
2447 
2448         putc('\n', f);
2449     }
2450 }
2451 
2452 /* we give more priority to constraints with less registers */
2453 static int get_constraint_priority(const TCGOpDef *def, int k)
2454 {
2455     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2456     int n = ctpop64(arg_ct->regs);
2457 
2458     /*
2459      * Sort constraints of a single register first, which includes output
2460      * aliases (which must exactly match the input already allocated).
2461      */
2462     if (n == 1 || arg_ct->oalias) {
2463         return INT_MAX;
2464     }
2465 
2466     /*
2467      * Sort register pairs next, first then second immediately after.
2468      * Arbitrarily sort multiple pairs by the index of the first reg;
2469      * there shouldn't be many pairs.
2470      */
2471     switch (arg_ct->pair) {
2472     case 1:
2473     case 3:
2474         return (k + 1) * 2;
2475     case 2:
2476         return (arg_ct->pair_index + 1) * 2 - 1;
2477     }
2478 
2479     /* Finally, sort by decreasing register count. */
2480     assert(n > 1);
2481     return -n;
2482 }
2483 
2484 /* sort from highest priority to lowest */
2485 static void sort_constraints(TCGOpDef *def, int start, int n)
2486 {
2487     int i, j;
2488     TCGArgConstraint *a = def->args_ct;
2489 
2490     for (i = 0; i < n; i++) {
2491         a[start + i].sort_index = start + i;
2492     }
2493     if (n <= 1) {
2494         return;
2495     }
2496     for (i = 0; i < n - 1; i++) {
2497         for (j = i + 1; j < n; j++) {
2498             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2499             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2500             if (p1 < p2) {
2501                 int tmp = a[start + i].sort_index;
2502                 a[start + i].sort_index = a[start + j].sort_index;
2503                 a[start + j].sort_index = tmp;
2504             }
2505         }
2506     }
2507 }
2508 
2509 static void process_op_defs(TCGContext *s)
2510 {
2511     TCGOpcode op;
2512 
2513     for (op = 0; op < NB_OPS; op++) {
2514         TCGOpDef *def = &tcg_op_defs[op];
2515         const TCGTargetOpDef *tdefs;
2516         bool saw_alias_pair = false;
2517         int i, o, i2, o2, nb_args;
2518 
2519         if (def->flags & TCG_OPF_NOT_PRESENT) {
2520             continue;
2521         }
2522 
2523         nb_args = def->nb_iargs + def->nb_oargs;
2524         if (nb_args == 0) {
2525             continue;
2526         }
2527 
2528         /*
2529          * Macro magic should make it impossible, but double-check that
2530          * the array index is in range.  Since the signness of an enum
2531          * is implementation defined, force the result to unsigned.
2532          */
2533         unsigned con_set = tcg_target_op_def(op);
2534         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2535         tdefs = &constraint_sets[con_set];
2536 
2537         for (i = 0; i < nb_args; i++) {
2538             const char *ct_str = tdefs->args_ct_str[i];
2539             bool input_p = i >= def->nb_oargs;
2540 
2541             /* Incomplete TCGTargetOpDef entry. */
2542             tcg_debug_assert(ct_str != NULL);
2543 
2544             switch (*ct_str) {
2545             case '0' ... '9':
2546                 o = *ct_str - '0';
2547                 tcg_debug_assert(input_p);
2548                 tcg_debug_assert(o < def->nb_oargs);
2549                 tcg_debug_assert(def->args_ct[o].regs != 0);
2550                 tcg_debug_assert(!def->args_ct[o].oalias);
2551                 def->args_ct[i] = def->args_ct[o];
2552                 /* The output sets oalias.  */
2553                 def->args_ct[o].oalias = 1;
2554                 def->args_ct[o].alias_index = i;
2555                 /* The input sets ialias. */
2556                 def->args_ct[i].ialias = 1;
2557                 def->args_ct[i].alias_index = o;
2558                 if (def->args_ct[i].pair) {
2559                     saw_alias_pair = true;
2560                 }
2561                 tcg_debug_assert(ct_str[1] == '\0');
2562                 continue;
2563 
2564             case '&':
2565                 tcg_debug_assert(!input_p);
2566                 def->args_ct[i].newreg = true;
2567                 ct_str++;
2568                 break;
2569 
2570             case 'p': /* plus */
2571                 /* Allocate to the register after the previous. */
2572                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2573                 o = i - 1;
2574                 tcg_debug_assert(!def->args_ct[o].pair);
2575                 tcg_debug_assert(!def->args_ct[o].ct);
2576                 def->args_ct[i] = (TCGArgConstraint){
2577                     .pair = 2,
2578                     .pair_index = o,
2579                     .regs = def->args_ct[o].regs << 1,
2580                 };
2581                 def->args_ct[o].pair = 1;
2582                 def->args_ct[o].pair_index = i;
2583                 tcg_debug_assert(ct_str[1] == '\0');
2584                 continue;
2585 
2586             case 'm': /* minus */
2587                 /* Allocate to the register before the previous. */
2588                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2589                 o = i - 1;
2590                 tcg_debug_assert(!def->args_ct[o].pair);
2591                 tcg_debug_assert(!def->args_ct[o].ct);
2592                 def->args_ct[i] = (TCGArgConstraint){
2593                     .pair = 1,
2594                     .pair_index = o,
2595                     .regs = def->args_ct[o].regs >> 1,
2596                 };
2597                 def->args_ct[o].pair = 2;
2598                 def->args_ct[o].pair_index = i;
2599                 tcg_debug_assert(ct_str[1] == '\0');
2600                 continue;
2601             }
2602 
2603             do {
2604                 switch (*ct_str) {
2605                 case 'i':
2606                     def->args_ct[i].ct |= TCG_CT_CONST;
2607                     break;
2608 
2609                 /* Include all of the target-specific constraints. */
2610 
2611 #undef CONST
2612 #define CONST(CASE, MASK) \
2613     case CASE: def->args_ct[i].ct |= MASK; break;
2614 #define REGS(CASE, MASK) \
2615     case CASE: def->args_ct[i].regs |= MASK; break;
2616 
2617 #include "tcg-target-con-str.h"
2618 
2619 #undef REGS
2620 #undef CONST
2621                 default:
2622                 case '0' ... '9':
2623                 case '&':
2624                 case 'p':
2625                 case 'm':
2626                     /* Typo in TCGTargetOpDef constraint. */
2627                     g_assert_not_reached();
2628                 }
2629             } while (*++ct_str != '\0');
2630         }
2631 
2632         /* TCGTargetOpDef entry with too much information? */
2633         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2634 
2635         /*
2636          * Fix up output pairs that are aliased with inputs.
2637          * When we created the alias, we copied pair from the output.
2638          * There are three cases:
2639          *    (1a) Pairs of inputs alias pairs of outputs.
2640          *    (1b) One input aliases the first of a pair of outputs.
2641          *    (2)  One input aliases the second of a pair of outputs.
2642          *
2643          * Case 1a is handled by making sure that the pair_index'es are
2644          * properly updated so that they appear the same as a pair of inputs.
2645          *
2646          * Case 1b is handled by setting the pair_index of the input to
2647          * itself, simply so it doesn't point to an unrelated argument.
2648          * Since we don't encounter the "second" during the input allocation
2649          * phase, nothing happens with the second half of the input pair.
2650          *
2651          * Case 2 is handled by setting the second input to pair=3, the
2652          * first output to pair=3, and the pair_index'es to match.
2653          */
2654         if (saw_alias_pair) {
2655             for (i = def->nb_oargs; i < nb_args; i++) {
2656                 /*
2657                  * Since [0-9pm] must be alone in the constraint string,
2658                  * the only way they can both be set is if the pair comes
2659                  * from the output alias.
2660                  */
2661                 if (!def->args_ct[i].ialias) {
2662                     continue;
2663                 }
2664                 switch (def->args_ct[i].pair) {
2665                 case 0:
2666                     break;
2667                 case 1:
2668                     o = def->args_ct[i].alias_index;
2669                     o2 = def->args_ct[o].pair_index;
2670                     tcg_debug_assert(def->args_ct[o].pair == 1);
2671                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2672                     if (def->args_ct[o2].oalias) {
2673                         /* Case 1a */
2674                         i2 = def->args_ct[o2].alias_index;
2675                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2676                         def->args_ct[i2].pair_index = i;
2677                         def->args_ct[i].pair_index = i2;
2678                     } else {
2679                         /* Case 1b */
2680                         def->args_ct[i].pair_index = i;
2681                     }
2682                     break;
2683                 case 2:
2684                     o = def->args_ct[i].alias_index;
2685                     o2 = def->args_ct[o].pair_index;
2686                     tcg_debug_assert(def->args_ct[o].pair == 2);
2687                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2688                     if (def->args_ct[o2].oalias) {
2689                         /* Case 1a */
2690                         i2 = def->args_ct[o2].alias_index;
2691                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2692                         def->args_ct[i2].pair_index = i;
2693                         def->args_ct[i].pair_index = i2;
2694                     } else {
2695                         /* Case 2 */
2696                         def->args_ct[i].pair = 3;
2697                         def->args_ct[o2].pair = 3;
2698                         def->args_ct[i].pair_index = o2;
2699                         def->args_ct[o2].pair_index = i;
2700                     }
2701                     break;
2702                 default:
2703                     g_assert_not_reached();
2704                 }
2705             }
2706         }
2707 
2708         /* sort the constraints (XXX: this is just an heuristic) */
2709         sort_constraints(def, 0, def->nb_oargs);
2710         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2711     }
2712 }
2713 
2714 static void remove_label_use(TCGOp *op, int idx)
2715 {
2716     TCGLabel *label = arg_label(op->args[idx]);
2717     TCGLabelUse *use;
2718 
2719     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2720         if (use->op == op) {
2721             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2722             return;
2723         }
2724     }
2725     g_assert_not_reached();
2726 }
2727 
2728 void tcg_op_remove(TCGContext *s, TCGOp *op)
2729 {
2730     switch (op->opc) {
2731     case INDEX_op_br:
2732         remove_label_use(op, 0);
2733         break;
2734     case INDEX_op_brcond_i32:
2735     case INDEX_op_brcond_i64:
2736         remove_label_use(op, 3);
2737         break;
2738     case INDEX_op_brcond2_i32:
2739         remove_label_use(op, 5);
2740         break;
2741     default:
2742         break;
2743     }
2744 
2745     QTAILQ_REMOVE(&s->ops, op, link);
2746     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2747     s->nb_ops--;
2748 
2749 #ifdef CONFIG_PROFILER
2750     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2751 #endif
2752 }
2753 
2754 void tcg_remove_ops_after(TCGOp *op)
2755 {
2756     TCGContext *s = tcg_ctx;
2757 
2758     while (true) {
2759         TCGOp *last = tcg_last_op();
2760         if (last == op) {
2761             return;
2762         }
2763         tcg_op_remove(s, last);
2764     }
2765 }
2766 
2767 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2768 {
2769     TCGContext *s = tcg_ctx;
2770     TCGOp *op = NULL;
2771 
2772     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2773         QTAILQ_FOREACH(op, &s->free_ops, link) {
2774             if (nargs <= op->nargs) {
2775                 QTAILQ_REMOVE(&s->free_ops, op, link);
2776                 nargs = op->nargs;
2777                 goto found;
2778             }
2779         }
2780     }
2781 
2782     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2783     nargs = MAX(4, nargs);
2784     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2785 
2786  found:
2787     memset(op, 0, offsetof(TCGOp, link));
2788     op->opc = opc;
2789     op->nargs = nargs;
2790 
2791     /* Check for bitfield overflow. */
2792     tcg_debug_assert(op->nargs == nargs);
2793 
2794     s->nb_ops++;
2795     return op;
2796 }
2797 
2798 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2799 {
2800     TCGOp *op = tcg_op_alloc(opc, nargs);
2801     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2802     return op;
2803 }
2804 
2805 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2806                             TCGOpcode opc, unsigned nargs)
2807 {
2808     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2809     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2810     return new_op;
2811 }
2812 
2813 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2814                            TCGOpcode opc, unsigned nargs)
2815 {
2816     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2817     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2818     return new_op;
2819 }
2820 
2821 static void move_label_uses(TCGLabel *to, TCGLabel *from)
2822 {
2823     TCGLabelUse *u;
2824 
2825     QSIMPLEQ_FOREACH(u, &from->branches, next) {
2826         TCGOp *op = u->op;
2827         switch (op->opc) {
2828         case INDEX_op_br:
2829             op->args[0] = label_arg(to);
2830             break;
2831         case INDEX_op_brcond_i32:
2832         case INDEX_op_brcond_i64:
2833             op->args[3] = label_arg(to);
2834             break;
2835         case INDEX_op_brcond2_i32:
2836             op->args[5] = label_arg(to);
2837             break;
2838         default:
2839             g_assert_not_reached();
2840         }
2841     }
2842 
2843     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
2844 }
2845 
2846 /* Reachable analysis : remove unreachable code.  */
2847 static void __attribute__((noinline))
2848 reachable_code_pass(TCGContext *s)
2849 {
2850     TCGOp *op, *op_next, *op_prev;
2851     bool dead = false;
2852 
2853     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2854         bool remove = dead;
2855         TCGLabel *label;
2856 
2857         switch (op->opc) {
2858         case INDEX_op_set_label:
2859             label = arg_label(op->args[0]);
2860 
2861             /*
2862              * Note that the first op in the TB is always a load,
2863              * so there is always something before a label.
2864              */
2865             op_prev = QTAILQ_PREV(op, link);
2866 
2867             /*
2868              * If we find two sequential labels, move all branches to
2869              * reference the second label and remove the first label.
2870              * Do this before branch to next optimization, so that the
2871              * middle label is out of the way.
2872              */
2873             if (op_prev->opc == INDEX_op_set_label) {
2874                 move_label_uses(label, arg_label(op_prev->args[0]));
2875                 tcg_op_remove(s, op_prev);
2876                 op_prev = QTAILQ_PREV(op, link);
2877             }
2878 
2879             /*
2880              * Optimization can fold conditional branches to unconditional.
2881              * If we find a label which is preceded by an unconditional
2882              * branch to next, remove the branch.  We couldn't do this when
2883              * processing the branch because any dead code between the branch
2884              * and label had not yet been removed.
2885              */
2886             if (op_prev->opc == INDEX_op_br &&
2887                 label == arg_label(op_prev->args[0])) {
2888                 tcg_op_remove(s, op_prev);
2889                 /* Fall through means insns become live again.  */
2890                 dead = false;
2891             }
2892 
2893             if (QSIMPLEQ_EMPTY(&label->branches)) {
2894                 /*
2895                  * While there is an occasional backward branch, virtually
2896                  * all branches generated by the translators are forward.
2897                  * Which means that generally we will have already removed
2898                  * all references to the label that will be, and there is
2899                  * little to be gained by iterating.
2900                  */
2901                 remove = true;
2902             } else {
2903                 /* Once we see a label, insns become live again.  */
2904                 dead = false;
2905                 remove = false;
2906             }
2907             break;
2908 
2909         case INDEX_op_br:
2910         case INDEX_op_exit_tb:
2911         case INDEX_op_goto_ptr:
2912             /* Unconditional branches; everything following is dead.  */
2913             dead = true;
2914             break;
2915 
2916         case INDEX_op_call:
2917             /* Notice noreturn helper calls, raising exceptions.  */
2918             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2919                 dead = true;
2920             }
2921             break;
2922 
2923         case INDEX_op_insn_start:
2924             /* Never remove -- we need to keep these for unwind.  */
2925             remove = false;
2926             break;
2927 
2928         default:
2929             break;
2930         }
2931 
2932         if (remove) {
2933             tcg_op_remove(s, op);
2934         }
2935     }
2936 }
2937 
/*
 * State bits stored in TCGTemp.state by the liveness passes.  The two
 * values are combined with bitwise OR (e.g. TS_DEAD | TS_MEM).
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the DEAD_ARG / SYNC_ARG bit for argument n.  Both macros read a
 * variable named arg_life, which must be in scope where they are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2943 
2944 /* For liveness_pass_1, the register preferences for a given temp.  */
2945 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2946 {
2947     return ts->state_ptr;
2948 }
2949 
2950 /* For liveness_pass_1, reset the preferences for a given temp to the
2951  * maximal regset for its type.
2952  */
2953 static inline void la_reset_pref(TCGTemp *ts)
2954 {
2955     *la_temp_pref(ts)
2956         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2957 }
2958 
2959 /* liveness analysis: end of function: all temps are dead, and globals
2960    should be in memory. */
2961 static void la_func_end(TCGContext *s, int ng, int nt)
2962 {
2963     int i;
2964 
2965     for (i = 0; i < ng; ++i) {
2966         s->temps[i].state = TS_DEAD | TS_MEM;
2967         la_reset_pref(&s->temps[i]);
2968     }
2969     for (i = ng; i < nt; ++i) {
2970         s->temps[i].state = TS_DEAD;
2971         la_reset_pref(&s->temps[i]);
2972     }
2973 }
2974 
2975 /* liveness analysis: end of basic block: all temps are dead, globals
2976    and local temps should be in memory. */
2977 static void la_bb_end(TCGContext *s, int ng, int nt)
2978 {
2979     int i;
2980 
2981     for (i = 0; i < nt; ++i) {
2982         TCGTemp *ts = &s->temps[i];
2983         int state;
2984 
2985         switch (ts->kind) {
2986         case TEMP_FIXED:
2987         case TEMP_GLOBAL:
2988         case TEMP_TB:
2989             state = TS_DEAD | TS_MEM;
2990             break;
2991         case TEMP_EBB:
2992         case TEMP_CONST:
2993             state = TS_DEAD;
2994             break;
2995         default:
2996             g_assert_not_reached();
2997         }
2998         ts->state = state;
2999         la_reset_pref(ts);
3000     }
3001 }
3002 
3003 /* liveness analysis: sync globals back to memory.  */
3004 static void la_global_sync(TCGContext *s, int ng)
3005 {
3006     int i;
3007 
3008     for (i = 0; i < ng; ++i) {
3009         int state = s->temps[i].state;
3010         s->temps[i].state = state | TS_MEM;
3011         if (state == TS_DEAD) {
3012             /* If the global was previously dead, reset prefs.  */
3013             la_reset_pref(&s->temps[i]);
3014         }
3015     }
3016 }
3017 
3018 /*
3019  * liveness analysis: conditional branch: all temps are dead unless
3020  * explicitly live-across-conditional-branch, globals and local temps
3021  * should be synced.
3022  */
3023 static void la_bb_sync(TCGContext *s, int ng, int nt)
3024 {
3025     la_global_sync(s, ng);
3026 
3027     for (int i = ng; i < nt; ++i) {
3028         TCGTemp *ts = &s->temps[i];
3029         int state;
3030 
3031         switch (ts->kind) {
3032         case TEMP_TB:
3033             state = ts->state;
3034             ts->state = state | TS_MEM;
3035             if (state != TS_DEAD) {
3036                 continue;
3037             }
3038             break;
3039         case TEMP_EBB:
3040         case TEMP_CONST:
3041             continue;
3042         default:
3043             g_assert_not_reached();
3044         }
3045         la_reset_pref(&s->temps[i]);
3046     }
3047 }
3048 
3049 /* liveness analysis: sync globals back to memory and kill.  */
3050 static void la_global_kill(TCGContext *s, int ng)
3051 {
3052     int i;
3053 
3054     for (i = 0; i < ng; i++) {
3055         s->temps[i].state = TS_DEAD | TS_MEM;
3056         la_reset_pref(&s->temps[i]);
3057     }
3058 }
3059 
3060 /* liveness analysis: note live globals crossing calls.  */
3061 static void la_cross_call(TCGContext *s, int nt)
3062 {
3063     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3064     int i;
3065 
3066     for (i = 0; i < nt; i++) {
3067         TCGTemp *ts = &s->temps[i];
3068         if (!(ts->state & TS_DEAD)) {
3069             TCGRegSet *pset = la_temp_pref(ts);
3070             TCGRegSet set = *pset;
3071 
3072             set &= mask;
3073             /* If the combination is not possible, restart.  */
3074             if (set == 0) {
3075                 set = tcg_target_available_regs[ts->type] & mask;
3076             }
3077             *pset = set;
3078         }
3079     }
3080 }
3081 
3082 /*
3083  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3084  * to TEMP_EBB, if possible.
3085  */
3086 static void __attribute__((noinline))
3087 liveness_pass_0(TCGContext *s)
3088 {
3089     void * const multiple_ebb = (void *)(uintptr_t)-1;
3090     int nb_temps = s->nb_temps;
3091     TCGOp *op, *ebb;
3092 
3093     for (int i = s->nb_globals; i < nb_temps; ++i) {
3094         s->temps[i].state_ptr = NULL;
3095     }
3096 
3097     /*
3098      * Represent each EBB by the op at which it begins.  In the case of
3099      * the first EBB, this is the first op, otherwise it is a label.
3100      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3101      * within a single EBB, else MULTIPLE_EBB.
3102      */
3103     ebb = QTAILQ_FIRST(&s->ops);
3104     QTAILQ_FOREACH(op, &s->ops, link) {
3105         const TCGOpDef *def;
3106         int nb_oargs, nb_iargs;
3107 
3108         switch (op->opc) {
3109         case INDEX_op_set_label:
3110             ebb = op;
3111             continue;
3112         case INDEX_op_discard:
3113             continue;
3114         case INDEX_op_call:
3115             nb_oargs = TCGOP_CALLO(op);
3116             nb_iargs = TCGOP_CALLI(op);
3117             break;
3118         default:
3119             def = &tcg_op_defs[op->opc];
3120             nb_oargs = def->nb_oargs;
3121             nb_iargs = def->nb_iargs;
3122             break;
3123         }
3124 
3125         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3126             TCGTemp *ts = arg_temp(op->args[i]);
3127 
3128             if (ts->kind != TEMP_TB) {
3129                 continue;
3130             }
3131             if (ts->state_ptr == NULL) {
3132                 ts->state_ptr = ebb;
3133             } else if (ts->state_ptr != ebb) {
3134                 ts->state_ptr = multiple_ebb;
3135             }
3136         }
3137     }
3138 
3139     /*
3140      * For TEMP_TB that turned out not to be used beyond one EBB,
3141      * reduce the liveness to TEMP_EBB.
3142      */
3143     for (int i = s->nb_globals; i < nb_temps; ++i) {
3144         TCGTemp *ts = &s->temps[i];
3145         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3146             ts->kind = TEMP_EBB;
3147         }
3148     }
3149 }
3150 
3151 /* Liveness analysis : update the opc_arg_life array to tell if a
3152    given input arguments is dead. Instructions updating dead
3153    temporaries are removed. */
3154 static void __attribute__((noinline))
3155 liveness_pass_1(TCGContext *s)
3156 {
3157     int nb_globals = s->nb_globals;
3158     int nb_temps = s->nb_temps;
3159     TCGOp *op, *op_prev;
3160     TCGRegSet *prefs;
3161     int i;
3162 
3163     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3164     for (i = 0; i < nb_temps; ++i) {
3165         s->temps[i].state_ptr = prefs + i;
3166     }
3167 
3168     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3169     la_func_end(s, nb_globals, nb_temps);
3170 
3171     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3172         int nb_iargs, nb_oargs;
3173         TCGOpcode opc_new, opc_new2;
3174         bool have_opc_new2;
3175         TCGLifeData arg_life = 0;
3176         TCGTemp *ts;
3177         TCGOpcode opc = op->opc;
3178         const TCGOpDef *def = &tcg_op_defs[opc];
3179 
3180         switch (opc) {
3181         case INDEX_op_call:
3182             {
3183                 const TCGHelperInfo *info = tcg_call_info(op);
3184                 int call_flags = tcg_call_flags(op);
3185 
3186                 nb_oargs = TCGOP_CALLO(op);
3187                 nb_iargs = TCGOP_CALLI(op);
3188 
3189                 /* pure functions can be removed if their result is unused */
3190                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3191                     for (i = 0; i < nb_oargs; i++) {
3192                         ts = arg_temp(op->args[i]);
3193                         if (ts->state != TS_DEAD) {
3194                             goto do_not_remove_call;
3195                         }
3196                     }
3197                     goto do_remove;
3198                 }
3199             do_not_remove_call:
3200 
3201                 /* Output args are dead.  */
3202                 for (i = 0; i < nb_oargs; i++) {
3203                     ts = arg_temp(op->args[i]);
3204                     if (ts->state & TS_DEAD) {
3205                         arg_life |= DEAD_ARG << i;
3206                     }
3207                     if (ts->state & TS_MEM) {
3208                         arg_life |= SYNC_ARG << i;
3209                     }
3210                     ts->state = TS_DEAD;
3211                     la_reset_pref(ts);
3212                 }
3213 
3214                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3215                 memset(op->output_pref, 0, sizeof(op->output_pref));
3216 
3217                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3218                                     TCG_CALL_NO_READ_GLOBALS))) {
3219                     la_global_kill(s, nb_globals);
3220                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3221                     la_global_sync(s, nb_globals);
3222                 }
3223 
3224                 /* Record arguments that die in this helper.  */
3225                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3226                     ts = arg_temp(op->args[i]);
3227                     if (ts->state & TS_DEAD) {
3228                         arg_life |= DEAD_ARG << i;
3229                     }
3230                 }
3231 
3232                 /* For all live registers, remove call-clobbered prefs.  */
3233                 la_cross_call(s, nb_temps);
3234 
3235                 /*
3236                  * Input arguments are live for preceding opcodes.
3237                  *
3238                  * For those arguments that die, and will be allocated in
3239                  * registers, clear the register set for that arg, to be
3240                  * filled in below.  For args that will be on the stack,
3241                  * reset to any available reg.  Process arguments in reverse
3242                  * order so that if a temp is used more than once, the stack
3243                  * reset to max happens before the register reset to 0.
3244                  */
3245                 for (i = nb_iargs - 1; i >= 0; i--) {
3246                     const TCGCallArgumentLoc *loc = &info->in[i];
3247                     ts = arg_temp(op->args[nb_oargs + i]);
3248 
3249                     if (ts->state & TS_DEAD) {
3250                         switch (loc->kind) {
3251                         case TCG_CALL_ARG_NORMAL:
3252                         case TCG_CALL_ARG_EXTEND_U:
3253                         case TCG_CALL_ARG_EXTEND_S:
3254                             if (arg_slot_reg_p(loc->arg_slot)) {
3255                                 *la_temp_pref(ts) = 0;
3256                                 break;
3257                             }
3258                             /* fall through */
3259                         default:
3260                             *la_temp_pref(ts) =
3261                                 tcg_target_available_regs[ts->type];
3262                             break;
3263                         }
3264                         ts->state &= ~TS_DEAD;
3265                     }
3266                 }
3267 
3268                 /*
3269                  * For each input argument, add its input register to prefs.
3270                  * If a temp is used once, this produces a single set bit;
3271                  * if a temp is used multiple times, this produces a set.
3272                  */
3273                 for (i = 0; i < nb_iargs; i++) {
3274                     const TCGCallArgumentLoc *loc = &info->in[i];
3275                     ts = arg_temp(op->args[nb_oargs + i]);
3276 
3277                     switch (loc->kind) {
3278                     case TCG_CALL_ARG_NORMAL:
3279                     case TCG_CALL_ARG_EXTEND_U:
3280                     case TCG_CALL_ARG_EXTEND_S:
3281                         if (arg_slot_reg_p(loc->arg_slot)) {
3282                             tcg_regset_set_reg(*la_temp_pref(ts),
3283                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3284                         }
3285                         break;
3286                     default:
3287                         break;
3288                     }
3289                 }
3290             }
3291             break;
3292         case INDEX_op_insn_start:
3293             break;
3294         case INDEX_op_discard:
3295             /* mark the temporary as dead */
3296             ts = arg_temp(op->args[0]);
3297             ts->state = TS_DEAD;
3298             la_reset_pref(ts);
3299             break;
3300 
3301         case INDEX_op_add2_i32:
3302             opc_new = INDEX_op_add_i32;
3303             goto do_addsub2;
3304         case INDEX_op_sub2_i32:
3305             opc_new = INDEX_op_sub_i32;
3306             goto do_addsub2;
3307         case INDEX_op_add2_i64:
3308             opc_new = INDEX_op_add_i64;
3309             goto do_addsub2;
3310         case INDEX_op_sub2_i64:
3311             opc_new = INDEX_op_sub_i64;
3312         do_addsub2:
3313             nb_iargs = 4;
3314             nb_oargs = 2;
3315             /* Test if the high part of the operation is dead, but not
3316                the low part.  The result can be optimized to a simple
3317                add or sub.  This happens often for x86_64 guest when the
3318                cpu mode is set to 32 bit.  */
3319             if (arg_temp(op->args[1])->state == TS_DEAD) {
3320                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3321                     goto do_remove;
3322                 }
3323                 /* Replace the opcode and adjust the args in place,
3324                    leaving 3 unused args at the end.  */
3325                 op->opc = opc = opc_new;
3326                 op->args[1] = op->args[2];
3327                 op->args[2] = op->args[4];
3328                 /* Fall through and mark the single-word operation live.  */
3329                 nb_iargs = 2;
3330                 nb_oargs = 1;
3331             }
3332             goto do_not_remove;
3333 
3334         case INDEX_op_mulu2_i32:
3335             opc_new = INDEX_op_mul_i32;
3336             opc_new2 = INDEX_op_muluh_i32;
3337             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3338             goto do_mul2;
3339         case INDEX_op_muls2_i32:
3340             opc_new = INDEX_op_mul_i32;
3341             opc_new2 = INDEX_op_mulsh_i32;
3342             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3343             goto do_mul2;
3344         case INDEX_op_mulu2_i64:
3345             opc_new = INDEX_op_mul_i64;
3346             opc_new2 = INDEX_op_muluh_i64;
3347             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3348             goto do_mul2;
3349         case INDEX_op_muls2_i64:
3350             opc_new = INDEX_op_mul_i64;
3351             opc_new2 = INDEX_op_mulsh_i64;
3352             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3353             goto do_mul2;
3354         do_mul2:
3355             nb_iargs = 2;
3356             nb_oargs = 2;
3357             if (arg_temp(op->args[1])->state == TS_DEAD) {
3358                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3359                     /* Both parts of the operation are dead.  */
3360                     goto do_remove;
3361                 }
3362                 /* The high part of the operation is dead; generate the low. */
3363                 op->opc = opc = opc_new;
3364                 op->args[1] = op->args[2];
3365                 op->args[2] = op->args[3];
3366             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3367                 /* The low part of the operation is dead; generate the high. */
3368                 op->opc = opc = opc_new2;
3369                 op->args[0] = op->args[1];
3370                 op->args[1] = op->args[2];
3371                 op->args[2] = op->args[3];
3372             } else {
3373                 goto do_not_remove;
3374             }
3375             /* Mark the single-word operation live.  */
3376             nb_oargs = 1;
3377             goto do_not_remove;
3378 
3379         default:
3380             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3381             nb_iargs = def->nb_iargs;
3382             nb_oargs = def->nb_oargs;
3383 
3384             /* Test if the operation can be removed because all
3385                its outputs are dead. We assume that nb_oargs == 0
3386                implies side effects */
3387             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3388                 for (i = 0; i < nb_oargs; i++) {
3389                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3390                         goto do_not_remove;
3391                     }
3392                 }
3393                 goto do_remove;
3394             }
3395             goto do_not_remove;
3396 
3397         do_remove:
3398             tcg_op_remove(s, op);
3399             break;
3400 
3401         do_not_remove:
3402             for (i = 0; i < nb_oargs; i++) {
3403                 ts = arg_temp(op->args[i]);
3404 
3405                 /* Remember the preference of the uses that followed.  */
3406                 if (i < ARRAY_SIZE(op->output_pref)) {
3407                     op->output_pref[i] = *la_temp_pref(ts);
3408                 }
3409 
3410                 /* Output args are dead.  */
3411                 if (ts->state & TS_DEAD) {
3412                     arg_life |= DEAD_ARG << i;
3413                 }
3414                 if (ts->state & TS_MEM) {
3415                     arg_life |= SYNC_ARG << i;
3416                 }
3417                 ts->state = TS_DEAD;
3418                 la_reset_pref(ts);
3419             }
3420 
3421             /* If end of basic block, update.  */
3422             if (def->flags & TCG_OPF_BB_EXIT) {
3423                 la_func_end(s, nb_globals, nb_temps);
3424             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3425                 la_bb_sync(s, nb_globals, nb_temps);
3426             } else if (def->flags & TCG_OPF_BB_END) {
3427                 la_bb_end(s, nb_globals, nb_temps);
3428             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3429                 la_global_sync(s, nb_globals);
3430                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3431                     la_cross_call(s, nb_temps);
3432                 }
3433             }
3434 
3435             /* Record arguments that die in this opcode.  */
3436             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3437                 ts = arg_temp(op->args[i]);
3438                 if (ts->state & TS_DEAD) {
3439                     arg_life |= DEAD_ARG << i;
3440                 }
3441             }
3442 
3443             /* Input arguments are live for preceding opcodes.  */
3444             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3445                 ts = arg_temp(op->args[i]);
3446                 if (ts->state & TS_DEAD) {
3447                     /* For operands that were dead, initially allow
3448                        all regs for the type.  */
3449                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3450                     ts->state &= ~TS_DEAD;
3451                 }
3452             }
3453 
3454             /* Incorporate constraints for this operand.  */
3455             switch (opc) {
3456             case INDEX_op_mov_i32:
3457             case INDEX_op_mov_i64:
3458                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3459                    have proper constraints.  That said, special case
3460                    moves to propagate preferences backward.  */
3461                 if (IS_DEAD_ARG(1)) {
3462                     *la_temp_pref(arg_temp(op->args[0]))
3463                         = *la_temp_pref(arg_temp(op->args[1]));
3464                 }
3465                 break;
3466 
3467             default:
3468                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3469                     const TCGArgConstraint *ct = &def->args_ct[i];
3470                     TCGRegSet set, *pset;
3471 
3472                     ts = arg_temp(op->args[i]);
3473                     pset = la_temp_pref(ts);
3474                     set = *pset;
3475 
3476                     set &= ct->regs;
3477                     if (ct->ialias) {
3478                         set &= output_pref(op, ct->alias_index);
3479                     }
3480                     /* If the combination is not possible, restart.  */
3481                     if (set == 0) {
3482                         set = ct->regs;
3483                     }
3484                     *pset = set;
3485                 }
3486                 break;
3487             }
3488             break;
3489         }
3490         op->life = arg_life;
3491     }
3492 }
3493 
3494 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3495 static bool __attribute__((noinline))
3496 liveness_pass_2(TCGContext *s)
3497 {
3498     int nb_globals = s->nb_globals;
3499     int nb_temps, i;
3500     bool changes = false;
3501     TCGOp *op, *op_next;
3502 
3503     /* Create a temporary for each indirect global.  */
3504     for (i = 0; i < nb_globals; ++i) {
3505         TCGTemp *its = &s->temps[i];
3506         if (its->indirect_reg) {
3507             TCGTemp *dts = tcg_temp_alloc(s);
3508             dts->type = its->type;
3509             dts->base_type = its->base_type;
3510             dts->temp_subindex = its->temp_subindex;
3511             dts->kind = TEMP_EBB;
3512             its->state_ptr = dts;
3513         } else {
3514             its->state_ptr = NULL;
3515         }
3516         /* All globals begin dead.  */
3517         its->state = TS_DEAD;
3518     }
3519     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3520         TCGTemp *its = &s->temps[i];
3521         its->state_ptr = NULL;
3522         its->state = TS_DEAD;
3523     }
3524 
3525     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3526         TCGOpcode opc = op->opc;
3527         const TCGOpDef *def = &tcg_op_defs[opc];
3528         TCGLifeData arg_life = op->life;
3529         int nb_iargs, nb_oargs, call_flags;
3530         TCGTemp *arg_ts, *dir_ts;
3531 
3532         if (opc == INDEX_op_call) {
3533             nb_oargs = TCGOP_CALLO(op);
3534             nb_iargs = TCGOP_CALLI(op);
3535             call_flags = tcg_call_flags(op);
3536         } else {
3537             nb_iargs = def->nb_iargs;
3538             nb_oargs = def->nb_oargs;
3539 
3540             /* Set flags similar to how calls require.  */
3541             if (def->flags & TCG_OPF_COND_BRANCH) {
3542                 /* Like reading globals: sync_globals */
3543                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3544             } else if (def->flags & TCG_OPF_BB_END) {
3545                 /* Like writing globals: save_globals */
3546                 call_flags = 0;
3547             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3548                 /* Like reading globals: sync_globals */
3549                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3550             } else {
3551                 /* No effect on globals.  */
3552                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3553                               TCG_CALL_NO_WRITE_GLOBALS);
3554             }
3555         }
3556 
3557         /* Make sure that input arguments are available.  */
3558         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3559             arg_ts = arg_temp(op->args[i]);
3560             dir_ts = arg_ts->state_ptr;
3561             if (dir_ts && arg_ts->state == TS_DEAD) {
3562                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3563                                   ? INDEX_op_ld_i32
3564                                   : INDEX_op_ld_i64);
3565                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3566 
3567                 lop->args[0] = temp_arg(dir_ts);
3568                 lop->args[1] = temp_arg(arg_ts->mem_base);
3569                 lop->args[2] = arg_ts->mem_offset;
3570 
3571                 /* Loaded, but synced with memory.  */
3572                 arg_ts->state = TS_MEM;
3573             }
3574         }
3575 
3576         /* Perform input replacement, and mark inputs that became dead.
3577            No action is required except keeping temp_state up to date
3578            so that we reload when needed.  */
3579         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3580             arg_ts = arg_temp(op->args[i]);
3581             dir_ts = arg_ts->state_ptr;
3582             if (dir_ts) {
3583                 op->args[i] = temp_arg(dir_ts);
3584                 changes = true;
3585                 if (IS_DEAD_ARG(i)) {
3586                     arg_ts->state = TS_DEAD;
3587                 }
3588             }
3589         }
3590 
3591         /* Liveness analysis should ensure that the following are
3592            all correct, for call sites and basic block end points.  */
3593         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3594             /* Nothing to do */
3595         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3596             for (i = 0; i < nb_globals; ++i) {
3597                 /* Liveness should see that globals are synced back,
3598                    that is, either TS_DEAD or TS_MEM.  */
3599                 arg_ts = &s->temps[i];
3600                 tcg_debug_assert(arg_ts->state_ptr == 0
3601                                  || arg_ts->state != 0);
3602             }
3603         } else {
3604             for (i = 0; i < nb_globals; ++i) {
3605                 /* Liveness should see that globals are saved back,
3606                    that is, TS_DEAD, waiting to be reloaded.  */
3607                 arg_ts = &s->temps[i];
3608                 tcg_debug_assert(arg_ts->state_ptr == 0
3609                                  || arg_ts->state == TS_DEAD);
3610             }
3611         }
3612 
3613         /* Outputs become available.  */
3614         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3615             arg_ts = arg_temp(op->args[0]);
3616             dir_ts = arg_ts->state_ptr;
3617             if (dir_ts) {
3618                 op->args[0] = temp_arg(dir_ts);
3619                 changes = true;
3620 
3621                 /* The output is now live and modified.  */
3622                 arg_ts->state = 0;
3623 
3624                 if (NEED_SYNC_ARG(0)) {
3625                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3626                                       ? INDEX_op_st_i32
3627                                       : INDEX_op_st_i64);
3628                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3629                     TCGTemp *out_ts = dir_ts;
3630 
3631                     if (IS_DEAD_ARG(0)) {
3632                         out_ts = arg_temp(op->args[1]);
3633                         arg_ts->state = TS_DEAD;
3634                         tcg_op_remove(s, op);
3635                     } else {
3636                         arg_ts->state = TS_MEM;
3637                     }
3638 
3639                     sop->args[0] = temp_arg(out_ts);
3640                     sop->args[1] = temp_arg(arg_ts->mem_base);
3641                     sop->args[2] = arg_ts->mem_offset;
3642                 } else {
3643                     tcg_debug_assert(!IS_DEAD_ARG(0));
3644                 }
3645             }
3646         } else {
3647             for (i = 0; i < nb_oargs; i++) {
3648                 arg_ts = arg_temp(op->args[i]);
3649                 dir_ts = arg_ts->state_ptr;
3650                 if (!dir_ts) {
3651                     continue;
3652                 }
3653                 op->args[i] = temp_arg(dir_ts);
3654                 changes = true;
3655 
3656                 /* The output is now live and modified.  */
3657                 arg_ts->state = 0;
3658 
3659                 /* Sync outputs upon their last write.  */
3660                 if (NEED_SYNC_ARG(i)) {
3661                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3662                                       ? INDEX_op_st_i32
3663                                       : INDEX_op_st_i64);
3664                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3665 
3666                     sop->args[0] = temp_arg(dir_ts);
3667                     sop->args[1] = temp_arg(arg_ts->mem_base);
3668                     sop->args[2] = arg_ts->mem_offset;
3669 
3670                     arg_ts->state = TS_MEM;
3671                 }
3672                 /* Drop outputs that are dead.  */
3673                 if (IS_DEAD_ARG(i)) {
3674                     arg_ts->state = TS_DEAD;
3675                 }
3676             }
3677         }
3678     }
3679 
3680     return changes;
3681 }
3682 
3683 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3684 {
3685     intptr_t off;
3686     int size, align;
3687 
3688     /* When allocating an object, look at the full type. */
3689     size = tcg_type_size(ts->base_type);
3690     switch (ts->base_type) {
3691     case TCG_TYPE_I32:
3692         align = 4;
3693         break;
3694     case TCG_TYPE_I64:
3695     case TCG_TYPE_V64:
3696         align = 8;
3697         break;
3698     case TCG_TYPE_I128:
3699     case TCG_TYPE_V128:
3700     case TCG_TYPE_V256:
3701         /*
3702          * Note that we do not require aligned storage for V256,
3703          * and that we provide alignment for I128 to match V128,
3704          * even if that's above what the host ABI requires.
3705          */
3706         align = 16;
3707         break;
3708     default:
3709         g_assert_not_reached();
3710     }
3711 
3712     /*
3713      * Assume the stack is sufficiently aligned.
3714      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3715      * and do not require 16 byte vector alignment.  This seems slightly
3716      * easier than fully parameterizing the above switch statement.
3717      */
3718     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3719     off = ROUND_UP(s->current_frame_offset, align);
3720 
3721     /* If we've exhausted the stack frame, restart with a smaller TB. */
3722     if (off + size > s->frame_end) {
3723         tcg_raise_tb_overflow(s);
3724     }
3725     s->current_frame_offset = off + size;
3726 #if defined(__sparc__)
3727     off += TCG_TARGET_STACK_BIAS;
3728 #endif
3729 
3730     /* If the object was subdivided, assign memory to all the parts. */
3731     if (ts->base_type != ts->type) {
3732         int part_size = tcg_type_size(ts->type);
3733         int part_count = size / part_size;
3734 
3735         /*
3736          * Each part is allocated sequentially in tcg_temp_new_internal.
3737          * Jump back to the first part by subtracting the current index.
3738          */
3739         ts -= ts->temp_subindex;
3740         for (int i = 0; i < part_count; ++i) {
3741             ts[i].mem_offset = off + i * part_size;
3742             ts[i].mem_base = s->frame_temp;
3743             ts[i].mem_allocated = 1;
3744         }
3745     } else {
3746         ts->mem_offset = off;
3747         ts->mem_base = s->frame_temp;
3748         ts->mem_allocated = 1;
3749     }
3750 }
3751 
3752 /* Assign @reg to @ts, and update reg_to_temp[]. */
3753 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3754 {
3755     if (ts->val_type == TEMP_VAL_REG) {
3756         TCGReg old = ts->reg;
3757         tcg_debug_assert(s->reg_to_temp[old] == ts);
3758         if (old == reg) {
3759             return;
3760         }
3761         s->reg_to_temp[old] = NULL;
3762     }
3763     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3764     s->reg_to_temp[reg] = ts;
3765     ts->val_type = TEMP_VAL_REG;
3766     ts->reg = reg;
3767 }
3768 
3769 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3770 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3771 {
3772     tcg_debug_assert(type != TEMP_VAL_REG);
3773     if (ts->val_type == TEMP_VAL_REG) {
3774         TCGReg reg = ts->reg;
3775         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3776         s->reg_to_temp[reg] = NULL;
3777     }
3778     ts->val_type = type;
3779 }
3780 
3781 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3782 
3783 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3784    mark it free; otherwise mark it dead.  */
3785 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3786 {
3787     TCGTempVal new_type;
3788 
3789     switch (ts->kind) {
3790     case TEMP_FIXED:
3791         return;
3792     case TEMP_GLOBAL:
3793     case TEMP_TB:
3794         new_type = TEMP_VAL_MEM;
3795         break;
3796     case TEMP_EBB:
3797         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3798         break;
3799     case TEMP_CONST:
3800         new_type = TEMP_VAL_CONST;
3801         break;
3802     default:
3803         g_assert_not_reached();
3804     }
3805     set_temp_val_nonreg(s, ts, new_type);
3806 }
3807 
3808 /* Mark a temporary as dead.  */
3809 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3810 {
3811     temp_free_or_dead(s, ts, 1);
3812 }
3813 
3814 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3815    registers needs to be allocated to store a constant.  If 'free_or_dead'
3816    is non-zero, subsequently release the temporary; if it is positive, the
3817    temp is dead; if it is negative, the temp is free.  */
3818 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3819                       TCGRegSet preferred_regs, int free_or_dead)
3820 {
3821     if (!temp_readonly(ts) && !ts->mem_coherent) {
3822         if (!ts->mem_allocated) {
3823             temp_allocate_frame(s, ts);
3824         }
3825         switch (ts->val_type) {
3826         case TEMP_VAL_CONST:
3827             /* If we're going to free the temp immediately, then we won't
3828                require it later in a register, so attempt to store the
3829                constant to memory directly.  */
3830             if (free_or_dead
3831                 && tcg_out_sti(s, ts->type, ts->val,
3832                                ts->mem_base->reg, ts->mem_offset)) {
3833                 break;
3834             }
3835             temp_load(s, ts, tcg_target_available_regs[ts->type],
3836                       allocated_regs, preferred_regs);
3837             /* fallthrough */
3838 
3839         case TEMP_VAL_REG:
3840             tcg_out_st(s, ts->type, ts->reg,
3841                        ts->mem_base->reg, ts->mem_offset);
3842             break;
3843 
3844         case TEMP_VAL_MEM:
3845             break;
3846 
3847         case TEMP_VAL_DEAD:
3848         default:
3849             g_assert_not_reached();
3850         }
3851         ts->mem_coherent = 1;
3852     }
3853     if (free_or_dead) {
3854         temp_free_or_dead(s, ts, free_or_dead);
3855     }
3856 }
3857 
3858 /* free register 'reg' by spilling the corresponding temporary if necessary */
3859 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3860 {
3861     TCGTemp *ts = s->reg_to_temp[reg];
3862     if (ts != NULL) {
3863         temp_sync(s, ts, allocated_regs, 0, -1);
3864     }
3865 }
3866 
3867 /**
3868  * tcg_reg_alloc:
3869  * @required_regs: Set of registers in which we must allocate.
3870  * @allocated_regs: Set of registers which must be avoided.
3871  * @preferred_regs: Set of registers we should prefer.
3872  * @rev: True if we search the registers in "indirect" order.
3873  *
3874  * The allocated register must be in @required_regs & ~@allocated_regs,
3875  * but if we can put it in @preferred_regs we may save a move later.
3876  */
3877 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3878                             TCGRegSet allocated_regs,
3879                             TCGRegSet preferred_regs, bool rev)
3880 {
3881     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3882     TCGRegSet reg_ct[2];
3883     const int *order;
3884 
3885     reg_ct[1] = required_regs & ~allocated_regs;
3886     tcg_debug_assert(reg_ct[1] != 0);
3887     reg_ct[0] = reg_ct[1] & preferred_regs;
3888 
3889     /* Skip the preferred_regs option if it cannot be satisfied,
3890        or if the preference made no difference.  */
3891     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3892 
3893     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3894 
3895     /* Try free registers, preferences first.  */
3896     for (j = f; j < 2; j++) {
3897         TCGRegSet set = reg_ct[j];
3898 
3899         if (tcg_regset_single(set)) {
3900             /* One register in the set.  */
3901             TCGReg reg = tcg_regset_first(set);
3902             if (s->reg_to_temp[reg] == NULL) {
3903                 return reg;
3904             }
3905         } else {
3906             for (i = 0; i < n; i++) {
3907                 TCGReg reg = order[i];
3908                 if (s->reg_to_temp[reg] == NULL &&
3909                     tcg_regset_test_reg(set, reg)) {
3910                     return reg;
3911                 }
3912             }
3913         }
3914     }
3915 
3916     /* We must spill something.  */
3917     for (j = f; j < 2; j++) {
3918         TCGRegSet set = reg_ct[j];
3919 
3920         if (tcg_regset_single(set)) {
3921             /* One register in the set.  */
3922             TCGReg reg = tcg_regset_first(set);
3923             tcg_reg_free(s, reg, allocated_regs);
3924             return reg;
3925         } else {
3926             for (i = 0; i < n; i++) {
3927                 TCGReg reg = order[i];
3928                 if (tcg_regset_test_reg(set, reg)) {
3929                     tcg_reg_free(s, reg, allocated_regs);
3930                     return reg;
3931                 }
3932             }
3933         }
3934     }
3935 
3936     g_assert_not_reached();
3937 }
3938 
3939 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
3940                                  TCGRegSet allocated_regs,
3941                                  TCGRegSet preferred_regs, bool rev)
3942 {
3943     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3944     TCGRegSet reg_ct[2];
3945     const int *order;
3946 
3947     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
3948     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
3949     tcg_debug_assert(reg_ct[1] != 0);
3950     reg_ct[0] = reg_ct[1] & preferred_regs;
3951 
3952     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3953 
3954     /*
3955      * Skip the preferred_regs option if it cannot be satisfied,
3956      * or if the preference made no difference.
3957      */
3958     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3959 
3960     /*
3961      * Minimize the number of flushes by looking for 2 free registers first,
3962      * then a single flush, then two flushes.
3963      */
3964     for (fmin = 2; fmin >= 0; fmin--) {
3965         for (j = k; j < 2; j++) {
3966             TCGRegSet set = reg_ct[j];
3967 
3968             for (i = 0; i < n; i++) {
3969                 TCGReg reg = order[i];
3970 
3971                 if (tcg_regset_test_reg(set, reg)) {
3972                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
3973                     if (f >= fmin) {
3974                         tcg_reg_free(s, reg, allocated_regs);
3975                         tcg_reg_free(s, reg + 1, allocated_regs);
3976                         return reg;
3977                     }
3978                 }
3979             }
3980         }
3981     }
3982     g_assert_not_reached();
3983 }
3984 
3985 /* Make sure the temporary is in a register.  If needed, allocate the register
3986    from DESIRED while avoiding ALLOCATED.  */
3987 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3988                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3989 {
3990     TCGReg reg;
3991 
3992     switch (ts->val_type) {
3993     case TEMP_VAL_REG:
3994         return;
3995     case TEMP_VAL_CONST:
3996         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3997                             preferred_regs, ts->indirect_base);
3998         if (ts->type <= TCG_TYPE_I64) {
3999             tcg_out_movi(s, ts->type, reg, ts->val);
4000         } else {
4001             uint64_t val = ts->val;
4002             MemOp vece = MO_64;
4003 
4004             /*
4005              * Find the minimal vector element that matches the constant.
4006              * The targets will, in general, have to do this search anyway,
4007              * do this generically.
4008              */
4009             if (val == dup_const(MO_8, val)) {
4010                 vece = MO_8;
4011             } else if (val == dup_const(MO_16, val)) {
4012                 vece = MO_16;
4013             } else if (val == dup_const(MO_32, val)) {
4014                 vece = MO_32;
4015             }
4016 
4017             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4018         }
4019         ts->mem_coherent = 0;
4020         break;
4021     case TEMP_VAL_MEM:
4022         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4023                             preferred_regs, ts->indirect_base);
4024         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4025         ts->mem_coherent = 1;
4026         break;
4027     case TEMP_VAL_DEAD:
4028     default:
4029         g_assert_not_reached();
4030     }
4031     set_temp_val_reg(s, ts, reg);
4032 }
4033 
4034 /* Save a temporary to memory. 'allocated_regs' is used in case a
4035    temporary registers needs to be allocated to store a constant.  */
4036 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4037 {
4038     /* The liveness analysis already ensures that globals are back
4039        in memory. Keep an tcg_debug_assert for safety. */
4040     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4041 }
4042 
4043 /* save globals to their canonical location and assume they can be
4044    modified be the following code. 'allocated_regs' is used in case a
4045    temporary registers needs to be allocated to store a constant. */
4046 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4047 {
4048     int i, n;
4049 
4050     for (i = 0, n = s->nb_globals; i < n; i++) {
4051         temp_save(s, &s->temps[i], allocated_regs);
4052     }
4053 }
4054 
4055 /* sync globals to their canonical location and assume they can be
4056    read by the following code. 'allocated_regs' is used in case a
4057    temporary registers needs to be allocated to store a constant. */
4058 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4059 {
4060     int i, n;
4061 
4062     for (i = 0, n = s->nb_globals; i < n; i++) {
4063         TCGTemp *ts = &s->temps[i];
4064         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4065                          || ts->kind == TEMP_FIXED
4066                          || ts->mem_coherent);
4067     }
4068 }
4069 
4070 /* at the end of a basic block, we assume all temporaries are dead and
4071    all globals are stored at their canonical location. */
4072 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4073 {
4074     int i;
4075 
4076     for (i = s->nb_globals; i < s->nb_temps; i++) {
4077         TCGTemp *ts = &s->temps[i];
4078 
4079         switch (ts->kind) {
4080         case TEMP_TB:
4081             temp_save(s, ts, allocated_regs);
4082             break;
4083         case TEMP_EBB:
4084             /* The liveness analysis already ensures that temps are dead.
4085                Keep an tcg_debug_assert for safety. */
4086             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4087             break;
4088         case TEMP_CONST:
4089             /* Similarly, we should have freed any allocated register. */
4090             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4091             break;
4092         default:
4093             g_assert_not_reached();
4094         }
4095     }
4096 
4097     save_globals(s, allocated_regs);
4098 }
4099 
4100 /*
4101  * At a conditional branch, we assume all temporaries are dead unless
4102  * explicitly live-across-conditional-branch; all globals and local
4103  * temps are synced to their location.
4104  */
4105 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4106 {
4107     sync_globals(s, allocated_regs);
4108 
4109     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4110         TCGTemp *ts = &s->temps[i];
4111         /*
4112          * The liveness analysis already ensures that temps are dead.
4113          * Keep tcg_debug_asserts for safety.
4114          */
4115         switch (ts->kind) {
4116         case TEMP_TB:
4117             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4118             break;
4119         case TEMP_EBB:
4120         case TEMP_CONST:
4121             break;
4122         default:
4123             g_assert_not_reached();
4124         }
4125     }
4126 }
4127 
4128 /*
4129  * Specialized code generation for INDEX_op_mov_* with a constant.
4130  */
4131 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4132                                   tcg_target_ulong val, TCGLifeData arg_life,
4133                                   TCGRegSet preferred_regs)
4134 {
4135     /* ENV should not be modified.  */
4136     tcg_debug_assert(!temp_readonly(ots));
4137 
4138     /* The movi is not explicitly generated here.  */
4139     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4140     ots->val = val;
4141     ots->mem_coherent = 0;
4142     if (NEED_SYNC_ARG(0)) {
4143         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4144     } else if (IS_DEAD_ARG(0)) {
4145         temp_dead(s, ots);
4146     }
4147 }
4148 
4149 /*
4150  * Specialized code generation for INDEX_op_mov_*.
4151  */
4152 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4153 {
4154     const TCGLifeData arg_life = op->life;
4155     TCGRegSet allocated_regs, preferred_regs;
4156     TCGTemp *ts, *ots;
4157     TCGType otype, itype;
4158     TCGReg oreg, ireg;
4159 
4160     allocated_regs = s->reserved_regs;
4161     preferred_regs = output_pref(op, 0);
4162     ots = arg_temp(op->args[0]);
4163     ts = arg_temp(op->args[1]);
4164 
4165     /* ENV should not be modified.  */
4166     tcg_debug_assert(!temp_readonly(ots));
4167 
4168     /* Note that otype != itype for no-op truncation.  */
4169     otype = ots->type;
4170     itype = ts->type;
4171 
4172     if (ts->val_type == TEMP_VAL_CONST) {
4173         /* propagate constant or generate sti */
4174         tcg_target_ulong val = ts->val;
4175         if (IS_DEAD_ARG(1)) {
4176             temp_dead(s, ts);
4177         }
4178         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4179         return;
4180     }
4181 
4182     /* If the source value is in memory we're going to be forced
4183        to have it in a register in order to perform the copy.  Copy
4184        the SOURCE value into its own register first, that way we
4185        don't have to reload SOURCE the next time it is used. */
4186     if (ts->val_type == TEMP_VAL_MEM) {
4187         temp_load(s, ts, tcg_target_available_regs[itype],
4188                   allocated_regs, preferred_regs);
4189     }
4190     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4191     ireg = ts->reg;
4192 
4193     if (IS_DEAD_ARG(0)) {
4194         /* mov to a non-saved dead register makes no sense (even with
4195            liveness analysis disabled). */
4196         tcg_debug_assert(NEED_SYNC_ARG(0));
4197         if (!ots->mem_allocated) {
4198             temp_allocate_frame(s, ots);
4199         }
4200         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4201         if (IS_DEAD_ARG(1)) {
4202             temp_dead(s, ts);
4203         }
4204         temp_dead(s, ots);
4205         return;
4206     }
4207 
4208     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4209         /*
4210          * The mov can be suppressed.  Kill input first, so that it
4211          * is unlinked from reg_to_temp, then set the output to the
4212          * reg that we saved from the input.
4213          */
4214         temp_dead(s, ts);
4215         oreg = ireg;
4216     } else {
4217         if (ots->val_type == TEMP_VAL_REG) {
4218             oreg = ots->reg;
4219         } else {
4220             /* Make sure to not spill the input register during allocation. */
4221             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4222                                  allocated_regs | ((TCGRegSet)1 << ireg),
4223                                  preferred_regs, ots->indirect_base);
4224         }
4225         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4226             /*
4227              * Cross register class move not supported.
4228              * Store the source register into the destination slot
4229              * and leave the destination temp as TEMP_VAL_MEM.
4230              */
4231             assert(!temp_readonly(ots));
4232             if (!ts->mem_allocated) {
4233                 temp_allocate_frame(s, ots);
4234             }
4235             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4236             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4237             ots->mem_coherent = 1;
4238             return;
4239         }
4240     }
4241     set_temp_val_reg(s, ots, oreg);
4242     ots->mem_coherent = 0;
4243 
4244     if (NEED_SYNC_ARG(0)) {
4245         temp_sync(s, ots, allocated_regs, 0, 0);
4246     }
4247 }
4248 
4249 /*
4250  * Specialized code generation for INDEX_op_dup_vec.
4251  */
4252 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4253 {
4254     const TCGLifeData arg_life = op->life;
4255     TCGRegSet dup_out_regs, dup_in_regs;
4256     TCGTemp *its, *ots;
4257     TCGType itype, vtype;
4258     unsigned vece;
4259     int lowpart_ofs;
4260     bool ok;
4261 
4262     ots = arg_temp(op->args[0]);
4263     its = arg_temp(op->args[1]);
4264 
4265     /* ENV should not be modified.  */
4266     tcg_debug_assert(!temp_readonly(ots));
4267 
4268     itype = its->type;
4269     vece = TCGOP_VECE(op);
4270     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4271 
4272     if (its->val_type == TEMP_VAL_CONST) {
4273         /* Propagate constant via movi -> dupi.  */
4274         tcg_target_ulong val = its->val;
4275         if (IS_DEAD_ARG(1)) {
4276             temp_dead(s, its);
4277         }
4278         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4279         return;
4280     }
4281 
4282     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4283     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4284 
4285     /* Allocate the output register now.  */
4286     if (ots->val_type != TEMP_VAL_REG) {
4287         TCGRegSet allocated_regs = s->reserved_regs;
4288         TCGReg oreg;
4289 
4290         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4291             /* Make sure to not spill the input register. */
4292             tcg_regset_set_reg(allocated_regs, its->reg);
4293         }
4294         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4295                              output_pref(op, 0), ots->indirect_base);
4296         set_temp_val_reg(s, ots, oreg);
4297     }
4298 
4299     switch (its->val_type) {
4300     case TEMP_VAL_REG:
4301         /*
4302          * The dup constriaints must be broad, covering all possible VECE.
4303          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4304          * to fail, indicating that extra moves are required for that case.
4305          */
4306         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4307             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4308                 goto done;
4309             }
4310             /* Try again from memory or a vector input register.  */
4311         }
4312         if (!its->mem_coherent) {
4313             /*
4314              * The input register is not synced, and so an extra store
4315              * would be required to use memory.  Attempt an integer-vector
4316              * register move first.  We do not have a TCGRegSet for this.
4317              */
4318             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4319                 break;
4320             }
4321             /* Sync the temp back to its slot and load from there.  */
4322             temp_sync(s, its, s->reserved_regs, 0, 0);
4323         }
4324         /* fall through */
4325 
4326     case TEMP_VAL_MEM:
4327         lowpart_ofs = 0;
4328         if (HOST_BIG_ENDIAN) {
4329             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4330         }
4331         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4332                              its->mem_offset + lowpart_ofs)) {
4333             goto done;
4334         }
4335         /* Load the input into the destination vector register. */
4336         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4337         break;
4338 
4339     default:
4340         g_assert_not_reached();
4341     }
4342 
4343     /* We now have a vector input register, so dup must succeed. */
4344     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4345     tcg_debug_assert(ok);
4346 
4347  done:
4348     ots->mem_coherent = 0;
4349     if (IS_DEAD_ARG(1)) {
4350         temp_dead(s, its);
4351     }
4352     if (NEED_SYNC_ARG(0)) {
4353         temp_sync(s, ots, s->reserved_regs, 0, 0);
4354     }
4355     if (IS_DEAD_ARG(0)) {
4356         temp_dead(s, ots);
4357     }
4358 }
4359 
/*
 * Allocate host registers for one TCG op described by tcg_op_defs[op->opc]
 * and emit the host code for it.
 *
 * The work is done in this order:
 *   1. copy the op's constant arguments into new_args[];
 *   2. for each input, either pass it as an immediate (when the target
 *      accepts the constant) or get it into a register that satisfies the
 *      argument constraint, honouring aliasing and register pairs;
 *   3. release the inputs that die at this op;
 *   4. for conditional branches and basic-block ends, call the dedicated
 *      helper; otherwise free call-clobbered registers / sync globals as
 *      the op flags demand and pick a register for each output;
 *   5. emit the instruction;
 *   6. sync and/or release the outputs as the liveness data requires.
 *
 * new_args[] and const_args[] are what the backend emitter receives:
 * new_args[i] holds a register number or an immediate value, and
 * const_args[i] is set to 1 for inputs passed as immediates, 0 for inputs
 * placed in registers.
 */
4360 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4361 {
     /*
      * NOTE(review): arg_life is never named explicitly below; presumably
      * the IS_DEAD_ARG()/NEED_SYNC_ARG() macros read it -- confirm against
      * their definitions before renaming.
      */
4362     const TCGLifeData arg_life = op->life;
4363     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4364     TCGRegSet i_allocated_regs;
4365     TCGRegSet o_allocated_regs;
4366     int i, k, nb_iargs, nb_oargs;
4367     TCGReg reg;
4368     TCGArg arg;
4369     const TCGArgConstraint *arg_ct;
4370     TCGTemp *ts;
4371     TCGArg new_args[TCG_MAX_OP_ARGS];
4372     int const_args[TCG_MAX_OP_ARGS];
4373 
4374     nb_oargs = def->nb_oargs;
4375     nb_iargs = def->nb_iargs;
4376 
4377     /* copy constants */
4378     memcpy(new_args + nb_oargs + nb_iargs,
4379            op->args + nb_oargs + nb_iargs,
4380            sizeof(TCGArg) * def->nb_cargs);
4381 
         /*
          * Both sets start out as the reserved registers.  i_allocated_regs
          * accumulates the registers chosen for inputs, o_allocated_regs
          * those chosen for outputs.
          */
4382     i_allocated_regs = s->reserved_regs;
4383     o_allocated_regs = s->reserved_regs;
4384 
4385     /* satisfy input constraints */
4386     for (k = 0; k < nb_iargs; k++) {
4387         TCGRegSet i_preferred_regs, i_required_regs;
4388         bool allocate_new_reg, copyto_new_reg;
4389         TCGTemp *ts2;
4390         int i1, i2;
4391 
             /* Inputs are visited in sort_index order, not positional order. */
4392         i = def->args_ct[nb_oargs + k].sort_index;
4393         arg = op->args[i];
4394         arg_ct = &def->args_ct[i];
4395         ts = arg_temp(arg);
4396 
4397         if (ts->val_type == TEMP_VAL_CONST
4398             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4399             /* constant is OK for instruction */
4400             const_args[i] = 1;
4401             new_args[i] = ts->val;
4402             continue;
4403         }
4404 
4405         reg = ts->reg;
4406         i_preferred_regs = 0;
4407         i_required_regs = arg_ct->regs;
4408         allocate_new_reg = false;
4409         copyto_new_reg = false;
4410 
4411         switch (arg_ct->pair) {
4412         case 0: /* not paired */
4413             if (arg_ct->ialias) {
4414                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4415 
4416                 /*
4417                  * If the input is readonly, then it cannot also be an
4418                  * output and aliased to itself.  If the input is not
4419                  * dead after the instruction, we must allocate a new
4420                  * register and move it.
4421                  */
4422                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4423                     allocate_new_reg = true;
4424                 } else if (ts->val_type == TEMP_VAL_REG) {
4425                     /*
4426                      * Check if the current register has already been
4427                      * allocated for another input.
4428                      */
4429                     allocate_new_reg =
4430                         tcg_regset_test_reg(i_allocated_regs, reg);
4431                 }
4432             }
4433             if (!allocate_new_reg) {
4434                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4435                           i_preferred_regs);
4436                 reg = ts->reg;
4437                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4438             }
4439             if (allocate_new_reg) {
4440                 /*
4441                  * Allocate a new register matching the constraint
4442                  * and move the temporary register into it.
4443                  */
4444                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4445                           i_allocated_regs, 0);
4446                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4447                                     i_preferred_regs, ts->indirect_base);
4448                 copyto_new_reg = true;
4449             }
4450             break;
4451 
4452         case 1:
4453             /* First of an input pair; if i1 == i2, the second is an output. */
4454             i1 = i;
4455             i2 = arg_ct->pair_index;
4456             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4457 
4458             /*
4459              * It is easier to default to allocating a new pair
4460              * and to identify a few cases where it's not required.
4461              */
4462             if (arg_ct->ialias) {
4463                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4464                 if (IS_DEAD_ARG(i1) &&
4465                     IS_DEAD_ARG(i2) &&
4466                     !temp_readonly(ts) &&
4467                     ts->val_type == TEMP_VAL_REG &&
4468                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4469                     tcg_regset_test_reg(i_required_regs, reg) &&
4470                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4471                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4472                     (ts2
4473                      ? ts2->val_type == TEMP_VAL_REG &&
4474                        ts2->reg == reg + 1 &&
4475                        !temp_readonly(ts2)
4476                      : s->reg_to_temp[reg + 1] == NULL)) {
4477                     break;
4478                 }
4479             } else {
4480                 /* Without aliasing, the pair must also be an input. */
4481                 tcg_debug_assert(ts2);
4482                 if (ts->val_type == TEMP_VAL_REG &&
4483                     ts2->val_type == TEMP_VAL_REG &&
4484                     ts2->reg == reg + 1 &&
4485                     tcg_regset_test_reg(i_required_regs, reg)) {
4486                     break;
4487                 }
4488             }
4489             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4490                                      0, ts->indirect_base);
4491             goto do_pair;
4492 
4493         case 2: /* pair second */
                 /* The first half was already placed; take the next register. */
4494             reg = new_args[arg_ct->pair_index] + 1;
4495             goto do_pair;
4496 
4497         case 3: /* ialias with second output, no first input */
4498             tcg_debug_assert(arg_ct->ialias);
4499             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4500 
4501             if (IS_DEAD_ARG(i) &&
4502                 !temp_readonly(ts) &&
4503                 ts->val_type == TEMP_VAL_REG &&
4504                 reg > 0 &&
4505                 s->reg_to_temp[reg - 1] == NULL &&
4506                 tcg_regset_test_reg(i_required_regs, reg) &&
4507                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4508                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                     /* Keep reg as is and reserve reg - 1 as the pair's first half. */
4509                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4510                 break;
4511             }
                 /*
                  * The constraint set describes the second register of the
                  * pair, so it is shifted down by one to allocate by the first;
                  * the input itself then goes into the register after it.
                  */
4512             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4513                                      i_allocated_regs, 0,
4514                                      ts->indirect_base);
4515             tcg_regset_set_reg(i_allocated_regs, reg);
4516             reg += 1;
4517             goto do_pair;
4518 
4519         do_pair:
4520             /*
4521              * If an aliased input is not dead after the instruction,
4522              * we must allocate a new register and move it.
4523              */
4524             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4525                 TCGRegSet t_allocated_regs = i_allocated_regs;
4526 
4527                 /*
4528                  * Because of the alias, and the continued life, make sure
4529                  * that the temp is somewhere *other* than the reg pair,
4530                  * and we get a copy in reg.
4531                  */
4532                 tcg_regset_set_reg(t_allocated_regs, reg);
4533                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4534                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4535                     /* If ts was already in reg, copy it somewhere else. */
4536                     TCGReg nr;
4537                     bool ok;
4538 
4539                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4540                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4541                                        t_allocated_regs, 0, ts->indirect_base);
4542                     ok = tcg_out_mov(s, ts->type, nr, reg);
4543                     tcg_debug_assert(ok);
4544 
4545                     set_temp_val_reg(s, ts, nr);
4546                 } else {
4547                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4548                               t_allocated_regs, 0);
4549                     copyto_new_reg = true;
4550                 }
4551             } else {
4552                 /* Preferably allocate to reg, otherwise copy. */
4553                 i_required_regs = (TCGRegSet)1 << reg;
4554                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4555                           i_preferred_regs);
4556                 copyto_new_reg = ts->reg != reg;
4557             }
4558             break;
4559 
4560         default:
4561             g_assert_not_reached();
4562         }
4563 
             /*
              * At this point reg is the register the instruction will read.
              * If the temp currently lives elsewhere, copy it into reg.
              */
4564         if (copyto_new_reg) {
4565             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4566                 /*
4567                  * Cross register class move not supported.  Sync the
4568                  * temp back to its slot and load from there.
4569                  */
4570                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4571                 tcg_out_ld(s, ts->type, reg,
4572                            ts->mem_base->reg, ts->mem_offset);
4573             }
4574         }
4575         new_args[i] = reg;
4576         const_args[i] = 0;
4577         tcg_regset_set_reg(i_allocated_regs, reg);
4578     }
4579 
4580     /* mark dead temporaries and free the associated registers */
4581     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4582         if (IS_DEAD_ARG(i)) {
4583             temp_dead(s, arg_temp(op->args[i]));
4584         }
4585     }
4586 
         /*
          * Conditional branches and basic-block ends are handed to their
          * own helpers; output registers are only assigned in the final
          * else arm.
          */
4587     if (def->flags & TCG_OPF_COND_BRANCH) {
4588         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4589     } else if (def->flags & TCG_OPF_BB_END) {
4590         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4591     } else {
4592         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4593             /* XXX: permit generic clobber register list ? */
4594             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4595                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4596                     tcg_reg_free(s, i, i_allocated_regs);
4597                 }
4598             }
4599         }
4600         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4601             /* sync globals if the op has side effects and might trigger
4602                an exception. */
4603             sync_globals(s, i_allocated_regs);
4604         }
4605 
4606         /* satisfy the output constraints */
4607         for(k = 0; k < nb_oargs; k++) {
4608             i = def->args_ct[k].sort_index;
4609             arg = op->args[i];
4610             arg_ct = &def->args_ct[i];
4611             ts = arg_temp(arg);
4612 
4613             /* ENV should not be modified.  */
4614             tcg_debug_assert(!temp_readonly(ts));
4615 
4616             switch (arg_ct->pair) {
4617             case 0: /* not paired */
4618                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                         /* Reuse the register of the aliased (non-constant) input. */
4619                     reg = new_args[arg_ct->alias_index];
4620                 } else if (arg_ct->newreg) {
                         /* Exclude input registers as well as earlier outputs. */
4621                     reg = tcg_reg_alloc(s, arg_ct->regs,
4622                                         i_allocated_regs | o_allocated_regs,
4623                                         output_pref(op, k), ts->indirect_base);
4624                 } else {
4625                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4626                                         output_pref(op, k), ts->indirect_base);
4627                 }
4628                 break;
4629 
4630             case 1: /* first of pair */
4631                 tcg_debug_assert(!arg_ct->newreg);
4632                 if (arg_ct->oalias) {
4633                     reg = new_args[arg_ct->alias_index];
4634                     break;
4635                 }
4636                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4637                                          output_pref(op, k), ts->indirect_base);
4638                 break;
4639 
4640             case 2: /* second of pair */
4641                 tcg_debug_assert(!arg_ct->newreg);
4642                 if (arg_ct->oalias) {
4643                     reg = new_args[arg_ct->alias_index];
4644                 } else {
4645                     reg = new_args[arg_ct->pair_index] + 1;
4646                 }
4647                 break;
4648 
4649             case 3: /* first of pair, aliasing with a second input */
4650                 tcg_debug_assert(!arg_ct->newreg);
4651                 reg = new_args[arg_ct->pair_index] - 1;
4652                 break;
4653 
4654             default:
4655                 g_assert_not_reached();
4656             }
                 /* Bind the output temp to reg and mark it as not coherent with memory. */
4657             tcg_regset_set_reg(o_allocated_regs, reg);
4658             set_temp_val_reg(s, ts, reg);
4659             ts->mem_coherent = 0;
4660             new_args[i] = reg;
4661         }
4662     }
4663 
4664     /* emit instruction */
         /*
          * The integer extension opcodes listed here are emitted through
          * dedicated tcg_out_ext* calls; every other opcode goes through
          * tcg_out_vec_op() or tcg_out_op().
          */
4665     switch (op->opc) {
4666     case INDEX_op_ext8s_i32:
4667         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4668         break;
4669     case INDEX_op_ext8s_i64:
4670         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4671         break;
4672     case INDEX_op_ext8u_i32:
4673     case INDEX_op_ext8u_i64:
4674         tcg_out_ext8u(s, new_args[0], new_args[1]);
4675         break;
4676     case INDEX_op_ext16s_i32:
4677         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4678         break;
4679     case INDEX_op_ext16s_i64:
4680         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4681         break;
4682     case INDEX_op_ext16u_i32:
4683     case INDEX_op_ext16u_i64:
4684         tcg_out_ext16u(s, new_args[0], new_args[1]);
4685         break;
4686     case INDEX_op_ext32s_i64:
4687         tcg_out_ext32s(s, new_args[0], new_args[1]);
4688         break;
4689     case INDEX_op_ext32u_i64:
4690         tcg_out_ext32u(s, new_args[0], new_args[1]);
4691         break;
4692     case INDEX_op_ext_i32_i64:
4693         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4694         break;
4695     case INDEX_op_extu_i32_i64:
4696         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4697         break;
4698     case INDEX_op_extrl_i64_i32:
4699         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4700         break;
4701     default:
4702         if (def->flags & TCG_OPF_VECTOR) {
4703             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4704                            new_args, const_args);
4705         } else {
4706             tcg_out_op(s, op->opc, new_args, const_args);
4707         }
4708         break;
4709     }
4710 
4711     /* move the outputs in the correct register if needed */
4712     for(i = 0; i < nb_oargs; i++) {
4713         ts = arg_temp(op->args[i]);
4714 
4715         /* ENV should not be modified.  */
4716         tcg_debug_assert(!temp_readonly(ts));
4717 
             /* temp_sync's last argument passes the output's dead flag along. */
4718         if (NEED_SYNC_ARG(i)) {
4719             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4720         } else if (IS_DEAD_ARG(i)) {
4721             temp_dead(s, ts);
4722         }
4723     }
4724 }
4725 
4726 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4727 {
4728     const TCGLifeData arg_life = op->life;
4729     TCGTemp *ots, *itsl, *itsh;
4730     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4731 
4732     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4733     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4734     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4735 
4736     ots = arg_temp(op->args[0]);
4737     itsl = arg_temp(op->args[1]);
4738     itsh = arg_temp(op->args[2]);
4739 
4740     /* ENV should not be modified.  */
4741     tcg_debug_assert(!temp_readonly(ots));
4742 
4743     /* Allocate the output register now.  */
4744     if (ots->val_type != TEMP_VAL_REG) {
4745         TCGRegSet allocated_regs = s->reserved_regs;
4746         TCGRegSet dup_out_regs =
4747             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4748         TCGReg oreg;
4749 
4750         /* Make sure to not spill the input registers. */
4751         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4752             tcg_regset_set_reg(allocated_regs, itsl->reg);
4753         }
4754         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4755             tcg_regset_set_reg(allocated_regs, itsh->reg);
4756         }
4757 
4758         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4759                              output_pref(op, 0), ots->indirect_base);
4760         set_temp_val_reg(s, ots, oreg);
4761     }
4762 
4763     /* Promote dup2 of immediates to dupi_vec. */
4764     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4765         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4766         MemOp vece = MO_64;
4767 
4768         if (val == dup_const(MO_8, val)) {
4769             vece = MO_8;
4770         } else if (val == dup_const(MO_16, val)) {
4771             vece = MO_16;
4772         } else if (val == dup_const(MO_32, val)) {
4773             vece = MO_32;
4774         }
4775 
4776         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4777         goto done;
4778     }
4779 
4780     /* If the two inputs form one 64-bit value, try dupm_vec. */
4781     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
4782         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
4783         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
4784         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
4785 
4786         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
4787         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
4788 
4789         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4790                              its->mem_base->reg, its->mem_offset)) {
4791             goto done;
4792         }
4793     }
4794 
4795     /* Fall back to generic expansion. */
4796     return false;
4797 
4798  done:
4799     ots->mem_coherent = 0;
4800     if (IS_DEAD_ARG(1)) {
4801         temp_dead(s, itsl);
4802     }
4803     if (IS_DEAD_ARG(2)) {
4804         temp_dead(s, itsh);
4805     }
4806     if (NEED_SYNC_ARG(0)) {
4807         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4808     } else if (IS_DEAD_ARG(0)) {
4809         temp_dead(s, ots);
4810     }
4811     return true;
4812 }
4813 
4814 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4815                          TCGRegSet allocated_regs)
4816 {
4817     if (ts->val_type == TEMP_VAL_REG) {
4818         if (ts->reg != reg) {
4819             tcg_reg_free(s, reg, allocated_regs);
4820             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4821                 /*
4822                  * Cross register class move not supported.  Sync the
4823                  * temp back to its slot and load from there.
4824                  */
4825                 temp_sync(s, ts, allocated_regs, 0, 0);
4826                 tcg_out_ld(s, ts->type, reg,
4827                            ts->mem_base->reg, ts->mem_offset);
4828             }
4829         }
4830     } else {
4831         TCGRegSet arg_set = 0;
4832 
4833         tcg_reg_free(s, reg, allocated_regs);
4834         tcg_regset_set_reg(arg_set, reg);
4835         temp_load(s, ts, arg_set, allocated_regs, 0);
4836     }
4837 }
4838 
4839 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
4840                          TCGRegSet allocated_regs)
4841 {
4842     /*
4843      * When the destination is on the stack, load up the temp and store.
4844      * If there are many call-saved registers, the temp might live to
4845      * see another use; otherwise it'll be discarded.
4846      */
4847     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4848     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4849                arg_slot_stk_ofs(arg_slot));
4850 }
4851 
4852 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4853                             TCGTemp *ts, TCGRegSet *allocated_regs)
4854 {
4855     if (arg_slot_reg_p(l->arg_slot)) {
4856         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4857         load_arg_reg(s, reg, ts, *allocated_regs);
4858         tcg_regset_set_reg(*allocated_regs, reg);
4859     } else {
4860         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
4861     }
4862 }
4863 
4864 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
4865                          intptr_t ref_off, TCGRegSet *allocated_regs)
4866 {
4867     TCGReg reg;
4868 
4869     if (arg_slot_reg_p(arg_slot)) {
4870         reg = tcg_target_call_iarg_regs[arg_slot];
4871         tcg_reg_free(s, reg, *allocated_regs);
4872         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4873         tcg_regset_set_reg(*allocated_regs, reg);
4874     } else {
4875         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4876                             *allocated_regs, 0, false);
4877         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4878         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4879                    arg_slot_stk_ofs(arg_slot));
4880     }
4881 }
4882 
4883 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4884 {
4885     const int nb_oargs = TCGOP_CALLO(op);
4886     const int nb_iargs = TCGOP_CALLI(op);
4887     const TCGLifeData arg_life = op->life;
4888     const TCGHelperInfo *info = tcg_call_info(op);
4889     TCGRegSet allocated_regs = s->reserved_regs;
4890     int i;
4891 
4892     /*
4893      * Move inputs into place in reverse order,
4894      * so that we place stacked arguments first.
4895      */
4896     for (i = nb_iargs - 1; i >= 0; --i) {
4897         const TCGCallArgumentLoc *loc = &info->in[i];
4898         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
4899 
4900         switch (loc->kind) {
4901         case TCG_CALL_ARG_NORMAL:
4902         case TCG_CALL_ARG_EXTEND_U:
4903         case TCG_CALL_ARG_EXTEND_S:
4904             load_arg_normal(s, loc, ts, &allocated_regs);
4905             break;
4906         case TCG_CALL_ARG_BY_REF:
4907             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4908             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
4909                          arg_slot_stk_ofs(loc->ref_slot),
4910                          &allocated_regs);
4911             break;
4912         case TCG_CALL_ARG_BY_REF_N:
4913             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4914             break;
4915         default:
4916             g_assert_not_reached();
4917         }
4918     }
4919 
4920     /* Mark dead temporaries and free the associated registers.  */
4921     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4922         if (IS_DEAD_ARG(i)) {
4923             temp_dead(s, arg_temp(op->args[i]));
4924         }
4925     }
4926 
4927     /* Clobber call registers.  */
4928     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4929         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4930             tcg_reg_free(s, i, allocated_regs);
4931         }
4932     }
4933 
4934     /*
4935      * Save globals if they might be written by the helper,
4936      * sync them if they might be read.
4937      */
4938     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
4939         /* Nothing to do */
4940     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
4941         sync_globals(s, allocated_regs);
4942     } else {
4943         save_globals(s, allocated_regs);
4944     }
4945 
4946     /*
4947      * If the ABI passes a pointer to the returned struct as the first
4948      * argument, load that now.  Pass a pointer to the output home slot.
4949      */
4950     if (info->out_kind == TCG_CALL_RET_BY_REF) {
4951         TCGTemp *ts = arg_temp(op->args[0]);
4952 
4953         if (!ts->mem_allocated) {
4954             temp_allocate_frame(s, ts);
4955         }
4956         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
4957     }
4958 
4959     tcg_out_call(s, tcg_call_func(op), info);
4960 
4961     /* Assign output registers and emit moves if needed.  */
4962     switch (info->out_kind) {
4963     case TCG_CALL_RET_NORMAL:
4964         for (i = 0; i < nb_oargs; i++) {
4965             TCGTemp *ts = arg_temp(op->args[i]);
4966             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
4967 
4968             /* ENV should not be modified.  */
4969             tcg_debug_assert(!temp_readonly(ts));
4970 
4971             set_temp_val_reg(s, ts, reg);
4972             ts->mem_coherent = 0;
4973         }
4974         break;
4975 
4976     case TCG_CALL_RET_BY_VEC:
4977         {
4978             TCGTemp *ts = arg_temp(op->args[0]);
4979 
4980             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
4981             tcg_debug_assert(ts->temp_subindex == 0);
4982             if (!ts->mem_allocated) {
4983                 temp_allocate_frame(s, ts);
4984             }
4985             tcg_out_st(s, TCG_TYPE_V128,
4986                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
4987                        ts->mem_base->reg, ts->mem_offset);
4988         }
4989         /* fall through to mark all parts in memory */
4990 
4991     case TCG_CALL_RET_BY_REF:
4992         /* The callee has performed a write through the reference. */
4993         for (i = 0; i < nb_oargs; i++) {
4994             TCGTemp *ts = arg_temp(op->args[i]);
4995             ts->val_type = TEMP_VAL_MEM;
4996         }
4997         break;
4998 
4999     default:
5000         g_assert_not_reached();
5001     }
5002 
5003     /* Flush or discard output registers as needed. */
5004     for (i = 0; i < nb_oargs; i++) {
5005         TCGTemp *ts = arg_temp(op->args[i]);
5006         if (NEED_SYNC_ARG(i)) {
5007             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5008         } else if (IS_DEAD_ARG(i)) {
5009             temp_dead(s, ts);
5010         }
5011     }
5012 }
5013 
5014 #ifdef CONFIG_PROFILER
5015 
5016 /* avoid copy/paste errors */
5017 #define PROF_ADD(to, from, field)                       \
5018     do {                                                \
5019         (to)->field += qatomic_read(&((from)->field));  \
5020     } while (0)
5021 
5022 #define PROF_MAX(to, from, field)                                       \
5023     do {                                                                \
5024         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
5025         if (val__ > (to)->field) {                                      \
5026             (to)->field = val__;                                        \
5027         }                                                               \
5028     } while (0)
5029 
5030 /* Pass in a zero'ed @prof */
5031 static inline
5032 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
5033 {
5034     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5035     unsigned int i;
5036 
5037     for (i = 0; i < n_ctxs; i++) {
5038         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5039         const TCGProfile *orig = &s->prof;
5040 
5041         if (counters) {
5042             PROF_ADD(prof, orig, cpu_exec_time);
5043             PROF_ADD(prof, orig, tb_count1);
5044             PROF_ADD(prof, orig, tb_count);
5045             PROF_ADD(prof, orig, op_count);
5046             PROF_MAX(prof, orig, op_count_max);
5047             PROF_ADD(prof, orig, temp_count);
5048             PROF_MAX(prof, orig, temp_count_max);
5049             PROF_ADD(prof, orig, del_op_count);
5050             PROF_ADD(prof, orig, code_in_len);
5051             PROF_ADD(prof, orig, code_out_len);
5052             PROF_ADD(prof, orig, search_out_len);
5053             PROF_ADD(prof, orig, interm_time);
5054             PROF_ADD(prof, orig, code_time);
5055             PROF_ADD(prof, orig, la_time);
5056             PROF_ADD(prof, orig, opt_time);
5057             PROF_ADD(prof, orig, restore_count);
5058             PROF_ADD(prof, orig, restore_time);
5059         }
5060         if (table) {
5061             int i;
5062 
5063             for (i = 0; i < NB_OPS; i++) {
5064                 PROF_ADD(prof, orig, table_op_count[i]);
5065             }
5066         }
5067     }
5068 }
5069 
5070 #undef PROF_ADD
5071 #undef PROF_MAX
5072 
5073 static void tcg_profile_snapshot_counters(TCGProfile *prof)
5074 {
5075     tcg_profile_snapshot(prof, true, false);
5076 }
5077 
5078 static void tcg_profile_snapshot_table(TCGProfile *prof)
5079 {
5080     tcg_profile_snapshot(prof, false, true);
5081 }
5082 
5083 void tcg_dump_op_count(GString *buf)
5084 {
5085     TCGProfile prof = {};
5086     int i;
5087 
5088     tcg_profile_snapshot_table(&prof);
5089     for (i = 0; i < NB_OPS; i++) {
5090         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5091                                prof.table_op_count[i]);
5092     }
5093 }
5094 
5095 int64_t tcg_cpu_exec_time(void)
5096 {
5097     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5098     unsigned int i;
5099     int64_t ret = 0;
5100 
5101     for (i = 0; i < n_ctxs; i++) {
5102         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5103         const TCGProfile *prof = &s->prof;
5104 
5105         ret += qatomic_read(&prof->cpu_exec_time);
5106     }
5107     return ret;
5108 }
5109 #else
5110 void tcg_dump_op_count(GString *buf)
5111 {
5112     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5113 }
5114 
/*
 * Build without CONFIG_PROFILER: there is no execution time to report.
 * Reports an error and terminates the process; never returns.
 */
int64_t tcg_cpu_exec_time(void)
{
    const char *const caller = __func__;

    error_report("%s: TCG profiler not compiled", caller);
    exit(EXIT_FAILURE);
}
5120 #endif
5121 
5122 
5123 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
5124 {
5125 #ifdef CONFIG_PROFILER
5126     TCGProfile *prof = &s->prof;
5127 #endif
5128     int i, num_insns;
5129     TCGOp *op;
5130 
5131 #ifdef CONFIG_PROFILER
5132     {
5133         int n = 0;
5134 
5135         QTAILQ_FOREACH(op, &s->ops, link) {
5136             n++;
5137         }
5138         qatomic_set(&prof->op_count, prof->op_count + n);
5139         if (n > prof->op_count_max) {
5140             qatomic_set(&prof->op_count_max, n);
5141         }
5142 
5143         n = s->nb_temps;
5144         qatomic_set(&prof->temp_count, prof->temp_count + n);
5145         if (n > prof->temp_count_max) {
5146             qatomic_set(&prof->temp_count_max, n);
5147         }
5148     }
5149 #endif
5150 
5151 #ifdef DEBUG_DISAS
5152     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5153                  && qemu_log_in_addr_range(pc_start))) {
5154         FILE *logfile = qemu_log_trylock();
5155         if (logfile) {
5156             fprintf(logfile, "OP:\n");
5157             tcg_dump_ops(s, logfile, false);
5158             fprintf(logfile, "\n");
5159             qemu_log_unlock(logfile);
5160         }
5161     }
5162 #endif
5163 
5164 #ifdef CONFIG_DEBUG_TCG
5165     /* Ensure all labels referenced have been emitted.  */
5166     {
5167         TCGLabel *l;
5168         bool error = false;
5169 
5170         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5171             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5172                 qemu_log_mask(CPU_LOG_TB_OP,
5173                               "$L%d referenced but not present.\n", l->id);
5174                 error = true;
5175             }
5176         }
5177         assert(!error);
5178     }
5179 #endif
5180 
5181 #ifdef CONFIG_PROFILER
5182     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
5183 #endif
5184 
5185 #ifdef USE_TCG_OPTIMIZATIONS
5186     tcg_optimize(s);
5187 #endif
5188 
5189 #ifdef CONFIG_PROFILER
5190     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
5191     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
5192 #endif
5193 
5194     reachable_code_pass(s);
5195     liveness_pass_0(s);
5196     liveness_pass_1(s);
5197 
5198     if (s->nb_indirects > 0) {
5199 #ifdef DEBUG_DISAS
5200         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5201                      && qemu_log_in_addr_range(pc_start))) {
5202             FILE *logfile = qemu_log_trylock();
5203             if (logfile) {
5204                 fprintf(logfile, "OP before indirect lowering:\n");
5205                 tcg_dump_ops(s, logfile, false);
5206                 fprintf(logfile, "\n");
5207                 qemu_log_unlock(logfile);
5208             }
5209         }
5210 #endif
5211         /* Replace indirect temps with direct temps.  */
5212         if (liveness_pass_2(s)) {
5213             /* If changes were made, re-run liveness.  */
5214             liveness_pass_1(s);
5215         }
5216     }
5217 
5218 #ifdef CONFIG_PROFILER
5219     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
5220 #endif
5221 
5222 #ifdef DEBUG_DISAS
5223     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5224                  && qemu_log_in_addr_range(pc_start))) {
5225         FILE *logfile = qemu_log_trylock();
5226         if (logfile) {
5227             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5228             tcg_dump_ops(s, logfile, true);
5229             fprintf(logfile, "\n");
5230             qemu_log_unlock(logfile);
5231         }
5232     }
5233 #endif
5234 
5235     /* Initialize goto_tb jump offsets. */
5236     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5237     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5238     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5239     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5240 
5241     tcg_reg_alloc_start(s);
5242 
5243     /*
5244      * Reset the buffer pointers when restarting after overflow.
5245      * TODO: Move this into translate-all.c with the rest of the
5246      * buffer management.  Having only this done here is confusing.
5247      */
5248     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
5249     s->code_ptr = s->code_buf;
5250 
5251 #ifdef TCG_TARGET_NEED_LDST_LABELS
5252     QSIMPLEQ_INIT(&s->ldst_labels);
5253 #endif
5254 #ifdef TCG_TARGET_NEED_POOL_LABELS
5255     s->pool_labels = NULL;
5256 #endif
5257 
5258     num_insns = -1;
5259     QTAILQ_FOREACH(op, &s->ops, link) {
5260         TCGOpcode opc = op->opc;
5261 
5262 #ifdef CONFIG_PROFILER
5263         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
5264 #endif
5265 
5266         switch (opc) {
5267         case INDEX_op_mov_i32:
5268         case INDEX_op_mov_i64:
5269         case INDEX_op_mov_vec:
5270             tcg_reg_alloc_mov(s, op);
5271             break;
5272         case INDEX_op_dup_vec:
5273             tcg_reg_alloc_dup(s, op);
5274             break;
5275         case INDEX_op_insn_start:
5276             if (num_insns >= 0) {
5277                 size_t off = tcg_current_code_size(s);
5278                 s->gen_insn_end_off[num_insns] = off;
5279                 /* Assert that we do not overflow our stored offset.  */
5280                 assert(s->gen_insn_end_off[num_insns] == off);
5281             }
5282             num_insns++;
5283             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
5284                 target_ulong a;
5285 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
5286                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
5287 #else
5288                 a = op->args[i];
5289 #endif
5290                 s->gen_insn_data[num_insns][i] = a;
5291             }
5292             break;
5293         case INDEX_op_discard:
5294             temp_dead(s, arg_temp(op->args[0]));
5295             break;
5296         case INDEX_op_set_label:
5297             tcg_reg_alloc_bb_end(s, s->reserved_regs);
5298             tcg_out_label(s, arg_label(op->args[0]));
5299             break;
5300         case INDEX_op_call:
5301             tcg_reg_alloc_call(s, op);
5302             break;
5303         case INDEX_op_exit_tb:
5304             tcg_out_exit_tb(s, op->args[0]);
5305             break;
5306         case INDEX_op_goto_tb:
5307             tcg_out_goto_tb(s, op->args[0]);
5308             break;
5309         case INDEX_op_dup2_vec:
5310             if (tcg_reg_alloc_dup2(s, op)) {
5311                 break;
5312             }
5313             /* fall through */
5314         default:
5315             /* Sanity check that we've not introduced any unhandled opcodes. */
5316             tcg_debug_assert(tcg_op_supported(opc));
5317             /* Note: in order to speed up the code, it would be much
5318                faster to have specialized register allocator functions for
5319                some common argument patterns */
5320             tcg_reg_alloc_op(s, op);
5321             break;
5322         }
5323         /* Test for (pending) buffer overflow.  The assumption is that any
5324            one operation beginning below the high water mark cannot overrun
5325            the buffer completely.  Thus we can test for overflow after
5326            generating code without having to check during generation.  */
5327         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
5328             return -1;
5329         }
5330         /* Test for TB overflow, as seen by gen_insn_end_off.  */
5331         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
5332             return -2;
5333         }
5334     }
5335     tcg_debug_assert(num_insns >= 0);
5336     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
5337 
5338     /* Generate TB finalization at the end of block */
5339 #ifdef TCG_TARGET_NEED_LDST_LABELS
5340     i = tcg_out_ldst_finalize(s);
5341     if (i < 0) {
5342         return i;
5343     }
5344 #endif
5345 #ifdef TCG_TARGET_NEED_POOL_LABELS
5346     i = tcg_out_pool_finalize(s);
5347     if (i < 0) {
5348         return i;
5349     }
5350 #endif
5351     if (!tcg_resolve_relocs(s)) {
5352         return -2;
5353     }
5354 
5355 #ifndef CONFIG_TCG_INTERPRETER
5356     /* flush instruction cache */
5357     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
5358                         (uintptr_t)s->code_buf,
5359                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
5360 #endif
5361 
5362     return tcg_current_code_size(s);
5363 }
5364 
5365 #ifdef CONFIG_PROFILER
5366 void tcg_dump_info(GString *buf)
5367 {
5368     TCGProfile prof = {};
5369     const TCGProfile *s;
5370     int64_t tb_count;
5371     int64_t tb_div_count;
5372     int64_t tot;
5373 
5374     tcg_profile_snapshot_counters(&prof);
5375     s = &prof;
5376     tb_count = s->tb_count;
5377     tb_div_count = tb_count ? tb_count : 1;
5378     tot = s->interm_time + s->code_time;
5379 
5380     g_string_append_printf(buf, "JIT cycles          %" PRId64
5381                            " (%0.3f s at 2.4 GHz)\n",
5382                            tot, tot / 2.4e9);
5383     g_string_append_printf(buf, "translated TBs      %" PRId64
5384                            " (aborted=%" PRId64 " %0.1f%%)\n",
5385                            tb_count, s->tb_count1 - tb_count,
5386                            (double)(s->tb_count1 - s->tb_count)
5387                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
5388     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
5389                            (double)s->op_count / tb_div_count, s->op_count_max);
5390     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
5391                            (double)s->del_op_count / tb_div_count);
5392     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
5393                            (double)s->temp_count / tb_div_count,
5394                            s->temp_count_max);
5395     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
5396                            (double)s->code_out_len / tb_div_count);
5397     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
5398                            (double)s->search_out_len / tb_div_count);
5399 
5400     g_string_append_printf(buf, "cycles/op           %0.1f\n",
5401                            s->op_count ? (double)tot / s->op_count : 0);
5402     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
5403                            s->code_in_len ? (double)tot / s->code_in_len : 0);
5404     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
5405                            s->code_out_len ? (double)tot / s->code_out_len : 0);
5406     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
5407                            s->search_out_len ?
5408                            (double)tot / s->search_out_len : 0);
5409     if (tot == 0) {
5410         tot = 1;
5411     }
5412     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
5413                            (double)s->interm_time / tot * 100.0);
5414     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
5415                            (double)s->code_time / tot * 100.0);
5416     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
5417                            (double)s->opt_time / (s->code_time ?
5418                                                   s->code_time : 1)
5419                            * 100.0);
5420     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
5421                            (double)s->la_time / (s->code_time ?
5422                                                  s->code_time : 1) * 100.0);
5423     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
5424                            s->restore_count);
5425     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
5426                            s->restore_count ?
5427                            (double)s->restore_time / s->restore_count : 0);
5428 }
5429 #else
5430 void tcg_dump_info(GString *buf)
5431 {
5432     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5433 }
5434 #endif
5435 
5436 #ifdef ELF_HOST_MACHINE
5437 /* In order to use this feature, the backend needs to do three things:
5438 
5439    (1) Define ELF_HOST_MACHINE to indicate both what value to
5440        put into the ELF image and to indicate support for the feature.
5441 
5442    (2) Define tcg_register_jit.  This should create a buffer containing
5443        the contents of a .debug_frame section that describes the post-
5444        prologue unwind info for the tcg machine.
5445 
5446    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5447 */
5448 
5449 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes stored in jit_descriptor.action_flag.  The code visible in
 * this file only ever stores JIT_REGISTER_FN (see tcg_register_jit_int).
 */
5450 typedef enum {
5451     JIT_NOACTION = 0,
5452     JIT_REGISTER_FN,
5453     JIT_UNREGISTER_FN
5454 } jit_actions_t;
5455 
/*
 * One registered symbol file.  Entries carry next/prev links;
 * tcg_register_jit_int uses a single static entry and leaves both links
 * at their zero-initialized value.
 */
5456 struct jit_code_entry {
5457     struct jit_code_entry *next_entry;
5458     struct jit_code_entry *prev_entry;
5459     const void *symfile_addr;   /* start of the in-memory ELF image */
5460     uint64_t symfile_size;      /* size of that image in bytes */
5461 };
5462 
/*
 * Layout of the global __jit_debug_descriptor object that is filled in
 * before __jit_debug_register_code() is called.
 */
5463 struct jit_descriptor {
5464     uint32_t version;                       /* statically set to 1 below */
5465     uint32_t action_flag;                   /* a jit_actions_t value */
5466     struct jit_code_entry *relevant_entry;  /* entry the action refers to */
5467     struct jit_code_entry *first_entry;     /* head of the entry list */
5468 };
5469 
/*
 * Called after __jit_debug_descriptor has been updated.  The function does
 * no work itself; noinline and the empty asm statement keep it a real,
 * out-of-line call.
 * NOTE(review): presumably the debugger places a breakpoint here to notice
 * registrations -- confirm against the GDB JIT interface documentation.
 */
5470 void __jit_debug_register_code(void) __attribute__((noinline));
5471 void __jit_debug_register_code(void)
5472 {
5473     asm("");
5474 }
5475 
/*
 * The single global descriptor.  Initializer order follows the struct:
 * version = 1, action_flag = 0 (JIT_NOACTION), relevant_entry = NULL,
 * first_entry = NULL.  tcg_register_jit_int fills in the last three.
 */
5476 /* Must statically initialize the version, because GDB may check
5477    the version before we can set it.  */
5478 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5479 
5480 /* End GDB interface.  */
5481 
/*
 * Return the byte offset of @str inside the string table @strtab.
 *
 * @strtab is a sequence of NUL-terminated strings.  Its first byte (the
 * empty name at offset 0) is skipped, so the search starts at offset 1.
 * The table must be followed by at least one extra NUL byte, i.e. an empty
 * entry marks its end; the zero padding of a fixed-size char array, or the
 * implicit terminator of a string literal that ends in "\0", provides it.
 *
 * Returns the offset of the first entry equal to @str, or 0 (the offset of
 * the empty name) if the end of the table is reached without a match.
 * Without the end-of-table check a missing string made the scan run past
 * the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Empty entry: end of table, @str is not present. */
            return 0;
        }
        p += strlen(p) + 1;
    }
}
5493 
5494 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5495                                  const void *debug_frame,
5496                                  size_t debug_frame_size)
5497 {
5498     struct __attribute__((packed)) DebugInfo {
5499         uint32_t  len;
5500         uint16_t  version;
5501         uint32_t  abbrev;
5502         uint8_t   ptr_size;
5503         uint8_t   cu_die;
5504         uint16_t  cu_lang;
5505         uintptr_t cu_low_pc;
5506         uintptr_t cu_high_pc;
5507         uint8_t   fn_die;
5508         char      fn_name[16];
5509         uintptr_t fn_low_pc;
5510         uintptr_t fn_high_pc;
5511         uint8_t   cu_eoc;
5512     };
5513 
5514     struct ElfImage {
5515         ElfW(Ehdr) ehdr;
5516         ElfW(Phdr) phdr;
5517         ElfW(Shdr) shdr[7];
5518         ElfW(Sym)  sym[2];
5519         struct DebugInfo di;
5520         uint8_t    da[24];
5521         char       str[80];
5522     };
5523 
5524     struct ElfImage *img;
5525 
5526     static const struct ElfImage img_template = {
5527         .ehdr = {
5528             .e_ident[EI_MAG0] = ELFMAG0,
5529             .e_ident[EI_MAG1] = ELFMAG1,
5530             .e_ident[EI_MAG2] = ELFMAG2,
5531             .e_ident[EI_MAG3] = ELFMAG3,
5532             .e_ident[EI_CLASS] = ELF_CLASS,
5533             .e_ident[EI_DATA] = ELF_DATA,
5534             .e_ident[EI_VERSION] = EV_CURRENT,
5535             .e_type = ET_EXEC,
5536             .e_machine = ELF_HOST_MACHINE,
5537             .e_version = EV_CURRENT,
5538             .e_phoff = offsetof(struct ElfImage, phdr),
5539             .e_shoff = offsetof(struct ElfImage, shdr),
5540             .e_ehsize = sizeof(ElfW(Shdr)),
5541             .e_phentsize = sizeof(ElfW(Phdr)),
5542             .e_phnum = 1,
5543             .e_shentsize = sizeof(ElfW(Shdr)),
5544             .e_shnum = ARRAY_SIZE(img->shdr),
5545             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
5546 #ifdef ELF_HOST_FLAGS
5547             .e_flags = ELF_HOST_FLAGS,
5548 #endif
5549 #ifdef ELF_OSABI
5550             .e_ident[EI_OSABI] = ELF_OSABI,
5551 #endif
5552         },
5553         .phdr = {
5554             .p_type = PT_LOAD,
5555             .p_flags = PF_X,
5556         },
5557         .shdr = {
5558             [0] = { .sh_type = SHT_NULL },
5559             /* Trick: The contents of code_gen_buffer are not present in
5560                this fake ELF file; that got allocated elsewhere.  Therefore
5561                we mark .text as SHT_NOBITS (similar to .bss) so that readers
5562                will not look for contents.  We can record any address.  */
5563             [1] = { /* .text */
5564                 .sh_type = SHT_NOBITS,
5565                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5566             },
5567             [2] = { /* .debug_info */
5568                 .sh_type = SHT_PROGBITS,
5569                 .sh_offset = offsetof(struct ElfImage, di),
5570                 .sh_size = sizeof(struct DebugInfo),
5571             },
5572             [3] = { /* .debug_abbrev */
5573                 .sh_type = SHT_PROGBITS,
5574                 .sh_offset = offsetof(struct ElfImage, da),
5575                 .sh_size = sizeof(img->da),
5576             },
5577             [4] = { /* .debug_frame */
5578                 .sh_type = SHT_PROGBITS,
5579                 .sh_offset = sizeof(struct ElfImage),
5580             },
5581             [5] = { /* .symtab */
5582                 .sh_type = SHT_SYMTAB,
5583                 .sh_offset = offsetof(struct ElfImage, sym),
5584                 .sh_size = sizeof(img->sym),
5585                 .sh_info = 1,
5586                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5587                 .sh_entsize = sizeof(ElfW(Sym)),
5588             },
5589             [6] = { /* .strtab */
5590                 .sh_type = SHT_STRTAB,
5591                 .sh_offset = offsetof(struct ElfImage, str),
5592                 .sh_size = sizeof(img->str),
5593             }
5594         },
5595         .sym = {
5596             [1] = { /* code_gen_buffer */
5597                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5598                 .st_shndx = 1,
5599             }
5600         },
5601         .di = {
5602             .len = sizeof(struct DebugInfo) - 4,
5603             .version = 2,
5604             .ptr_size = sizeof(void *),
5605             .cu_die = 1,
5606             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5607             .fn_die = 2,
5608             .fn_name = "code_gen_buffer"
5609         },
5610         .da = {
5611             1,          /* abbrev number (the cu) */
5612             0x11, 1,    /* DW_TAG_compile_unit, has children */
5613             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5614             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5615             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5616             0, 0,       /* end of abbrev */
5617             2,          /* abbrev number (the fn) */
5618             0x2e, 0,    /* DW_TAG_subprogram, no children */
5619             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5620             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5621             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5622             0, 0,       /* end of abbrev */
5623             0           /* no more abbrev */
5624         },
5625         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5626                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5627     };
5628 
5629     /* We only need a single jit entry; statically allocate it.  */
5630     static struct jit_code_entry one_entry;
5631 
5632     uintptr_t buf = (uintptr_t)buf_ptr;
5633     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5634     DebugFrameHeader *dfh;
5635 
5636     img = g_malloc(img_size);
5637     *img = img_template;
5638 
5639     img->phdr.p_vaddr = buf;
5640     img->phdr.p_paddr = buf;
5641     img->phdr.p_memsz = buf_size;
5642 
5643     img->shdr[1].sh_name = find_string(img->str, ".text");
5644     img->shdr[1].sh_addr = buf;
5645     img->shdr[1].sh_size = buf_size;
5646 
5647     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5648     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5649 
5650     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5651     img->shdr[4].sh_size = debug_frame_size;
5652 
5653     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5654     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5655 
5656     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5657     img->sym[1].st_value = buf;
5658     img->sym[1].st_size = buf_size;
5659 
5660     img->di.cu_low_pc = buf;
5661     img->di.cu_high_pc = buf + buf_size;
5662     img->di.fn_low_pc = buf;
5663     img->di.fn_high_pc = buf + buf_size;
5664 
5665     dfh = (DebugFrameHeader *)(img + 1);
5666     memcpy(dfh, debug_frame, debug_frame_size);
5667     dfh->fde.func_start = buf;
5668     dfh->fde.func_len = buf_size;
5669 
5670 #ifdef DEBUG_JIT
5671     /* Enable this block to be able to debug the ELF image file creation.
5672        One can use readelf, objdump, or other inspection utilities.  */
5673     {
5674         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5675         FILE *f = fopen(jit, "w+b");
5676         if (f) {
5677             if (fwrite(img, img_size, 1, f) != img_size) {
5678                 /* Avoid stupid unused return value warning for fwrite.  */
5679             }
5680             fclose(f);
5681         }
5682     }
5683 #endif
5684 
5685     one_entry.symfile_addr = img;
5686     one_entry.symfile_size = img_size;
5687 
5688     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5689     __jit_debug_descriptor.relevant_entry = &one_entry;
5690     __jit_debug_descriptor.first_entry = &one_entry;
5691     __jit_debug_register_code();
5692 }
5693 #else
5694 /* No support for the feature.  Provide the entry point expected by exec.c,
5695    and implement the internal function we declared earlier.  */
5696 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: same signature as the
 * real implementation, but it ignores all arguments and does nothing.
 */
5697 static void tcg_register_jit_int(const void *buf, size_t size,
5698                                  const void *debug_frame,
5699                                  size_t debug_frame_size)
5700 {
5701 }
5702 
/*
 * Public entry point for hosts without ELF_HOST_MACHINE: nothing is
 * registered, @buf and @buf_size are ignored.
 */
5703 void tcg_register_jit(const void *buf, size_t buf_size)
5704 {
5705 }
5706 #endif /* ELF_HOST_MACHINE */
5707 
5708 #if !TCG_TARGET_MAYBE_vec
/*
 * Fallback definition, compiled only when TCG_TARGET_MAYBE_vec is zero.
 * It ignores its arguments and aborts via g_assert_not_reached().
 * NOTE(review): presumably such a backend never requests vector op
 * expansion, so reaching this is a bug -- confirm against the callers.
 */
5709 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5710 {
5711     g_assert_not_reached();
5712 }
5713 #endif
5714