/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;
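/*
 * Roughly: with split-wx the code buffer is mapped twice, once writable
 * and once executable.  tcg_splitwx_diff holds the offset between the
 * two views, and tcg_splitwx_to_rx() applies it to turn a write address
 * into the matching executable address; the diff is zero when the
 * buffer is a single rwx mapping.
 */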

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
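/*
 * For illustration: on a host with 1-byte insn units, tcg_out32() takes
 * the memcpy path and advances s->code_ptr by 4 / 1 = 4 units, while on
 * a host with 4-byte units it is a single store advancing s->code_ptr
 * by one unit.  The tcg_patchN() variants rewrite already-emitted words
 * in place, which is what relocation processing below needs.
 */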

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
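/*
 * For example, tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SW, r)
 * reduces to tcg_out_ext16s(s, TCG_TYPE_I64, d, r): the low 16 bits of
 * r are sign-extended into all 64 bits of d in a single step.
 */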

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend a pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, but for both @i1 and @i2, taking care of overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is in the correct registers; now extend.  */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
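/*
 * Note the ordering above: if i1->dst does not overlap i2->src, the
 * moves are simply emitted in order.  If it does, i2 is emitted first;
 * and if the pair forms a full swap (i2->dst == i1->src as well), the
 * values are first exchanged via a host xchg or, failing that, bounced
 * through @scratch.
 */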

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
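/*
 * As a worked example: a backend constraint line such as C_O1_I2(r, r, ri)
 * expands here to the enumerator c_o1_i2_r_r_ri, and in constraint_sets[]
 * below to { .args_ct_str = { "r", "r", "ri" } }.  A single entry in
 * tcg-target-con-set.h thus defines both the index and the data it
 * indexes.
 */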

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers; the end result is that they are easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(tl, 2)   /* target_ulong addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(tl, 2)   /* target_ulong addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(tl, 2)   /* target_ulong addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(tl, 2)   /* target_ulong addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
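/*
 * Each dh_typemask(t, n) above contributes a 3-bit typecode at bit
 * position n * 3, with slot 0 describing the return value.  For
 * info_helper_ld64_mmu, for instance, (typemask & 7) yields the i64
 * typecode for the return, and successive (typemask >>= 3) steps walk
 * the env, addr, oi and ra arguments in order, which is how
 * init_call_layout() consumes the mask below.
 */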

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);
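        /*
         * For instance, with two arguments the highest non-zero
         * typecode occupies bits [3:5] of (typemask >> 3), so
         * 32 - clz32() yields at most 6 and DIV_ROUND_UP(6, 3)
         * recovers nargs == 2.
         */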

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
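/*
 * For illustration: on a host with six integer argument registers and
 * an 8-byte tcg_target_long, arg_slot 6 is the first stack slot and
 * maps to TCG_TARGET_CALL_STACK_OFFSET + 0, arg_slot 8 to
 * TCG_TARGET_CALL_STACK_OFFSET + 16, and so on.
 */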

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
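/*
 * Rounding the slot number up to an even boundary implements
 * TCG_CALL_ARG_EVEN, for ABIs (e.g. 32-bit ARM) that pass 64-bit
 * values in aligned register pairs or aligned stack slots.
 */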

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
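/*
 * E.g. registering a 64-bit guest global on a 32-bit host takes the
 * split path above: two adjacent I32 temps named "<name>_0" and
 * "<name>_1" are created at offset and offset + 4, with temp_subindex
 * 0 and 1 selecting the halves.
 */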

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
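/*
 * E.g. a TCG_TYPE_I128 temp on a 64-bit host takes the n == 2 path:
 * two consecutive TCGTemps share base_type TCG_TYPE_I128, each with
 * type TCG_TYPE_REG and temp_subindex 0 and 1 respectively.
 */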
1568 
1569 TCGv_vec tcg_temp_new_vec(TCGType type)
1570 {
1571     TCGTemp *t;
1572 
1573 #ifdef CONFIG_DEBUG_TCG
1574     switch (type) {
1575     case TCG_TYPE_V64:
1576         assert(TCG_TARGET_HAS_v64);
1577         break;
1578     case TCG_TYPE_V128:
1579         assert(TCG_TARGET_HAS_v128);
1580         break;
1581     case TCG_TYPE_V256:
1582         assert(TCG_TARGET_HAS_v256);
1583         break;
1584     default:
1585         g_assert_not_reached();
1586     }
1587 #endif
1588 
1589     t = tcg_temp_new_internal(type, TEMP_EBB);
1590     return temp_tcgv_vec(t);
1591 }
1592 
1593 /* Create a new temp of the same type as an existing temp.  */
1594 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1595 {
1596     TCGTemp *t = tcgv_vec_temp(match);
1597 
1598     tcg_debug_assert(t->temp_allocated != 0);
1599 
1600     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1601     return temp_tcgv_vec(t);
1602 }
1603 
1604 void tcg_temp_free_internal(TCGTemp *ts)
1605 {
1606     TCGContext *s = tcg_ctx;
1607 
1608     switch (ts->kind) {
1609     case TEMP_CONST:
1610     case TEMP_TB:
1611         /* Silently ignore free. */
1612         break;
1613     case TEMP_EBB:
1614         tcg_debug_assert(ts->temp_allocated != 0);
1615         ts->temp_allocated = 0;
1616         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1617         break;
1618     default:
1619         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1620         g_assert_not_reached();
1621     }
1622 }
1623 
1624 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1625 {
1626     TCGContext *s = tcg_ctx;
1627     GHashTable *h = s->const_table[type];
1628     TCGTemp *ts;
1629 
1630     if (h == NULL) {
1631         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1632         s->const_table[type] = h;
1633     }
1634 
1635     ts = g_hash_table_lookup(h, &val);
1636     if (ts == NULL) {
1637         int64_t *val_ptr;
1638 
1639         ts = tcg_temp_alloc(s);
1640 
1641         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1642             TCGTemp *ts2 = tcg_temp_alloc(s);
1643 
1644             tcg_debug_assert(ts2 == ts + 1);
1645 
1646             ts->base_type = TCG_TYPE_I64;
1647             ts->type = TCG_TYPE_I32;
1648             ts->kind = TEMP_CONST;
1649             ts->temp_allocated = 1;
1650 
1651             ts2->base_type = TCG_TYPE_I64;
1652             ts2->type = TCG_TYPE_I32;
1653             ts2->kind = TEMP_CONST;
1654             ts2->temp_allocated = 1;
1655             ts2->temp_subindex = 1;
1656 
1657             /*
1658              * Retain the full value of the 64-bit constant in the low
1659              * part, so that the hash table works.  Actual uses will
1660              * truncate the value to the low part.
1661              */
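            /*
             * Illustration (value chosen here, not from the source): with
             * val = 0x1122334455667788 on a little-endian host, ts[0]
             * (the low part) keeps the full 0x1122334455667788 and serves
             * as the hash key, while ts[1] holds 0x11223344; a big-endian
             * host swaps the two indices.
             */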
1662             ts[HOST_BIG_ENDIAN].val = val;
1663             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1664             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1665         } else {
1666             ts->base_type = type;
1667             ts->type = type;
1668             ts->kind = TEMP_CONST;
1669             ts->temp_allocated = 1;
1670             ts->val = val;
1671             val_ptr = &ts->val;
1672         }
1673         g_hash_table_insert(h, val_ptr, ts);
1674     }
1675 
1676     return ts;
1677 }
1678 
1679 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1680 {
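    /*
     * dup_const replicates val across the 64-bit lane for the given
     * element size: e.g. vece = MO_8, val = 0xab yields
     * 0xabababababababab, so equal constants of different element
     * sizes share one canonical TEMP_CONST per replicated value.
     */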
1681     val = dup_const(vece, val);
1682     return temp_tcgv_vec(tcg_constant_internal(type, val));
1683 }
1684 
1685 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1686 {
1687     TCGTemp *t = tcgv_vec_temp(match);
1688 
1689     tcg_debug_assert(t->temp_allocated != 0);
1690     return tcg_constant_vec(t->base_type, vece, val);
1691 }
1692 
1693 /* Return true if OP may appear in the opcode stream.
1694    Test the runtime variable that controls each opcode.  */
1695 bool tcg_op_supported(TCGOpcode op)
1696 {
1697     const bool have_vec
1698         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1699 
1700     switch (op) {
1701     case INDEX_op_discard:
1702     case INDEX_op_set_label:
1703     case INDEX_op_call:
1704     case INDEX_op_br:
1705     case INDEX_op_mb:
1706     case INDEX_op_insn_start:
1707     case INDEX_op_exit_tb:
1708     case INDEX_op_goto_tb:
1709     case INDEX_op_goto_ptr:
1710     case INDEX_op_qemu_ld_i32:
1711     case INDEX_op_qemu_st_i32:
1712     case INDEX_op_qemu_ld_i64:
1713     case INDEX_op_qemu_st_i64:
1714         return true;
1715 
1716     case INDEX_op_qemu_st8_i32:
1717         return TCG_TARGET_HAS_qemu_st8_i32;
1718 
1719     case INDEX_op_mov_i32:
1720     case INDEX_op_setcond_i32:
1721     case INDEX_op_brcond_i32:
1722     case INDEX_op_ld8u_i32:
1723     case INDEX_op_ld8s_i32:
1724     case INDEX_op_ld16u_i32:
1725     case INDEX_op_ld16s_i32:
1726     case INDEX_op_ld_i32:
1727     case INDEX_op_st8_i32:
1728     case INDEX_op_st16_i32:
1729     case INDEX_op_st_i32:
1730     case INDEX_op_add_i32:
1731     case INDEX_op_sub_i32:
1732     case INDEX_op_mul_i32:
1733     case INDEX_op_and_i32:
1734     case INDEX_op_or_i32:
1735     case INDEX_op_xor_i32:
1736     case INDEX_op_shl_i32:
1737     case INDEX_op_shr_i32:
1738     case INDEX_op_sar_i32:
1739         return true;
1740 
1741     case INDEX_op_movcond_i32:
1742         return TCG_TARGET_HAS_movcond_i32;
1743     case INDEX_op_div_i32:
1744     case INDEX_op_divu_i32:
1745         return TCG_TARGET_HAS_div_i32;
1746     case INDEX_op_rem_i32:
1747     case INDEX_op_remu_i32:
1748         return TCG_TARGET_HAS_rem_i32;
1749     case INDEX_op_div2_i32:
1750     case INDEX_op_divu2_i32:
1751         return TCG_TARGET_HAS_div2_i32;
1752     case INDEX_op_rotl_i32:
1753     case INDEX_op_rotr_i32:
1754         return TCG_TARGET_HAS_rot_i32;
1755     case INDEX_op_deposit_i32:
1756         return TCG_TARGET_HAS_deposit_i32;
1757     case INDEX_op_extract_i32:
1758         return TCG_TARGET_HAS_extract_i32;
1759     case INDEX_op_sextract_i32:
1760         return TCG_TARGET_HAS_sextract_i32;
1761     case INDEX_op_extract2_i32:
1762         return TCG_TARGET_HAS_extract2_i32;
1763     case INDEX_op_add2_i32:
1764         return TCG_TARGET_HAS_add2_i32;
1765     case INDEX_op_sub2_i32:
1766         return TCG_TARGET_HAS_sub2_i32;
1767     case INDEX_op_mulu2_i32:
1768         return TCG_TARGET_HAS_mulu2_i32;
1769     case INDEX_op_muls2_i32:
1770         return TCG_TARGET_HAS_muls2_i32;
1771     case INDEX_op_muluh_i32:
1772         return TCG_TARGET_HAS_muluh_i32;
1773     case INDEX_op_mulsh_i32:
1774         return TCG_TARGET_HAS_mulsh_i32;
1775     case INDEX_op_ext8s_i32:
1776         return TCG_TARGET_HAS_ext8s_i32;
1777     case INDEX_op_ext16s_i32:
1778         return TCG_TARGET_HAS_ext16s_i32;
1779     case INDEX_op_ext8u_i32:
1780         return TCG_TARGET_HAS_ext8u_i32;
1781     case INDEX_op_ext16u_i32:
1782         return TCG_TARGET_HAS_ext16u_i32;
1783     case INDEX_op_bswap16_i32:
1784         return TCG_TARGET_HAS_bswap16_i32;
1785     case INDEX_op_bswap32_i32:
1786         return TCG_TARGET_HAS_bswap32_i32;
1787     case INDEX_op_not_i32:
1788         return TCG_TARGET_HAS_not_i32;
1789     case INDEX_op_neg_i32:
1790         return TCG_TARGET_HAS_neg_i32;
1791     case INDEX_op_andc_i32:
1792         return TCG_TARGET_HAS_andc_i32;
1793     case INDEX_op_orc_i32:
1794         return TCG_TARGET_HAS_orc_i32;
1795     case INDEX_op_eqv_i32:
1796         return TCG_TARGET_HAS_eqv_i32;
1797     case INDEX_op_nand_i32:
1798         return TCG_TARGET_HAS_nand_i32;
1799     case INDEX_op_nor_i32:
1800         return TCG_TARGET_HAS_nor_i32;
1801     case INDEX_op_clz_i32:
1802         return TCG_TARGET_HAS_clz_i32;
1803     case INDEX_op_ctz_i32:
1804         return TCG_TARGET_HAS_ctz_i32;
1805     case INDEX_op_ctpop_i32:
1806         return TCG_TARGET_HAS_ctpop_i32;
1807 
1808     case INDEX_op_brcond2_i32:
1809     case INDEX_op_setcond2_i32:
1810         return TCG_TARGET_REG_BITS == 32;
1811 
1812     case INDEX_op_mov_i64:
1813     case INDEX_op_setcond_i64:
1814     case INDEX_op_brcond_i64:
1815     case INDEX_op_ld8u_i64:
1816     case INDEX_op_ld8s_i64:
1817     case INDEX_op_ld16u_i64:
1818     case INDEX_op_ld16s_i64:
1819     case INDEX_op_ld32u_i64:
1820     case INDEX_op_ld32s_i64:
1821     case INDEX_op_ld_i64:
1822     case INDEX_op_st8_i64:
1823     case INDEX_op_st16_i64:
1824     case INDEX_op_st32_i64:
1825     case INDEX_op_st_i64:
1826     case INDEX_op_add_i64:
1827     case INDEX_op_sub_i64:
1828     case INDEX_op_mul_i64:
1829     case INDEX_op_and_i64:
1830     case INDEX_op_or_i64:
1831     case INDEX_op_xor_i64:
1832     case INDEX_op_shl_i64:
1833     case INDEX_op_shr_i64:
1834     case INDEX_op_sar_i64:
1835     case INDEX_op_ext_i32_i64:
1836     case INDEX_op_extu_i32_i64:
1837         return TCG_TARGET_REG_BITS == 64;
1838 
1839     case INDEX_op_movcond_i64:
1840         return TCG_TARGET_HAS_movcond_i64;
1841     case INDEX_op_div_i64:
1842     case INDEX_op_divu_i64:
1843         return TCG_TARGET_HAS_div_i64;
1844     case INDEX_op_rem_i64:
1845     case INDEX_op_remu_i64:
1846         return TCG_TARGET_HAS_rem_i64;
1847     case INDEX_op_div2_i64:
1848     case INDEX_op_divu2_i64:
1849         return TCG_TARGET_HAS_div2_i64;
1850     case INDEX_op_rotl_i64:
1851     case INDEX_op_rotr_i64:
1852         return TCG_TARGET_HAS_rot_i64;
1853     case INDEX_op_deposit_i64:
1854         return TCG_TARGET_HAS_deposit_i64;
1855     case INDEX_op_extract_i64:
1856         return TCG_TARGET_HAS_extract_i64;
1857     case INDEX_op_sextract_i64:
1858         return TCG_TARGET_HAS_sextract_i64;
1859     case INDEX_op_extract2_i64:
1860         return TCG_TARGET_HAS_extract2_i64;
1861     case INDEX_op_extrl_i64_i32:
1862         return TCG_TARGET_HAS_extrl_i64_i32;
1863     case INDEX_op_extrh_i64_i32:
1864         return TCG_TARGET_HAS_extrh_i64_i32;
1865     case INDEX_op_ext8s_i64:
1866         return TCG_TARGET_HAS_ext8s_i64;
1867     case INDEX_op_ext16s_i64:
1868         return TCG_TARGET_HAS_ext16s_i64;
1869     case INDEX_op_ext32s_i64:
1870         return TCG_TARGET_HAS_ext32s_i64;
1871     case INDEX_op_ext8u_i64:
1872         return TCG_TARGET_HAS_ext8u_i64;
1873     case INDEX_op_ext16u_i64:
1874         return TCG_TARGET_HAS_ext16u_i64;
1875     case INDEX_op_ext32u_i64:
1876         return TCG_TARGET_HAS_ext32u_i64;
1877     case INDEX_op_bswap16_i64:
1878         return TCG_TARGET_HAS_bswap16_i64;
1879     case INDEX_op_bswap32_i64:
1880         return TCG_TARGET_HAS_bswap32_i64;
1881     case INDEX_op_bswap64_i64:
1882         return TCG_TARGET_HAS_bswap64_i64;
1883     case INDEX_op_not_i64:
1884         return TCG_TARGET_HAS_not_i64;
1885     case INDEX_op_neg_i64:
1886         return TCG_TARGET_HAS_neg_i64;
1887     case INDEX_op_andc_i64:
1888         return TCG_TARGET_HAS_andc_i64;
1889     case INDEX_op_orc_i64:
1890         return TCG_TARGET_HAS_orc_i64;
1891     case INDEX_op_eqv_i64:
1892         return TCG_TARGET_HAS_eqv_i64;
1893     case INDEX_op_nand_i64:
1894         return TCG_TARGET_HAS_nand_i64;
1895     case INDEX_op_nor_i64:
1896         return TCG_TARGET_HAS_nor_i64;
1897     case INDEX_op_clz_i64:
1898         return TCG_TARGET_HAS_clz_i64;
1899     case INDEX_op_ctz_i64:
1900         return TCG_TARGET_HAS_ctz_i64;
1901     case INDEX_op_ctpop_i64:
1902         return TCG_TARGET_HAS_ctpop_i64;
1903     case INDEX_op_add2_i64:
1904         return TCG_TARGET_HAS_add2_i64;
1905     case INDEX_op_sub2_i64:
1906         return TCG_TARGET_HAS_sub2_i64;
1907     case INDEX_op_mulu2_i64:
1908         return TCG_TARGET_HAS_mulu2_i64;
1909     case INDEX_op_muls2_i64:
1910         return TCG_TARGET_HAS_muls2_i64;
1911     case INDEX_op_muluh_i64:
1912         return TCG_TARGET_HAS_muluh_i64;
1913     case INDEX_op_mulsh_i64:
1914         return TCG_TARGET_HAS_mulsh_i64;
1915 
1916     case INDEX_op_mov_vec:
1917     case INDEX_op_dup_vec:
1918     case INDEX_op_dupm_vec:
1919     case INDEX_op_ld_vec:
1920     case INDEX_op_st_vec:
1921     case INDEX_op_add_vec:
1922     case INDEX_op_sub_vec:
1923     case INDEX_op_and_vec:
1924     case INDEX_op_or_vec:
1925     case INDEX_op_xor_vec:
1926     case INDEX_op_cmp_vec:
1927         return have_vec;
1928     case INDEX_op_dup2_vec:
1929         return have_vec && TCG_TARGET_REG_BITS == 32;
1930     case INDEX_op_not_vec:
1931         return have_vec && TCG_TARGET_HAS_not_vec;
1932     case INDEX_op_neg_vec:
1933         return have_vec && TCG_TARGET_HAS_neg_vec;
1934     case INDEX_op_abs_vec:
1935         return have_vec && TCG_TARGET_HAS_abs_vec;
1936     case INDEX_op_andc_vec:
1937         return have_vec && TCG_TARGET_HAS_andc_vec;
1938     case INDEX_op_orc_vec:
1939         return have_vec && TCG_TARGET_HAS_orc_vec;
1940     case INDEX_op_nand_vec:
1941         return have_vec && TCG_TARGET_HAS_nand_vec;
1942     case INDEX_op_nor_vec:
1943         return have_vec && TCG_TARGET_HAS_nor_vec;
1944     case INDEX_op_eqv_vec:
1945         return have_vec && TCG_TARGET_HAS_eqv_vec;
1946     case INDEX_op_mul_vec:
1947         return have_vec && TCG_TARGET_HAS_mul_vec;
1948     case INDEX_op_shli_vec:
1949     case INDEX_op_shri_vec:
1950     case INDEX_op_sari_vec:
1951         return have_vec && TCG_TARGET_HAS_shi_vec;
1952     case INDEX_op_shls_vec:
1953     case INDEX_op_shrs_vec:
1954     case INDEX_op_sars_vec:
1955         return have_vec && TCG_TARGET_HAS_shs_vec;
1956     case INDEX_op_shlv_vec:
1957     case INDEX_op_shrv_vec:
1958     case INDEX_op_sarv_vec:
1959         return have_vec && TCG_TARGET_HAS_shv_vec;
1960     case INDEX_op_rotli_vec:
1961         return have_vec && TCG_TARGET_HAS_roti_vec;
1962     case INDEX_op_rotls_vec:
1963         return have_vec && TCG_TARGET_HAS_rots_vec;
1964     case INDEX_op_rotlv_vec:
1965     case INDEX_op_rotrv_vec:
1966         return have_vec && TCG_TARGET_HAS_rotv_vec;
1967     case INDEX_op_ssadd_vec:
1968     case INDEX_op_usadd_vec:
1969     case INDEX_op_sssub_vec:
1970     case INDEX_op_ussub_vec:
1971         return have_vec && TCG_TARGET_HAS_sat_vec;
1972     case INDEX_op_smin_vec:
1973     case INDEX_op_umin_vec:
1974     case INDEX_op_smax_vec:
1975     case INDEX_op_umax_vec:
1976         return have_vec && TCG_TARGET_HAS_minmax_vec;
1977     case INDEX_op_bitsel_vec:
1978         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1979     case INDEX_op_cmpsel_vec:
1980         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1981 
1982     default:
1983         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1984         return true;
1985     }
1986 }
1987 
1988 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1989 
1990 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1991 {
1992     const TCGHelperInfo *info;
1993     TCGv_i64 extend_free[MAX_CALL_IARGS];
1994     int n_extend = 0;
1995     TCGOp *op;
1996     int i, n, pi = 0, total_args;
1997 
1998     info = g_hash_table_lookup(helper_table, (gpointer)func);
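    /* The trailing +2 covers the function pointer and the TCGHelperInfo,
       stored as the last two args below. */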
1999     total_args = info->nr_out + info->nr_in + 2;
2000     op = tcg_op_alloc(INDEX_op_call, total_args);
2001 
2002 #ifdef CONFIG_PLUGIN
2003     /* Flag helpers that may affect guest state */
2004     if (tcg_ctx->plugin_insn &&
2005         !(info->flags & TCG_CALL_PLUGIN) &&
2006         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2007         tcg_ctx->plugin_insn->calls_helpers = true;
2008     }
2009 #endif
2010 
2011     TCGOP_CALLO(op) = n = info->nr_out;
2012     switch (n) {
2013     case 0:
2014         tcg_debug_assert(ret == NULL);
2015         break;
2016     case 1:
2017         tcg_debug_assert(ret != NULL);
2018         op->args[pi++] = temp_arg(ret);
2019         break;
2020     case 2:
2021     case 4:
2022         tcg_debug_assert(ret != NULL);
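        /*
         * base_type sits ctz32(n) steps above type in the TCGType enum:
         * e.g. n == 2 when an I64 result is returned as two I32 halves
         * on a 32-bit host.
         */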
2023         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2024         tcg_debug_assert(ret->temp_subindex == 0);
2025         for (i = 0; i < n; ++i) {
2026             op->args[pi++] = temp_arg(ret + i);
2027         }
2028         break;
2029     default:
2030         g_assert_not_reached();
2031     }
2032 
2033     TCGOP_CALLI(op) = n = info->nr_in;
2034     for (i = 0; i < n; i++) {
2035         const TCGCallArgumentLoc *loc = &info->in[i];
2036         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2037 
2038         switch (loc->kind) {
2039         case TCG_CALL_ARG_NORMAL:
2040         case TCG_CALL_ARG_BY_REF:
2041         case TCG_CALL_ARG_BY_REF_N:
2042             op->args[pi++] = temp_arg(ts);
2043             break;
2044 
2045         case TCG_CALL_ARG_EXTEND_U:
2046         case TCG_CALL_ARG_EXTEND_S:
2047             {
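                /*
                 * The host ABI wants this 32-bit argument widened to 64
                 * bits: extend into a scratch i64 and pass that instead;
                 * the scratch is released via extend_free once the call
                 * op has been emitted.
                 */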
2048                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2049                 TCGv_i32 orig = temp_tcgv_i32(ts);
2050 
2051                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2052                     tcg_gen_ext_i32_i64(temp, orig);
2053                 } else {
2054                     tcg_gen_extu_i32_i64(temp, orig);
2055                 }
2056                 op->args[pi++] = tcgv_i64_arg(temp);
2057                 extend_free[n_extend++] = temp;
2058             }
2059             break;
2060 
2061         default:
2062             g_assert_not_reached();
2063         }
2064     }
2065     op->args[pi++] = (uintptr_t)func;
2066     op->args[pi++] = (uintptr_t)info;
2067     tcg_debug_assert(pi == total_args);
2068 
2069     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2070 
2071     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2072     for (i = 0; i < n_extend; ++i) {
2073         tcg_temp_free_i64(extend_free[i]);
2074     }
2075 }
2076 
2077 static void tcg_reg_alloc_start(TCGContext *s)
2078 {
2079     int i, n;
2080 
2081     for (i = 0, n = s->nb_temps; i < n; i++) {
2082         TCGTemp *ts = &s->temps[i];
2083         TCGTempVal val = TEMP_VAL_MEM;
2084 
2085         switch (ts->kind) {
2086         case TEMP_CONST:
2087             val = TEMP_VAL_CONST;
2088             break;
2089         case TEMP_FIXED:
2090             val = TEMP_VAL_REG;
2091             break;
2092         case TEMP_GLOBAL:
2093             break;
2094         case TEMP_EBB:
2095             val = TEMP_VAL_DEAD;
2096             /* fall through */
2097         case TEMP_TB:
2098             ts->mem_allocated = 0;
2099             break;
2100         default:
2101             g_assert_not_reached();
2102         }
2103         ts->val_type = val;
2104     }
2105 
2106     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2107 }
2108 
2109 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2110                                  TCGTemp *ts)
2111 {
2112     int idx = temp_idx(ts);
2113 
2114     switch (ts->kind) {
2115     case TEMP_FIXED:
2116     case TEMP_GLOBAL:
2117         pstrcpy(buf, buf_size, ts->name);
2118         break;
2119     case TEMP_TB:
2120         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2121         break;
2122     case TEMP_EBB:
2123         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2124         break;
2125     case TEMP_CONST:
2126         switch (ts->type) {
2127         case TCG_TYPE_I32:
2128             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2129             break;
2130 #if TCG_TARGET_REG_BITS > 32
2131         case TCG_TYPE_I64:
2132             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2133             break;
2134 #endif
2135         case TCG_TYPE_V64:
2136         case TCG_TYPE_V128:
2137         case TCG_TYPE_V256:
2138             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2139                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2140             break;
2141         default:
2142             g_assert_not_reached();
2143         }
2144         break;
2145     }
2146     return buf;
2147 }
2148 
2149 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2150                              int buf_size, TCGArg arg)
2151 {
2152     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2153 }
2154 
2155 static const char * const cond_name[] =
2156 {
2157     [TCG_COND_NEVER] = "never",
2158     [TCG_COND_ALWAYS] = "always",
2159     [TCG_COND_EQ] = "eq",
2160     [TCG_COND_NE] = "ne",
2161     [TCG_COND_LT] = "lt",
2162     [TCG_COND_GE] = "ge",
2163     [TCG_COND_LE] = "le",
2164     [TCG_COND_GT] = "gt",
2165     [TCG_COND_LTU] = "ltu",
2166     [TCG_COND_GEU] = "geu",
2167     [TCG_COND_LEU] = "leu",
2168     [TCG_COND_GTU] = "gtu"
2169 };
2170 
2171 static const char * const ldst_name[] =
2172 {
2173     [MO_UB]   = "ub",
2174     [MO_SB]   = "sb",
2175     [MO_LEUW] = "leuw",
2176     [MO_LESW] = "lesw",
2177     [MO_LEUL] = "leul",
2178     [MO_LESL] = "lesl",
2179     [MO_LEUQ] = "leq",
2180     [MO_BEUW] = "beuw",
2181     [MO_BESW] = "besw",
2182     [MO_BEUL] = "beul",
2183     [MO_BESL] = "besl",
2184     [MO_BEUQ] = "beq",
2185 };
2186 
2187 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2188     [MO_UNALN >> MO_ASHIFT]    = "un+",
2189     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2190     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2191     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2192     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2193     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2194     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2195     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2196 };
2197 
2198 static const char bswap_flag_name[][6] = {
2199     [TCG_BSWAP_IZ] = "iz",
2200     [TCG_BSWAP_OZ] = "oz",
2201     [TCG_BSWAP_OS] = "os",
2202     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2203     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2204 };
2205 
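/*
 * A single-register set is a power of two: e.g. 0b0100 & 0b0011 == 0,
 * while 0b0110 & 0b0101 != 0.  Note the empty set also passes this
 * test; the caller in tcg_dump_ops handles set == 0 beforehand.
 */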
2206 static inline bool tcg_regset_single(TCGRegSet d)
2207 {
2208     return (d & (d - 1)) == 0;
2209 }
2210 
2211 static inline TCGReg tcg_regset_first(TCGRegSet d)
2212 {
2213     if (TCG_TARGET_NB_REGS <= 32) {
2214         return ctz32(d);
2215     } else {
2216         return ctz64(d);
2217     }
2218 }
2219 
2220 /* Return only the number of characters output -- no error return. */
2221 #define ne_fprintf(...) \
2222     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
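/* The ({ ... }) form is a GNU statement expression: a negative fprintf
   error return is clamped to 0 so that column accounting in
   tcg_dump_ops only ever advances. */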
2223 
2224 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2225 {
2226     char buf[128];
2227     TCGOp *op;
2228 
2229     QTAILQ_FOREACH(op, &s->ops, link) {
2230         int i, k, nb_oargs, nb_iargs, nb_cargs;
2231         const TCGOpDef *def;
2232         TCGOpcode c;
2233         int col = 0;
2234 
2235         c = op->opc;
2236         def = &tcg_op_defs[c];
2237 
2238         if (c == INDEX_op_insn_start) {
2239             nb_oargs = 0;
2240             col += ne_fprintf(f, "\n ----");
2241 
2242             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2243                 target_ulong a;
2244 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2245                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2246 #else
2247                 a = op->args[i];
2248 #endif
2249                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
2250             }
2251         } else if (c == INDEX_op_call) {
2252             const TCGHelperInfo *info = tcg_call_info(op);
2253             void *func = tcg_call_func(op);
2254 
2255             /* variable number of arguments */
2256             nb_oargs = TCGOP_CALLO(op);
2257             nb_iargs = TCGOP_CALLI(op);
2258             nb_cargs = def->nb_cargs;
2259 
2260             col += ne_fprintf(f, " %s ", def->name);
2261 
2262             /*
2263              * Print the function name from TCGHelperInfo, if available.
2264              * Note that plugins have a template function for the info,
2265              * but the actual function pointer comes from the plugin.
2266              */
2267             if (func == info->func) {
2268                 col += ne_fprintf(f, "%s", info->name);
2269             } else {
2270                 col += ne_fprintf(f, "plugin(%p)", func);
2271             }
2272 
2273             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2274             for (i = 0; i < nb_oargs; i++) {
2275                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2276                                                             op->args[i]));
2277             }
2278             for (i = 0; i < nb_iargs; i++) {
2279                 TCGArg arg = op->args[nb_oargs + i];
2280                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2281                 col += ne_fprintf(f, ",%s", t);
2282             }
2283         } else {
2284             col += ne_fprintf(f, " %s ", def->name);
2285 
2286             nb_oargs = def->nb_oargs;
2287             nb_iargs = def->nb_iargs;
2288             nb_cargs = def->nb_cargs;
2289 
2290             if (def->flags & TCG_OPF_VECTOR) {
2291                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2292                                   8 << TCGOP_VECE(op));
2293             }
2294 
2295             k = 0;
2296             for (i = 0; i < nb_oargs; i++) {
2297                 const char *sep = k ? "," : "";
2298                 col += ne_fprintf(f, "%s%s", sep,
2299                                   tcg_get_arg_str(s, buf, sizeof(buf),
2300                                                   op->args[k++]));
2301             }
2302             for (i = 0; i < nb_iargs; i++) {
2303                 const char *sep = k ? "," : "";
2304                 col += ne_fprintf(f, "%s%s", sep,
2305                                   tcg_get_arg_str(s, buf, sizeof(buf),
2306                                                   op->args[k++]));
2307             }
2308             switch (c) {
2309             case INDEX_op_brcond_i32:
2310             case INDEX_op_setcond_i32:
2311             case INDEX_op_movcond_i32:
2312             case INDEX_op_brcond2_i32:
2313             case INDEX_op_setcond2_i32:
2314             case INDEX_op_brcond_i64:
2315             case INDEX_op_setcond_i64:
2316             case INDEX_op_movcond_i64:
2317             case INDEX_op_cmp_vec:
2318             case INDEX_op_cmpsel_vec:
2319                 if (op->args[k] < ARRAY_SIZE(cond_name)
2320                     && cond_name[op->args[k]]) {
2321                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2322                 } else {
2323                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2324                 }
2325                 i = 1;
2326                 break;
2327             case INDEX_op_qemu_ld_i32:
2328             case INDEX_op_qemu_st_i32:
2329             case INDEX_op_qemu_st8_i32:
2330             case INDEX_op_qemu_ld_i64:
2331             case INDEX_op_qemu_st_i64:
2332                 {
2333                     MemOpIdx oi = op->args[k++];
2334                     MemOp op = get_memop(oi);
2335                     unsigned ix = get_mmuidx(oi);
2336 
2337                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2338                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2339                     } else {
2340                         const char *s_al, *s_op;
2341                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2342                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2343                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
2344                     }
2345                     i = 1;
2346                 }
2347                 break;
2348             case INDEX_op_bswap16_i32:
2349             case INDEX_op_bswap16_i64:
2350             case INDEX_op_bswap32_i32:
2351             case INDEX_op_bswap32_i64:
2352             case INDEX_op_bswap64_i64:
2353                 {
2354                     TCGArg flags = op->args[k];
2355                     const char *name = NULL;
2356 
2357                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2358                         name = bswap_flag_name[flags];
2359                     }
2360                     if (name) {
2361                         col += ne_fprintf(f, ",%s", name);
2362                     } else {
2363                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2364                     }
2365                     i = k = 1;
2366                 }
2367                 break;
2368             default:
2369                 i = 0;
2370                 break;
2371             }
2372             switch (c) {
2373             case INDEX_op_set_label:
2374             case INDEX_op_br:
2375             case INDEX_op_brcond_i32:
2376             case INDEX_op_brcond_i64:
2377             case INDEX_op_brcond2_i32:
2378                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2379                                   arg_label(op->args[k])->id);
2380                 i++, k++;
2381                 break;
2382             case INDEX_op_mb:
2383                 {
2384                     TCGBar membar = op->args[k];
2385                     const char *b_op, *m_op;
2386 
2387                     switch (membar & TCG_BAR_SC) {
2388                     case 0:
2389                         b_op = "none";
2390                         break;
2391                     case TCG_BAR_LDAQ:
2392                         b_op = "acq";
2393                         break;
2394                     case TCG_BAR_STRL:
2395                         b_op = "rel";
2396                         break;
2397                     case TCG_BAR_SC:
2398                         b_op = "seq";
2399                         break;
2400                     default:
2401                         g_assert_not_reached();
2402                     }
2403 
2404                     switch (membar & TCG_MO_ALL) {
2405                     case 0:
2406                         m_op = "none";
2407                         break;
2408                     case TCG_MO_LD_LD:
2409                         m_op = "rr";
2410                         break;
2411                     case TCG_MO_LD_ST:
2412                         m_op = "rw";
2413                         break;
2414                     case TCG_MO_ST_LD:
2415                         m_op = "wr";
2416                         break;
2417                     case TCG_MO_ST_ST:
2418                         m_op = "ww";
2419                         break;
2420                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2421                         m_op = "rr+rw";
2422                         break;
2423                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2424                         m_op = "rr+wr";
2425                         break;
2426                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2427                         m_op = "rr+ww";
2428                         break;
2429                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2430                         m_op = "rw+wr";
2431                         break;
2432                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2433                         m_op = "rw+ww";
2434                         break;
2435                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2436                         m_op = "wr+ww";
2437                         break;
2438                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2439                         m_op = "rr+rw+wr";
2440                         break;
2441                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2442                         m_op = "rr+rw+ww";
2443                         break;
2444                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2445                         m_op = "rr+wr+ww";
2446                         break;
2447                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2448                         m_op = "rw+wr+ww";
2449                         break;
2450                     case TCG_MO_ALL:
2451                         m_op = "all";
2452                         break;
2453                     default:
2454                         g_assert_not_reached();
2455                     }
2456 
2457                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2458                     i++, k++;
2459                 }
2460                 break;
2461             default:
2462                 break;
2463             }
2464             for (; i < nb_cargs; i++, k++) {
2465                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2466                                   op->args[k]);
2467             }
2468         }
2469 
2470         if (have_prefs || op->life) {
2471             for (; col < 40; ++col) {
2472                 putc(' ', f);
2473             }
2474         }
2475 
2476         if (op->life) {
2477             unsigned life = op->life;
2478 
2479             if (life & (SYNC_ARG * 3)) {
2480                 ne_fprintf(f, "  sync:");
2481                 for (i = 0; i < 2; ++i) {
2482                     if (life & (SYNC_ARG << i)) {
2483                         ne_fprintf(f, " %d", i);
2484                     }
2485                 }
2486             }
2487             life /= DEAD_ARG;
2488             if (life) {
2489                 ne_fprintf(f, "  dead:");
2490                 for (i = 0; life; ++i, life >>= 1) {
2491                     if (life & 1) {
2492                         ne_fprintf(f, " %d", i);
2493                     }
2494                 }
2495             }
2496         }
2497 
2498         if (have_prefs) {
2499             for (i = 0; i < nb_oargs; ++i) {
2500                 TCGRegSet set = output_pref(op, i);
2501 
2502                 if (i == 0) {
2503                     ne_fprintf(f, "  pref=");
2504                 } else {
2505                     ne_fprintf(f, ",");
2506                 }
2507                 if (set == 0) {
2508                     ne_fprintf(f, "none");
2509                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2510                     ne_fprintf(f, "all");
2511 #ifdef CONFIG_DEBUG_TCG
2512                 } else if (tcg_regset_single(set)) {
2513                     TCGReg reg = tcg_regset_first(set);
2514                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2515 #endif
2516                 } else if (TCG_TARGET_NB_REGS <= 32) {
2517                     ne_fprintf(f, "0x%x", (uint32_t)set);
2518                 } else {
2519                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2520                 }
2521             }
2522         }
2523 
2524         putc('\n', f);
2525     }
2526 }
2527 
2528 /* We give more priority to constraints with fewer registers. */
2529 static int get_constraint_priority(const TCGOpDef *def, int k)
2530 {
2531     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2532     int n = ctpop64(arg_ct->regs);
2533 
2534     /*
2535      * Sort constraints of a single register first, which includes output
2536      * aliases (which must exactly match the input already allocated).
2537      */
2538     if (n == 1 || arg_ct->oalias) {
2539         return INT_MAX;
2540     }
2541 
2542     /*
2543      * Sort register pairs next, first then second immediately after.
2544      * Arbitrarily sort multiple pairs by the index of the first reg;
2545      * there shouldn't be many pairs.
2546      */
2547     switch (arg_ct->pair) {
2548     case 1:
2549     case 3:
2550         return (k + 1) * 2;
2551     case 2:
2552         return (arg_ct->pair_index + 1) * 2 - 1;
2553     }
2554 
2555     /* Finally, sort by decreasing register count. */
2556     assert(n > 1);
2557     return -n;
2558 }
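/*
 * Illustrative priorities (constructed example, not from the source):
 * a single-register or output-alias constraint gets INT_MAX and sorts
 * first; the halves of a register pair get small positive values that
 * keep the first half just ahead of the second; a plain 16-register
 * class gets -16 and sorts last.
 */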
2559 
2560 /* sort from highest priority to lowest */
2561 static void sort_constraints(TCGOpDef *def, int start, int n)
2562 {
2563     int i, j;
2564     TCGArgConstraint *a = def->args_ct;
2565 
2566     for (i = 0; i < n; i++) {
2567         a[start + i].sort_index = start + i;
2568     }
2569     if (n <= 1) {
2570         return;
2571     }
2572     for (i = 0; i < n - 1; i++) {
2573         for (j = i + 1; j < n; j++) {
2574             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2575             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2576             if (p1 < p2) {
2577                 int tmp = a[start + i].sort_index;
2578                 a[start + i].sort_index = a[start + j].sort_index;
2579                 a[start + j].sort_index = tmp;
2580             }
2581         }
2582     }
2583 }
2584 
2585 static void process_op_defs(TCGContext *s)
2586 {
2587     TCGOpcode op;
2588 
2589     for (op = 0; op < NB_OPS; op++) {
2590         TCGOpDef *def = &tcg_op_defs[op];
2591         const TCGTargetOpDef *tdefs;
2592         bool saw_alias_pair = false;
2593         int i, o, i2, o2, nb_args;
2594 
2595         if (def->flags & TCG_OPF_NOT_PRESENT) {
2596             continue;
2597         }
2598 
2599         nb_args = def->nb_iargs + def->nb_oargs;
2600         if (nb_args == 0) {
2601             continue;
2602         }
2603 
2604         /*
2605          * Macro magic should make it impossible, but double-check that
2606          * the array index is in range.  Since the signedness of an enum
2607          * is implementation defined, force the result to unsigned.
2608          */
2609         unsigned con_set = tcg_target_op_def(op);
2610         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2611         tdefs = &constraint_sets[con_set];
2612 
2613         for (i = 0; i < nb_args; i++) {
2614             const char *ct_str = tdefs->args_ct_str[i];
2615             bool input_p = i >= def->nb_oargs;
2616 
2617             /* Incomplete TCGTargetOpDef entry. */
2618             tcg_debug_assert(ct_str != NULL);
2619 
2620             switch (*ct_str) {
2621             case '0' ... '9':
2622                 o = *ct_str - '0';
2623                 tcg_debug_assert(input_p);
2624                 tcg_debug_assert(o < def->nb_oargs);
2625                 tcg_debug_assert(def->args_ct[o].regs != 0);
2626                 tcg_debug_assert(!def->args_ct[o].oalias);
2627                 def->args_ct[i] = def->args_ct[o];
2628                 /* The output sets oalias.  */
2629                 def->args_ct[o].oalias = 1;
2630                 def->args_ct[o].alias_index = i;
2631                 /* The input sets ialias. */
2632                 def->args_ct[i].ialias = 1;
2633                 def->args_ct[i].alias_index = o;
2634                 if (def->args_ct[i].pair) {
2635                     saw_alias_pair = true;
2636                 }
2637                 tcg_debug_assert(ct_str[1] == '\0');
2638                 continue;
2639 
2640             case '&':
2641                 tcg_debug_assert(!input_p);
2642                 def->args_ct[i].newreg = true;
2643                 ct_str++;
2644                 break;
2645 
2646             case 'p': /* plus */
2647                 /* Allocate to the register after the previous. */
2648                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2649                 o = i - 1;
2650                 tcg_debug_assert(!def->args_ct[o].pair);
2651                 tcg_debug_assert(!def->args_ct[o].ct);
2652                 def->args_ct[i] = (TCGArgConstraint){
2653                     .pair = 2,
2654                     .pair_index = o,
2655                     .regs = def->args_ct[o].regs << 1,
2656                 };
2657                 def->args_ct[o].pair = 1;
2658                 def->args_ct[o].pair_index = i;
2659                 tcg_debug_assert(ct_str[1] == '\0');
2660                 continue;
2661 
2662             case 'm': /* minus */
2663                 /* Allocate to the register before the previous. */
2664                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2665                 o = i - 1;
2666                 tcg_debug_assert(!def->args_ct[o].pair);
2667                 tcg_debug_assert(!def->args_ct[o].ct);
2668                 def->args_ct[i] = (TCGArgConstraint){
2669                     .pair = 1,
2670                     .pair_index = o,
2671                     .regs = def->args_ct[o].regs >> 1,
2672                 };
2673                 def->args_ct[o].pair = 2;
2674                 def->args_ct[o].pair_index = i;
2675                 tcg_debug_assert(ct_str[1] == '\0');
2676                 continue;
2677             }
2678 
2679             do {
2680                 switch (*ct_str) {
2681                 case 'i':
2682                     def->args_ct[i].ct |= TCG_CT_CONST;
2683                     break;
2684 
2685                 /* Include all of the target-specific constraints. */
2686 
2687 #undef CONST
2688 #define CONST(CASE, MASK) \
2689     case CASE: def->args_ct[i].ct |= MASK; break;
2690 #define REGS(CASE, MASK) \
2691     case CASE: def->args_ct[i].regs |= MASK; break;
2692 
2693 #include "tcg-target-con-str.h"
2694 
2695 #undef REGS
2696 #undef CONST
2697                 default:
2698                 case '0' ... '9':
2699                 case '&':
2700                 case 'p':
2701                 case 'm':
2702                     /* Typo in TCGTargetOpDef constraint. */
2703                     g_assert_not_reached();
2704                 }
2705             } while (*++ct_str != '\0');
2706         }
2707 
2708         /* TCGTargetOpDef entry with too much information? */
2709         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2710 
2711         /*
2712          * Fix up output pairs that are aliased with inputs.
2713          * When we created the alias, we copied pair from the output.
2714          * There are three cases:
2715          *    (1a) Pairs of inputs alias pairs of outputs.
2716          *    (1b) One input aliases the first of a pair of outputs.
2717          *    (2)  One input aliases the second of a pair of outputs.
2718          *
2719          * Case 1a is handled by making sure that the pair_index'es are
2720          * properly updated so that they appear the same as a pair of inputs.
2721          *
2722          * Case 1b is handled by setting the pair_index of the input to
2723          * itself, simply so it doesn't point to an unrelated argument.
2724          * Since we don't encounter the "second" during the input allocation
2725          * phase, nothing happens with the second half of the input pair.
2726          *
2727          * Case 2 is handled by setting the second input to pair=3, the
2728          * first output to pair=3, and the pair_index'es to match.
2729          */
2730         if (saw_alias_pair) {
2731             for (i = def->nb_oargs; i < nb_args; i++) {
2732                 /*
2733                  * Since [0-9pm] must be alone in the constraint string,
2734                  * the only way they can both be set is if the pair comes
2735                  * from the output alias.
2736                  */
2737                 if (!def->args_ct[i].ialias) {
2738                     continue;
2739                 }
2740                 switch (def->args_ct[i].pair) {
2741                 case 0:
2742                     break;
2743                 case 1:
2744                     o = def->args_ct[i].alias_index;
2745                     o2 = def->args_ct[o].pair_index;
2746                     tcg_debug_assert(def->args_ct[o].pair == 1);
2747                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2748                     if (def->args_ct[o2].oalias) {
2749                         /* Case 1a */
2750                         i2 = def->args_ct[o2].alias_index;
2751                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2752                         def->args_ct[i2].pair_index = i;
2753                         def->args_ct[i].pair_index = i2;
2754                     } else {
2755                         /* Case 1b */
2756                         def->args_ct[i].pair_index = i;
2757                     }
2758                     break;
2759                 case 2:
2760                     o = def->args_ct[i].alias_index;
2761                     o2 = def->args_ct[o].pair_index;
2762                     tcg_debug_assert(def->args_ct[o].pair == 2);
2763                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2764                     if (def->args_ct[o2].oalias) {
2765                         /* Case 1a */
2766                         i2 = def->args_ct[o2].alias_index;
2767                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2768                         def->args_ct[i2].pair_index = i;
2769                         def->args_ct[i].pair_index = i2;
2770                     } else {
2771                         /* Case 2 */
2772                         def->args_ct[i].pair = 3;
2773                         def->args_ct[o2].pair = 3;
2774                         def->args_ct[i].pair_index = o2;
2775                         def->args_ct[o2].pair_index = i;
2776                     }
2777                     break;
2778                 default:
2779                     g_assert_not_reached();
2780                 }
2781             }
2782         }
2783 
2784         /* Sort the constraints (XXX: this is just a heuristic). */
2785         sort_constraints(def, 0, def->nb_oargs);
2786         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2787     }
2788 }
2789 
2790 static void remove_label_use(TCGOp *op, int idx)
2791 {
2792     TCGLabel *label = arg_label(op->args[idx]);
2793     TCGLabelUse *use;
2794 
2795     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2796         if (use->op == op) {
2797             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2798             return;
2799         }
2800     }
2801     g_assert_not_reached();
2802 }
2803 
2804 void tcg_op_remove(TCGContext *s, TCGOp *op)
2805 {
2806     switch (op->opc) {
2807     case INDEX_op_br:
2808         remove_label_use(op, 0);
2809         break;
2810     case INDEX_op_brcond_i32:
2811     case INDEX_op_brcond_i64:
2812         remove_label_use(op, 3);
2813         break;
2814     case INDEX_op_brcond2_i32:
2815         remove_label_use(op, 5);
2816         break;
2817     default:
2818         break;
2819     }
2820 
2821     QTAILQ_REMOVE(&s->ops, op, link);
2822     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2823     s->nb_ops--;
2824 
2825 #ifdef CONFIG_PROFILER
2826     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2827 #endif
2828 }
2829 
2830 void tcg_remove_ops_after(TCGOp *op)
2831 {
2832     TCGContext *s = tcg_ctx;
2833 
2834     while (true) {
2835         TCGOp *last = tcg_last_op();
2836         if (last == op) {
2837             return;
2838         }
2839         tcg_op_remove(s, last);
2840     }
2841 }
2842 
2843 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2844 {
2845     TCGContext *s = tcg_ctx;
2846     TCGOp *op = NULL;
2847 
2848     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2849         QTAILQ_FOREACH(op, &s->free_ops, link) {
2850             if (nargs <= op->nargs) {
2851                 QTAILQ_REMOVE(&s->free_ops, op, link);
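                /* Keep the recycled op's full capacity, which may
                   exceed the requested nargs. */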
2852                 nargs = op->nargs;
2853                 goto found;
2854             }
2855         }
2856     }
2857 
2858     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2859     nargs = MAX(4, nargs);
2860     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2861 
2862  found:
2863     memset(op, 0, offsetof(TCGOp, link));
2864     op->opc = opc;
2865     op->nargs = nargs;
2866 
2867     /* Check for bitfield overflow. */
2868     tcg_debug_assert(op->nargs == nargs);
2869 
2870     s->nb_ops++;
2871     return op;
2872 }
2873 
2874 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2875 {
2876     TCGOp *op = tcg_op_alloc(opc, nargs);
2877     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2878     return op;
2879 }
2880 
2881 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2882                             TCGOpcode opc, unsigned nargs)
2883 {
2884     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2885     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2886     return new_op;
2887 }
2888 
2889 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2890                            TCGOpcode opc, unsigned nargs)
2891 {
2892     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2893     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2894     return new_op;
2895 }
2896 
2897 static void move_label_uses(TCGLabel *to, TCGLabel *from)
2898 {
2899     TCGLabelUse *u;
2900 
2901     QSIMPLEQ_FOREACH(u, &from->branches, next) {
2902         TCGOp *op = u->op;
2903         switch (op->opc) {
2904         case INDEX_op_br:
2905             op->args[0] = label_arg(to);
2906             break;
2907         case INDEX_op_brcond_i32:
2908         case INDEX_op_brcond_i64:
2909             op->args[3] = label_arg(to);
2910             break;
2911         case INDEX_op_brcond2_i32:
2912             op->args[5] = label_arg(to);
2913             break;
2914         default:
2915             g_assert_not_reached();
2916         }
2917     }
2918 
2919     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
2920 }
2921 
2922 /* Reachability analysis: remove unreachable code.  */
2923 static void __attribute__((noinline))
2924 reachable_code_pass(TCGContext *s)
2925 {
2926     TCGOp *op, *op_next, *op_prev;
2927     bool dead = false;
2928 
2929     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2930         bool remove = dead;
2931         TCGLabel *label;
2932 
2933         switch (op->opc) {
2934         case INDEX_op_set_label:
2935             label = arg_label(op->args[0]);
2936 
2937             /*
2938              * Note that the first op in the TB is always a load,
2939              * so there is always something before a label.
2940              */
2941             op_prev = QTAILQ_PREV(op, link);
2942 
2943             /*
2944              * If we find two sequential labels, move all branches to
2945              * reference the second label and remove the first label.
2946              * Do this before the branch-to-next optimization below, so
2947              * that the middle label is out of the way.
2948              */
2949             if (op_prev->opc == INDEX_op_set_label) {
2950                 move_label_uses(label, arg_label(op_prev->args[0]));
2951                 tcg_op_remove(s, op_prev);
2952                 op_prev = QTAILQ_PREV(op, link);
2953             }
2954 
2955             /*
2956              * Optimization can fold conditional branches to unconditional.
2957              * If we find a label which is preceded by an unconditional
2958              * branch to next, remove the branch.  We couldn't do this when
2959              * processing the branch because any dead code between the branch
2960              * and label had not yet been removed.
2961              */
2962             if (op_prev->opc == INDEX_op_br &&
2963                 label == arg_label(op_prev->args[0])) {
2964                 tcg_op_remove(s, op_prev);
2965                 /* Fall through means insns become live again.  */
2966                 dead = false;
2967             }
2968 
2969             if (QSIMPLEQ_EMPTY(&label->branches)) {
2970                 /*
2971                  * While there is an occasional backward branch, virtually
2972                  * all branches generated by the translators are forward.
2973                  * Thus by the time we reach a label, any reference that is
2974                  * going to be removed has generally been removed already,
2975                  * and there is little to be gained by iterating.
2976                  */
2977                 remove = true;
2978             } else {
2979                 /* Once we see a label, insns become live again.  */
2980                 dead = false;
2981                 remove = false;
2982             }
2983             break;
2984 
2985         case INDEX_op_br:
2986         case INDEX_op_exit_tb:
2987         case INDEX_op_goto_ptr:
2988             /* Unconditional branches; everything following is dead.  */
2989             dead = true;
2990             break;
2991 
2992         case INDEX_op_call:
2993         /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2994             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2995                 dead = true;
2996             }
2997             break;
2998 
2999         case INDEX_op_insn_start:
3000             /* Never remove -- we need to keep these for unwind.  */
3001             remove = false;
3002             break;
3003 
3004         default:
3005             break;
3006         }
3007 
3008         if (remove) {
3009             tcg_op_remove(s, op);
3010         }
3011     }
3012 }
3013 
3014 #define TS_DEAD  1
3015 #define TS_MEM   2
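/*
 * Per-temp liveness state bits: TS_DEAD means the value is not needed
 * by any later op; TS_MEM means the value must also be present in its
 * backing memory slot.  E.g. at function end globals carry both bits:
 * dead in registers, but synced to memory.
 */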
3016 
3017 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3018 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3019 
3020 /* For liveness_pass_1, the register preferences for a given temp.  */
3021 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3022 {
3023     return ts->state_ptr;
3024 }
3025 
3026 /* For liveness_pass_1, reset the preferences for a given temp to the
3027  * maximal regset for its type.
3028  */
3029 static inline void la_reset_pref(TCGTemp *ts)
3030 {
3031     *la_temp_pref(ts)
3032         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3033 }
3034 
3035 /* liveness analysis: end of function: all temps are dead, and globals
3036    should be in memory. */
3037 static void la_func_end(TCGContext *s, int ng, int nt)
3038 {
3039     int i;
3040 
3041     for (i = 0; i < ng; ++i) {
3042         s->temps[i].state = TS_DEAD | TS_MEM;
3043         la_reset_pref(&s->temps[i]);
3044     }
3045     for (i = ng; i < nt; ++i) {
3046         s->temps[i].state = TS_DEAD;
3047         la_reset_pref(&s->temps[i]);
3048     }
3049 }
3050 
3051 /* liveness analysis: end of basic block: all temps are dead, globals
3052    and local temps should be in memory. */
3053 static void la_bb_end(TCGContext *s, int ng, int nt)
3054 {
3055     int i;
3056 
3057     for (i = 0; i < nt; ++i) {
3058         TCGTemp *ts = &s->temps[i];
3059         int state;
3060 
3061         switch (ts->kind) {
3062         case TEMP_FIXED:
3063         case TEMP_GLOBAL:
3064         case TEMP_TB:
3065             state = TS_DEAD | TS_MEM;
3066             break;
3067         case TEMP_EBB:
3068         case TEMP_CONST:
3069             state = TS_DEAD;
3070             break;
3071         default:
3072             g_assert_not_reached();
3073         }
3074         ts->state = state;
3075         la_reset_pref(ts);
3076     }
3077 }
3078 
3079 /* liveness analysis: sync globals back to memory.  */
3080 static void la_global_sync(TCGContext *s, int ng)
3081 {
3082     int i;
3083 
3084     for (i = 0; i < ng; ++i) {
3085         int state = s->temps[i].state;
3086         s->temps[i].state = state | TS_MEM;
3087         if (state == TS_DEAD) {
3088             /* If the global was previously dead, reset prefs.  */
3089             la_reset_pref(&s->temps[i]);
3090         }
3091     }
3092 }
3093 
3094 /*
3095  * liveness analysis: conditional branch: all temps are dead unless
3096  * explicitly live-across-conditional-branch, globals and local temps
3097  * should be synced.
3098  */
3099 static void la_bb_sync(TCGContext *s, int ng, int nt)
3100 {
3101     la_global_sync(s, ng);
3102 
3103     for (int i = ng; i < nt; ++i) {
3104         TCGTemp *ts = &s->temps[i];
3105         int state;
3106 
3107         switch (ts->kind) {
3108         case TEMP_TB:
3109             state = ts->state;
3110             ts->state = state | TS_MEM;
3111             if (state != TS_DEAD) {
3112                 continue;
3113             }
3114             break;
3115         case TEMP_EBB:
3116         case TEMP_CONST:
3117             continue;
3118         default:
3119             g_assert_not_reached();
3120         }
3121         la_reset_pref(&s->temps[i]);
3122     }
3123 }
3124 
3125 /* liveness analysis: sync globals back to memory and kill.  */
3126 static void la_global_kill(TCGContext *s, int ng)
3127 {
3128     int i;
3129 
3130     for (i = 0; i < ng; i++) {
3131         s->temps[i].state = TS_DEAD | TS_MEM;
3132         la_reset_pref(&s->temps[i]);
3133     }
3134 }
3135 
3136 /* liveness analysis: note live temps crossing calls.  */
3137 static void la_cross_call(TCGContext *s, int nt)
3138 {
3139     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3140     int i;
3141 
3142     for (i = 0; i < nt; i++) {
3143         TCGTemp *ts = &s->temps[i];
3144         if (!(ts->state & TS_DEAD)) {
3145             TCGRegSet *pset = la_temp_pref(ts);
3146             TCGRegSet set = *pset;
3147 
3148             set &= mask;
3149             /* If the combination is not possible, restart.  */
3150             if (set == 0) {
3151                 set = tcg_target_available_regs[ts->type] & mask;
3152             }
3153             *pset = set;
3154         }
3155     }
3156 }
3157 
3158 /*
3159  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3160  * to TEMP_EBB, if possible.
3161  */
3162 static void __attribute__((noinline))
3163 liveness_pass_0(TCGContext *s)
3164 {
3165     void * const multiple_ebb = (void *)(uintptr_t)-1;
3166     int nb_temps = s->nb_temps;
3167     TCGOp *op, *ebb;
3168 
3169     for (int i = s->nb_globals; i < nb_temps; ++i) {
3170         s->temps[i].state_ptr = NULL;
3171     }
3172 
3173     /*
3174      * Represent each EBB by the op at which it begins.  In the case of
3175      * the first EBB, this is the first op, otherwise it is a label.
3176      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3177      * within a single EBB, else MULTIPLE_EBB.
3178      */
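    /*
     * For example, a TEMP_TB used only between one pair of labels sees
     * the same EBB pointer at every use and can be reduced to TEMP_EBB
     * below; a single further use in a later EBB flips it to
     * MULTIPLE_EBB and it must remain TEMP_TB.
     */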
3179     ebb = QTAILQ_FIRST(&s->ops);
3180     QTAILQ_FOREACH(op, &s->ops, link) {
3181         const TCGOpDef *def;
3182         int nb_oargs, nb_iargs;
3183 
3184         switch (op->opc) {
3185         case INDEX_op_set_label:
3186             ebb = op;
3187             continue;
3188         case INDEX_op_discard:
3189             continue;
3190         case INDEX_op_call:
3191             nb_oargs = TCGOP_CALLO(op);
3192             nb_iargs = TCGOP_CALLI(op);
3193             break;
3194         default:
3195             def = &tcg_op_defs[op->opc];
3196             nb_oargs = def->nb_oargs;
3197             nb_iargs = def->nb_iargs;
3198             break;
3199         }
3200 
3201         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3202             TCGTemp *ts = arg_temp(op->args[i]);
3203 
3204             if (ts->kind != TEMP_TB) {
3205                 continue;
3206             }
3207             if (ts->state_ptr == NULL) {
3208                 ts->state_ptr = ebb;
3209             } else if (ts->state_ptr != ebb) {
3210                 ts->state_ptr = multiple_ebb;
3211             }
3212         }
3213     }
3214 
3215     /*
3216      * For TEMP_TB that turned out not to be used beyond one EBB,
3217      * reduce the liveness to TEMP_EBB.
3218      */
3219     for (int i = s->nb_globals; i < nb_temps; ++i) {
3220         TCGTemp *ts = &s->temps[i];
3221         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3222             ts->kind = TEMP_EBB;
3223         }
3224     }
3225 }
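
/*
 * For example (with illustrative temps and opcodes), in
 *     mov_i32 t5, t2
 *     set_label L0            <- a new EBB begins here
 *     add_i32 t6, t6, t5
 * a TEMP_TB t5 is referenced in two different EBBs and keeps its kind,
 * while a TEMP_TB t6 referenced only after L0 is reduced to TEMP_EBB.
 */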
3226 
3227 /* Liveness analysis: update the opc_arg_life array to tell if a
3228    given input argument is dead. Instructions updating dead
3229    temporaries are removed. */
3230 static void __attribute__((noinline))
3231 liveness_pass_1(TCGContext *s)
3232 {
3233     int nb_globals = s->nb_globals;
3234     int nb_temps = s->nb_temps;
3235     TCGOp *op, *op_prev;
3236     TCGRegSet *prefs;
3237     int i;
3238 
3239     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3240     for (i = 0; i < nb_temps; ++i) {
3241         s->temps[i].state_ptr = prefs + i;
3242     }
3243 
3244     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3245     la_func_end(s, nb_globals, nb_temps);
3246 
3247     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3248         int nb_iargs, nb_oargs;
3249         TCGOpcode opc_new, opc_new2;
3250         bool have_opc_new2;
3251         TCGLifeData arg_life = 0;
3252         TCGTemp *ts;
3253         TCGOpcode opc = op->opc;
3254         const TCGOpDef *def = &tcg_op_defs[opc];
3255 
3256         switch (opc) {
3257         case INDEX_op_call:
3258             {
3259                 const TCGHelperInfo *info = tcg_call_info(op);
3260                 int call_flags = tcg_call_flags(op);
3261 
3262                 nb_oargs = TCGOP_CALLO(op);
3263                 nb_iargs = TCGOP_CALLI(op);
3264 
3265                 /* pure functions can be removed if their result is unused */
3266                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3267                     for (i = 0; i < nb_oargs; i++) {
3268                         ts = arg_temp(op->args[i]);
3269                         if (ts->state != TS_DEAD) {
3270                             goto do_not_remove_call;
3271                         }
3272                     }
3273                     goto do_remove;
3274                 }
3275             do_not_remove_call:
3276 
3277                 /* Output args are dead.  */
3278                 for (i = 0; i < nb_oargs; i++) {
3279                     ts = arg_temp(op->args[i]);
3280                     if (ts->state & TS_DEAD) {
3281                         arg_life |= DEAD_ARG << i;
3282                     }
3283                     if (ts->state & TS_MEM) {
3284                         arg_life |= SYNC_ARG << i;
3285                     }
3286                     ts->state = TS_DEAD;
3287                     la_reset_pref(ts);
3288                 }
3289 
3290                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3291                 memset(op->output_pref, 0, sizeof(op->output_pref));
3292 
3293                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3294                                     TCG_CALL_NO_READ_GLOBALS))) {
3295                     la_global_kill(s, nb_globals);
3296                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3297                     la_global_sync(s, nb_globals);
3298                 }
3299 
3300                 /* Record arguments that die in this helper.  */
3301                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3302                     ts = arg_temp(op->args[i]);
3303                     if (ts->state & TS_DEAD) {
3304                         arg_life |= DEAD_ARG << i;
3305                     }
3306                 }
3307 
3308                 /* For all live registers, remove call-clobbered prefs.  */
3309                 la_cross_call(s, nb_temps);
3310 
3311                 /*
3312                  * Input arguments are live for preceding opcodes.
3313                  *
3314                  * For those arguments that die, and will be allocated in
3315                  * registers, clear the register set for that arg, to be
3316                  * filled in below.  For args that will be on the stack,
3317                  * reset to any available reg.  Process arguments in reverse
3318                  * order so that if a temp is used more than once, the stack
3319                  * reset to max happens before the register reset to 0.
3320                  */
3321                 for (i = nb_iargs - 1; i >= 0; i--) {
3322                     const TCGCallArgumentLoc *loc = &info->in[i];
3323                     ts = arg_temp(op->args[nb_oargs + i]);
3324 
3325                     if (ts->state & TS_DEAD) {
3326                         switch (loc->kind) {
3327                         case TCG_CALL_ARG_NORMAL:
3328                         case TCG_CALL_ARG_EXTEND_U:
3329                         case TCG_CALL_ARG_EXTEND_S:
3330                             if (arg_slot_reg_p(loc->arg_slot)) {
3331                                 *la_temp_pref(ts) = 0;
3332                                 break;
3333                             }
3334                             /* fall through */
3335                         default:
3336                             *la_temp_pref(ts) =
3337                                 tcg_target_available_regs[ts->type];
3338                             break;
3339                         }
3340                         ts->state &= ~TS_DEAD;
3341                     }
3342                 }
3343 
3344                 /*
3345                  * For each input argument, add its input register to prefs.
3346                  * If a temp is used once, this produces a single set bit;
3347                  * if a temp is used multiple times, this produces a set.
3348                  */
3349                 for (i = 0; i < nb_iargs; i++) {
3350                     const TCGCallArgumentLoc *loc = &info->in[i];
3351                     ts = arg_temp(op->args[nb_oargs + i]);
3352 
3353                     switch (loc->kind) {
3354                     case TCG_CALL_ARG_NORMAL:
3355                     case TCG_CALL_ARG_EXTEND_U:
3356                     case TCG_CALL_ARG_EXTEND_S:
3357                         if (arg_slot_reg_p(loc->arg_slot)) {
3358                             tcg_regset_set_reg(*la_temp_pref(ts),
3359                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3360                         }
3361                         break;
3362                     default:
3363                         break;
3364                     }
3365                 }
3366             }
3367             break;
3368         case INDEX_op_insn_start:
3369             break;
3370         case INDEX_op_discard:
3371             /* mark the temporary as dead */
3372             ts = arg_temp(op->args[0]);
3373             ts->state = TS_DEAD;
3374             la_reset_pref(ts);
3375             break;
3376 
3377         case INDEX_op_add2_i32:
3378             opc_new = INDEX_op_add_i32;
3379             goto do_addsub2;
3380         case INDEX_op_sub2_i32:
3381             opc_new = INDEX_op_sub_i32;
3382             goto do_addsub2;
3383         case INDEX_op_add2_i64:
3384             opc_new = INDEX_op_add_i64;
3385             goto do_addsub2;
3386         case INDEX_op_sub2_i64:
3387             opc_new = INDEX_op_sub_i64;
3388         do_addsub2:
3389             nb_iargs = 4;
3390             nb_oargs = 2;
3391             /* Test if the high part of the operation is dead, but not
3392                the low part.  The result can be optimized to a simple
3393                add or sub.  This happens often for an x86_64 guest when
3394                the cpu mode is set to 32-bit.  */
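            /*
             * For example (illustrative): "add2_i32 lo, hi, al, ah, bl, bh"
             * with hi dead but lo live is rewritten below, in place, to
             * "add_i32 lo, al, bl".
             */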
3395             if (arg_temp(op->args[1])->state == TS_DEAD) {
3396                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3397                     goto do_remove;
3398                 }
3399                 /* Replace the opcode and adjust the args in place,
3400                    leaving 3 unused args at the end.  */
3401                 op->opc = opc = opc_new;
3402                 op->args[1] = op->args[2];
3403                 op->args[2] = op->args[4];
3404                 /* Fall through and mark the single-word operation live.  */
3405                 nb_iargs = 2;
3406                 nb_oargs = 1;
3407             }
3408             goto do_not_remove;
3409 
3410         case INDEX_op_mulu2_i32:
3411             opc_new = INDEX_op_mul_i32;
3412             opc_new2 = INDEX_op_muluh_i32;
3413             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3414             goto do_mul2;
3415         case INDEX_op_muls2_i32:
3416             opc_new = INDEX_op_mul_i32;
3417             opc_new2 = INDEX_op_mulsh_i32;
3418             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3419             goto do_mul2;
3420         case INDEX_op_mulu2_i64:
3421             opc_new = INDEX_op_mul_i64;
3422             opc_new2 = INDEX_op_muluh_i64;
3423             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3424             goto do_mul2;
3425         case INDEX_op_muls2_i64:
3426             opc_new = INDEX_op_mul_i64;
3427             opc_new2 = INDEX_op_mulsh_i64;
3428             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3429             goto do_mul2;
3430         do_mul2:
3431             nb_iargs = 2;
3432             nb_oargs = 2;
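            /*
             * For example (illustrative): "mulu2_i32 lo, hi, a, b" with hi
             * dead becomes "mul_i32 lo, a, b"; with lo dead and muluh
             * supported by the host, it becomes "muluh_i32 hi, a, b".
             */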
3433             if (arg_temp(op->args[1])->state == TS_DEAD) {
3434                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3435                     /* Both parts of the operation are dead.  */
3436                     goto do_remove;
3437                 }
3438                 /* The high part of the operation is dead; generate the low. */
3439                 op->opc = opc = opc_new;
3440                 op->args[1] = op->args[2];
3441                 op->args[2] = op->args[3];
3442             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3443                 /* The low part of the operation is dead; generate the high. */
3444                 op->opc = opc = opc_new2;
3445                 op->args[0] = op->args[1];
3446                 op->args[1] = op->args[2];
3447                 op->args[2] = op->args[3];
3448             } else {
3449                 goto do_not_remove;
3450             }
3451             /* Mark the single-word operation live.  */
3452             nb_oargs = 1;
3453             goto do_not_remove;
3454 
3455         default:
3456             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3457             nb_iargs = def->nb_iargs;
3458             nb_oargs = def->nb_oargs;
3459 
3460             /* Test if the operation can be removed because all
3461                its outputs are dead. We assume that nb_oargs == 0
3462                implies side effects.  */
3463             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3464                 for (i = 0; i < nb_oargs; i++) {
3465                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3466                         goto do_not_remove;
3467                     }
3468                 }
3469                 goto do_remove;
3470             }
3471             goto do_not_remove;
3472 
3473         do_remove:
3474             tcg_op_remove(s, op);
3475             break;
3476 
3477         do_not_remove:
3478             for (i = 0; i < nb_oargs; i++) {
3479                 ts = arg_temp(op->args[i]);
3480 
3481                 /* Remember the preference of the uses that followed.  */
3482                 if (i < ARRAY_SIZE(op->output_pref)) {
3483                     op->output_pref[i] = *la_temp_pref(ts);
3484                 }
3485 
3486                 /* Output args are dead.  */
3487                 if (ts->state & TS_DEAD) {
3488                     arg_life |= DEAD_ARG << i;
3489                 }
3490                 if (ts->state & TS_MEM) {
3491                     arg_life |= SYNC_ARG << i;
3492                 }
3493                 ts->state = TS_DEAD;
3494                 la_reset_pref(ts);
3495             }
3496 
3497             /* If end of basic block, update.  */
3498             if (def->flags & TCG_OPF_BB_EXIT) {
3499                 la_func_end(s, nb_globals, nb_temps);
3500             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3501                 la_bb_sync(s, nb_globals, nb_temps);
3502             } else if (def->flags & TCG_OPF_BB_END) {
3503                 la_bb_end(s, nb_globals, nb_temps);
3504             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3505                 la_global_sync(s, nb_globals);
3506                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3507                     la_cross_call(s, nb_temps);
3508                 }
3509             }
3510 
3511             /* Record arguments that die in this opcode.  */
3512             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3513                 ts = arg_temp(op->args[i]);
3514                 if (ts->state & TS_DEAD) {
3515                     arg_life |= DEAD_ARG << i;
3516                 }
3517             }
3518 
3519             /* Input arguments are live for preceding opcodes.  */
3520             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3521                 ts = arg_temp(op->args[i]);
3522                 if (ts->state & TS_DEAD) {
3523                     /* For operands that were dead, initially allow
3524                        all regs for the type.  */
3525                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3526                     ts->state &= ~TS_DEAD;
3527                 }
3528             }
3529 
3530             /* Incorporate constraints for this operand.  */
3531             switch (opc) {
3532             case INDEX_op_mov_i32:
3533             case INDEX_op_mov_i64:
3534                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3535                    have proper constraints.  That said, special case
3536                    moves to propagate preferences backward.  */
3537                 if (IS_DEAD_ARG(1)) {
3538                     *la_temp_pref(arg_temp(op->args[0]))
3539                         = *la_temp_pref(arg_temp(op->args[1]));
3540                 }
3541                 break;
3542 
3543             default:
3544                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3545                     const TCGArgConstraint *ct = &def->args_ct[i];
3546                     TCGRegSet set, *pset;
3547 
3548                     ts = arg_temp(op->args[i]);
3549                     pset = la_temp_pref(ts);
3550                     set = *pset;
3551 
3552                     set &= ct->regs;
3553                     if (ct->ialias) {
3554                         set &= output_pref(op, ct->alias_index);
3555                     }
3556                     /* If the combination is not possible, restart.  */
3557                     if (set == 0) {
3558                         set = ct->regs;
3559                     }
3560                     *pset = set;
3561                 }
3562                 break;
3563             }
3564             break;
3565         }
3566         op->life = arg_life;
3567     }
3568 }
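
/*
 * An illustrative reading of the op->life word computed above: for
 * "add_i32 t0, t1, t2", where this op is t2's final use and t0 must be
 * synced to memory, arg_life is (SYNC_ARG << 0) | (DEAD_ARG << 2),
 * with outputs indexed first and inputs after them.
 */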
3569 
3570 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3571 static bool __attribute__((noinline))
3572 liveness_pass_2(TCGContext *s)
3573 {
3574     int nb_globals = s->nb_globals;
3575     int nb_temps, i;
3576     bool changes = false;
3577     TCGOp *op, *op_next;
3578 
3579     /* Create a temporary for each indirect global.  */
3580     for (i = 0; i < nb_globals; ++i) {
3581         TCGTemp *its = &s->temps[i];
3582         if (its->indirect_reg) {
3583             TCGTemp *dts = tcg_temp_alloc(s);
3584             dts->type = its->type;
3585             dts->base_type = its->base_type;
3586             dts->temp_subindex = its->temp_subindex;
3587             dts->kind = TEMP_EBB;
3588             its->state_ptr = dts;
3589         } else {
3590             its->state_ptr = NULL;
3591         }
3592         /* All globals begin dead.  */
3593         its->state = TS_DEAD;
3594     }
3595     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3596         TCGTemp *its = &s->temps[i];
3597         its->state_ptr = NULL;
3598         its->state = TS_DEAD;
3599     }
3600 
3601     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3602         TCGOpcode opc = op->opc;
3603         const TCGOpDef *def = &tcg_op_defs[opc];
3604         TCGLifeData arg_life = op->life;
3605         int nb_iargs, nb_oargs, call_flags;
3606         TCGTemp *arg_ts, *dir_ts;
3607 
3608         if (opc == INDEX_op_call) {
3609             nb_oargs = TCGOP_CALLO(op);
3610             nb_iargs = TCGOP_CALLI(op);
3611             call_flags = tcg_call_flags(op);
3612         } else {
3613             nb_iargs = def->nb_iargs;
3614             nb_oargs = def->nb_oargs;
3615 
3616             /* Set flags similar to those that calls require.  */
3617             if (def->flags & TCG_OPF_COND_BRANCH) {
3618                 /* Like reading globals: sync_globals */
3619                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3620             } else if (def->flags & TCG_OPF_BB_END) {
3621                 /* Like writing globals: save_globals */
3622                 call_flags = 0;
3623             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3624                 /* Like reading globals: sync_globals */
3625                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3626             } else {
3627                 /* No effect on globals.  */
3628                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3629                               TCG_CALL_NO_WRITE_GLOBALS);
3630             }
3631         }
3632 
3633         /* Make sure that input arguments are available.  */
3634         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3635             arg_ts = arg_temp(op->args[i]);
3636             dir_ts = arg_ts->state_ptr;
3637             if (dir_ts && arg_ts->state == TS_DEAD) {
3638                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3639                                   ? INDEX_op_ld_i32
3640                                   : INDEX_op_ld_i64);
3641                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3642 
3643                 lop->args[0] = temp_arg(dir_ts);
3644                 lop->args[1] = temp_arg(arg_ts->mem_base);
3645                 lop->args[2] = arg_ts->mem_offset;
3646 
3647                 /* Loaded, but synced with memory.  */
3648                 arg_ts->state = TS_MEM;
3649             }
3650         }
3651 
3652         /* Perform input replacement, and mark inputs that became dead.
3653            No action is required except keeping temp_state up to date
3654            so that we reload when needed.  */
3655         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3656             arg_ts = arg_temp(op->args[i]);
3657             dir_ts = arg_ts->state_ptr;
3658             if (dir_ts) {
3659                 op->args[i] = temp_arg(dir_ts);
3660                 changes = true;
3661                 if (IS_DEAD_ARG(i)) {
3662                     arg_ts->state = TS_DEAD;
3663                 }
3664             }
3665         }
3666 
3667         /* Liveness analysis should ensure that the following are
3668            all correct, for call sites and basic block end points.  */
3669         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3670             /* Nothing to do */
3671         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3672             for (i = 0; i < nb_globals; ++i) {
3673                 /* Liveness should see that globals are synced back,
3674                    that is, either TS_DEAD or TS_MEM.  */
3675                 arg_ts = &s->temps[i];
3676                 tcg_debug_assert(arg_ts->state_ptr == 0
3677                                  || arg_ts->state != 0);
3678             }
3679         } else {
3680             for (i = 0; i < nb_globals; ++i) {
3681                 /* Liveness should see that globals are saved back,
3682                    that is, TS_DEAD, waiting to be reloaded.  */
3683                 arg_ts = &s->temps[i];
3684                 tcg_debug_assert(arg_ts->state_ptr == 0
3685                                  || arg_ts->state == TS_DEAD);
3686             }
3687         }
3688 
3689         /* Outputs become available.  */
3690         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3691             arg_ts = arg_temp(op->args[0]);
3692             dir_ts = arg_ts->state_ptr;
3693             if (dir_ts) {
3694                 op->args[0] = temp_arg(dir_ts);
3695                 changes = true;
3696 
3697                 /* The output is now live and modified.  */
3698                 arg_ts->state = 0;
3699 
3700                 if (NEED_SYNC_ARG(0)) {
3701                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3702                                       ? INDEX_op_st_i32
3703                                       : INDEX_op_st_i64);
3704                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3705                     TCGTemp *out_ts = dir_ts;
3706 
3707                     if (IS_DEAD_ARG(0)) {
3708                         out_ts = arg_temp(op->args[1]);
3709                         arg_ts->state = TS_DEAD;
3710                         tcg_op_remove(s, op);
3711                     } else {
3712                         arg_ts->state = TS_MEM;
3713                     }
3714 
3715                     sop->args[0] = temp_arg(out_ts);
3716                     sop->args[1] = temp_arg(arg_ts->mem_base);
3717                     sop->args[2] = arg_ts->mem_offset;
3718                 } else {
3719                     tcg_debug_assert(!IS_DEAD_ARG(0));
3720                 }
3721             }
3722         } else {
3723             for (i = 0; i < nb_oargs; i++) {
3724                 arg_ts = arg_temp(op->args[i]);
3725                 dir_ts = arg_ts->state_ptr;
3726                 if (!dir_ts) {
3727                     continue;
3728                 }
3729                 op->args[i] = temp_arg(dir_ts);
3730                 changes = true;
3731 
3732                 /* The output is now live and modified.  */
3733                 arg_ts->state = 0;
3734 
3735                 /* Sync outputs upon their last write.  */
3736                 if (NEED_SYNC_ARG(i)) {
3737                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3738                                       ? INDEX_op_st_i32
3739                                       : INDEX_op_st_i64);
3740                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3741 
3742                     sop->args[0] = temp_arg(dir_ts);
3743                     sop->args[1] = temp_arg(arg_ts->mem_base);
3744                     sop->args[2] = arg_ts->mem_offset;
3745 
3746                     arg_ts->state = TS_MEM;
3747                 }
3748                 /* Drop outputs that are dead.  */
3749                 if (IS_DEAD_ARG(i)) {
3750                     arg_ts->state = TS_DEAD;
3751                 }
3752             }
3753         }
3754     }
3755 
3756     return changes;
3757 }
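
/*
 * Sketch of the rewrite performed above for an indirect global G with
 * direct temp dG (the base temp and offset are illustrative):
 *     add_i32 G, G, t1
 * becomes
 *     ld_i32  dG, env, $off      <- inserted because the input was TS_DEAD
 *     add_i32 dG, dG, t1
 *     st_i32  dG, env, $off      <- inserted because NEED_SYNC_ARG(0)
 */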
3758 
3759 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3760 {
3761     intptr_t off;
3762     int size, align;
3763 
3764     /* When allocating an object, look at the full type. */
3765     size = tcg_type_size(ts->base_type);
3766     switch (ts->base_type) {
3767     case TCG_TYPE_I32:
3768         align = 4;
3769         break;
3770     case TCG_TYPE_I64:
3771     case TCG_TYPE_V64:
3772         align = 8;
3773         break;
3774     case TCG_TYPE_I128:
3775     case TCG_TYPE_V128:
3776     case TCG_TYPE_V256:
3777         /*
3778          * Note that we do not require aligned storage for V256,
3779          * and that we provide alignment for I128 to match V128,
3780          * even if that's above what the host ABI requires.
3781          */
3782         align = 16;
3783         break;
3784     default:
3785         g_assert_not_reached();
3786     }
3787 
3788     /*
3789      * Assume the stack is sufficiently aligned.
3790      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3791      * and do not require 16 byte vector alignment.  This seems slightly
3792      * easier than fully parameterizing the above switch statement.
3793      */
3794     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3795     off = ROUND_UP(s->current_frame_offset, align);
3796 
3797     /* If we've exhausted the stack frame, restart with a smaller TB. */
3798     if (off + size > s->frame_end) {
3799         tcg_raise_tb_overflow(s);
3800     }
3801     s->current_frame_offset = off + size;
3802 #if defined(__sparc__)
3803     off += TCG_TARGET_STACK_BIAS;
3804 #endif
3805 
3806     /* If the object was subdivided, assign memory to all the parts. */
3807     if (ts->base_type != ts->type) {
3808         int part_size = tcg_type_size(ts->type);
3809         int part_count = size / part_size;
3810 
3811         /*
3812          * Each part is allocated sequentially in tcg_temp_new_internal.
3813          * Jump back to the first part by subtracting the current index.
3814          */
3815         ts -= ts->temp_subindex;
3816         for (int i = 0; i < part_count; ++i) {
3817             ts[i].mem_offset = off + i * part_size;
3818             ts[i].mem_base = s->frame_temp;
3819             ts[i].mem_allocated = 1;
3820         }
3821     } else {
3822         ts->mem_offset = off;
3823         ts->mem_base = s->frame_temp;
3824         ts->mem_allocated = 1;
3825     }
3826 }
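
/*
 * For example (illustrative): a TCG_TYPE_I128 temp subdivided into two
 * TCG_TYPE_I64 parts gets part_size = 8 and part_count = 2, so the
 * subindex-0 part lands at the aligned offset 'off' and the subindex-1
 * part at off + 8, both based on s->frame_temp.
 */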
3827 
3828 /* Assign @reg to @ts, and update reg_to_temp[]. */
3829 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3830 {
3831     if (ts->val_type == TEMP_VAL_REG) {
3832         TCGReg old = ts->reg;
3833         tcg_debug_assert(s->reg_to_temp[old] == ts);
3834         if (old == reg) {
3835             return;
3836         }
3837         s->reg_to_temp[old] = NULL;
3838     }
3839     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3840     s->reg_to_temp[reg] = ts;
3841     ts->val_type = TEMP_VAL_REG;
3842     ts->reg = reg;
3843 }
3844 
3845 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3846 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3847 {
3848     tcg_debug_assert(type != TEMP_VAL_REG);
3849     if (ts->val_type == TEMP_VAL_REG) {
3850         TCGReg reg = ts->reg;
3851         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3852         s->reg_to_temp[reg] = NULL;
3853     }
3854     ts->val_type = type;
3855 }
3856 
3857 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3858 
3859 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3860    mark it free; otherwise mark it dead.  */
3861 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3862 {
3863     TCGTempVal new_type;
3864 
3865     switch (ts->kind) {
3866     case TEMP_FIXED:
3867         return;
3868     case TEMP_GLOBAL:
3869     case TEMP_TB:
3870         new_type = TEMP_VAL_MEM;
3871         break;
3872     case TEMP_EBB:
3873         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3874         break;
3875     case TEMP_CONST:
3876         new_type = TEMP_VAL_CONST;
3877         break;
3878     default:
3879         g_assert_not_reached();
3880     }
3881     set_temp_val_nonreg(s, ts, new_type);
3882 }
3883 
3884 /* Mark a temporary as dead.  */
3885 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3886 {
3887     temp_free_or_dead(s, ts, 1);
3888 }
3889 
3890 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3891    register needs to be allocated to store a constant.  If 'free_or_dead'
3892    is non-zero, subsequently release the temporary; if it is positive, the
3893    temp is dead; if it is negative, the temp is free.  */
3894 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3895                       TCGRegSet preferred_regs, int free_or_dead)
3896 {
3897     if (!temp_readonly(ts) && !ts->mem_coherent) {
3898         if (!ts->mem_allocated) {
3899             temp_allocate_frame(s, ts);
3900         }
3901         switch (ts->val_type) {
3902         case TEMP_VAL_CONST:
3903             /* If we're going to free the temp immediately, then we won't
3904                require it later in a register, so attempt to store the
3905                constant to memory directly.  */
3906             if (free_or_dead
3907                 && tcg_out_sti(s, ts->type, ts->val,
3908                                ts->mem_base->reg, ts->mem_offset)) {
3909                 break;
3910             }
3911             temp_load(s, ts, tcg_target_available_regs[ts->type],
3912                       allocated_regs, preferred_regs);
3913             /* fallthrough */
3914 
3915         case TEMP_VAL_REG:
3916             tcg_out_st(s, ts->type, ts->reg,
3917                        ts->mem_base->reg, ts->mem_offset);
3918             break;
3919 
3920         case TEMP_VAL_MEM:
3921             break;
3922 
3923         case TEMP_VAL_DEAD:
3924         default:
3925             g_assert_not_reached();
3926         }
3927         ts->mem_coherent = 1;
3928     }
3929     if (free_or_dead) {
3930         temp_free_or_dead(s, ts, free_or_dead);
3931     }
3932 }
3933 
3934 /* free register 'reg' by spilling the corresponding temporary if necessary */
3935 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3936 {
3937     TCGTemp *ts = s->reg_to_temp[reg];
3938     if (ts != NULL) {
3939         temp_sync(s, ts, allocated_regs, 0, -1);
3940     }
3941 }
3942 
3943 /**
3944  * tcg_reg_alloc:
3945  * @required_regs: Set of registers in which we must allocate.
3946  * @allocated_regs: Set of registers which must be avoided.
3947  * @preferred_regs: Set of registers we should prefer.
3948  * @rev: True if we search the registers in "indirect" order.
3949  *
3950  * The allocated register must be in @required_regs & ~@allocated_regs,
3951  * but if we can put it in @preferred_regs we may save a move later.
3952  */
3953 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3954                             TCGRegSet allocated_regs,
3955                             TCGRegSet preferred_regs, bool rev)
3956 {
3957     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3958     TCGRegSet reg_ct[2];
3959     const int *order;
3960 
3961     reg_ct[1] = required_regs & ~allocated_regs;
3962     tcg_debug_assert(reg_ct[1] != 0);
3963     reg_ct[0] = reg_ct[1] & preferred_regs;
3964 
3965     /* Skip the preferred_regs option if it cannot be satisfied,
3966        or if the preference made no difference.  */
3967     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3968 
3969     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3970 
3971     /* Try free registers, preferences first.  */
3972     for (j = f; j < 2; j++) {
3973         TCGRegSet set = reg_ct[j];
3974 
3975         if (tcg_regset_single(set)) {
3976             /* One register in the set.  */
3977             TCGReg reg = tcg_regset_first(set);
3978             if (s->reg_to_temp[reg] == NULL) {
3979                 return reg;
3980             }
3981         } else {
3982             for (i = 0; i < n; i++) {
3983                 TCGReg reg = order[i];
3984                 if (s->reg_to_temp[reg] == NULL &&
3985                     tcg_regset_test_reg(set, reg)) {
3986                     return reg;
3987                 }
3988             }
3989         }
3990     }
3991 
3992     /* We must spill something.  */
3993     for (j = f; j < 2; j++) {
3994         TCGRegSet set = reg_ct[j];
3995 
3996         if (tcg_regset_single(set)) {
3997             /* One register in the set.  */
3998             TCGReg reg = tcg_regset_first(set);
3999             tcg_reg_free(s, reg, allocated_regs);
4000             return reg;
4001         } else {
4002             for (i = 0; i < n; i++) {
4003                 TCGReg reg = order[i];
4004                 if (tcg_regset_test_reg(set, reg)) {
4005                     tcg_reg_free(s, reg, allocated_regs);
4006                     return reg;
4007                 }
4008             }
4009         }
4010     }
4011 
4012     g_assert_not_reached();
4013 }
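
/*
 * Worked example for tcg_reg_alloc(): with required = {r0, r1, r2},
 * allocated = {r0} and preferred = {r2}, reg_ct[1] = {r1, r2} and
 * reg_ct[0] = {r2}.  A free r2 is returned first; failing that, any
 * free register of {r1, r2} in allocation order; only then is an
 * occupied register spilled, again trying the preferred set first.
 */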
4014 
4015 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4016                                  TCGRegSet allocated_regs,
4017                                  TCGRegSet preferred_regs, bool rev)
4018 {
4019     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4020     TCGRegSet reg_ct[2];
4021     const int *order;
4022 
4023     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4024     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4025     tcg_debug_assert(reg_ct[1] != 0);
4026     reg_ct[0] = reg_ct[1] & preferred_regs;
4027 
4028     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4029 
4030     /*
4031      * Skip the preferred_regs option if it cannot be satisfied,
4032      * or if the preference made no difference.
4033      */
4034     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4035 
4036     /*
4037      * Minimize the number of flushes by looking for 2 free registers first,
4038      * then a single flush, then two flushes.
4039      */
4040     for (fmin = 2; fmin >= 0; fmin--) {
4041         for (j = k; j < 2; j++) {
4042             TCGRegSet set = reg_ct[j];
4043 
4044             for (i = 0; i < n; i++) {
4045                 TCGReg reg = order[i];
4046 
4047                 if (tcg_regset_test_reg(set, reg)) {
4048                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4049                     if (f >= fmin) {
4050                         tcg_reg_free(s, reg, allocated_regs);
4051                         tcg_reg_free(s, reg + 1, allocated_regs);
4052                         return reg;
4053                     }
4054                 }
4055             }
4056         }
4057     }
4058     g_assert_not_reached();
4059 }
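
/*
 * The pair mask above deserves an example: with allocated = {r3},
 * (allocated | allocated >> 1) = {r2, r3}, so r2 is also rejected as a
 * pair base because its partner r3 is taken.  The fmin loop then
 * prefers a base where both registers are free (f == 2) over one
 * flush, over two.
 */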
4060 
4061 /* Make sure the temporary is in a register.  If needed, allocate the register
4062    from DESIRED while avoiding ALLOCATED.  */
4063 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4064                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4065 {
4066     TCGReg reg;
4067 
4068     switch (ts->val_type) {
4069     case TEMP_VAL_REG:
4070         return;
4071     case TEMP_VAL_CONST:
4072         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4073                             preferred_regs, ts->indirect_base);
4074         if (ts->type <= TCG_TYPE_I64) {
4075             tcg_out_movi(s, ts->type, reg, ts->val);
4076         } else {
4077             uint64_t val = ts->val;
4078             MemOp vece = MO_64;
4079 
4080             /*
4081              * Find the minimal vector element that matches the constant.
4082              * The targets will, in general, have to do this search anyway,
4083              * so do it generically here.
4084              */
4085             if (val == dup_const(MO_8, val)) {
4086                 vece = MO_8;
4087             } else if (val == dup_const(MO_16, val)) {
4088                 vece = MO_16;
4089             } else if (val == dup_const(MO_32, val)) {
4090                 vece = MO_32;
4091             }
4092 
4093             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4094         }
4095         ts->mem_coherent = 0;
4096         break;
4097     case TEMP_VAL_MEM:
4098         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4099                             preferred_regs, ts->indirect_base);
4100         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4101         ts->mem_coherent = 1;
4102         break;
4103     case TEMP_VAL_DEAD:
4104     default:
4105         g_assert_not_reached();
4106     }
4107     set_temp_val_reg(s, ts, reg);
4108 }
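
/*
 * Examples for the dupi element search in temp_load() (constants are
 * illustrative): 0x0101010101010101 matches dup_const(MO_8, val);
 * 0x0001000100010001 matches MO_16 but not MO_8; and
 * 0x0000000100000001 matches only MO_32, leaving vece = MO_32.
 */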
4109 
4110 /* Save a temporary to memory. 'allocated_regs' is used in case a
4111    temporary register needs to be allocated to store a constant.  */
4112 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4113 {
4114     /* The liveness analysis already ensures that globals are back
4115        in memory. Keep a tcg_debug_assert for safety. */
4116     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4117 }
4118 
4119 /* save globals to their canonical location and assume they can be
4120    modified by the following code. 'allocated_regs' is used in case a
4121    temporary register needs to be allocated to store a constant. */
4122 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4123 {
4124     int i, n;
4125 
4126     for (i = 0, n = s->nb_globals; i < n; i++) {
4127         temp_save(s, &s->temps[i], allocated_regs);
4128     }
4129 }
4130 
4131 /* sync globals to their canonical location and assume they can be
4132    read by the following code. 'allocated_regs' is used in case a
4133    temporary register needs to be allocated to store a constant. */
4134 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4135 {
4136     int i, n;
4137 
4138     for (i = 0, n = s->nb_globals; i < n; i++) {
4139         TCGTemp *ts = &s->temps[i];
4140         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4141                          || ts->kind == TEMP_FIXED
4142                          || ts->mem_coherent);
4143     }
4144 }
4145 
4146 /* at the end of a basic block, we assume all temporaries are dead and
4147    all globals are stored at their canonical location. */
4148 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4149 {
4150     int i;
4151 
4152     for (i = s->nb_globals; i < s->nb_temps; i++) {
4153         TCGTemp *ts = &s->temps[i];
4154 
4155         switch (ts->kind) {
4156         case TEMP_TB:
4157             temp_save(s, ts, allocated_regs);
4158             break;
4159         case TEMP_EBB:
4160             /* The liveness analysis already ensures that temps are dead.
4161                Keep a tcg_debug_assert for safety. */
4162             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4163             break;
4164         case TEMP_CONST:
4165             /* Similarly, we should have freed any allocated register. */
4166             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4167             break;
4168         default:
4169             g_assert_not_reached();
4170         }
4171     }
4172 
4173     save_globals(s, allocated_regs);
4174 }
4175 
4176 /*
4177  * At a conditional branch, we assume all temporaries are dead unless
4178  * explicitly live-across-conditional-branch; all globals and local
4179  * temps are synced to their location.
4180  */
4181 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4182 {
4183     sync_globals(s, allocated_regs);
4184 
4185     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4186         TCGTemp *ts = &s->temps[i];
4187         /*
4188          * The liveness analysis already ensures that temps are dead.
4189          * Keep tcg_debug_asserts for safety.
4190          */
4191         switch (ts->kind) {
4192         case TEMP_TB:
4193             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4194             break;
4195         case TEMP_EBB:
4196         case TEMP_CONST:
4197             break;
4198         default:
4199             g_assert_not_reached();
4200         }
4201     }
4202 }
4203 
4204 /*
4205  * Specialized code generation for INDEX_op_mov_* with a constant.
4206  */
4207 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4208                                   tcg_target_ulong val, TCGLifeData arg_life,
4209                                   TCGRegSet preferred_regs)
4210 {
4211     /* ENV should not be modified.  */
4212     tcg_debug_assert(!temp_readonly(ots));
4213 
4214     /* The movi is not explicitly generated here.  */
4215     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4216     ots->val = val;
4217     ots->mem_coherent = 0;
4218     if (NEED_SYNC_ARG(0)) {
4219         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4220     } else if (IS_DEAD_ARG(0)) {
4221         temp_dead(s, ots);
4222     }
4223 }
4224 
4225 /*
4226  * Specialized code generation for INDEX_op_mov_*.
4227  */
4228 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4229 {
4230     const TCGLifeData arg_life = op->life;
4231     TCGRegSet allocated_regs, preferred_regs;
4232     TCGTemp *ts, *ots;
4233     TCGType otype, itype;
4234     TCGReg oreg, ireg;
4235 
4236     allocated_regs = s->reserved_regs;
4237     preferred_regs = output_pref(op, 0);
4238     ots = arg_temp(op->args[0]);
4239     ts = arg_temp(op->args[1]);
4240 
4241     /* ENV should not be modified.  */
4242     tcg_debug_assert(!temp_readonly(ots));
4243 
4244     /* Note that otype != itype for no-op truncation.  */
4245     otype = ots->type;
4246     itype = ts->type;
4247 
4248     if (ts->val_type == TEMP_VAL_CONST) {
4249         /* propagate constant or generate sti */
4250         tcg_target_ulong val = ts->val;
4251         if (IS_DEAD_ARG(1)) {
4252             temp_dead(s, ts);
4253         }
4254         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4255         return;
4256     }
4257 
4258     /* If the source value is in memory, we're going to be forced
4259        to have it in a register in order to perform the copy.  Copy
4260        the SOURCE value into its own register first; that way we
4261        don't have to reload SOURCE the next time it is used. */
4262     if (ts->val_type == TEMP_VAL_MEM) {
4263         temp_load(s, ts, tcg_target_available_regs[itype],
4264                   allocated_regs, preferred_regs);
4265     }
4266     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4267     ireg = ts->reg;
4268 
4269     if (IS_DEAD_ARG(0)) {
4270         /* mov to a non-saved dead register makes no sense (even with
4271            liveness analysis disabled). */
4272         tcg_debug_assert(NEED_SYNC_ARG(0));
4273         if (!ots->mem_allocated) {
4274             temp_allocate_frame(s, ots);
4275         }
4276         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4277         if (IS_DEAD_ARG(1)) {
4278             temp_dead(s, ts);
4279         }
4280         temp_dead(s, ots);
4281         return;
4282     }
4283 
4284     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4285         /*
4286          * The mov can be suppressed.  Kill input first, so that it
4287          * is unlinked from reg_to_temp, then set the output to the
4288          * reg that we saved from the input.
4289          */
4290         temp_dead(s, ts);
4291         oreg = ireg;
4292     } else {
4293         if (ots->val_type == TEMP_VAL_REG) {
4294             oreg = ots->reg;
4295         } else {
4296             /* Make sure to not spill the input register during allocation. */
4297             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4298                                  allocated_regs | ((TCGRegSet)1 << ireg),
4299                                  preferred_regs, ots->indirect_base);
4300         }
4301         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4302             /*
4303              * Cross register class move not supported.
4304              * Store the source register into the destination slot
4305              * and leave the destination temp as TEMP_VAL_MEM.
4306              */
4307             assert(!temp_readonly(ots));
4308             if (!ts->mem_allocated) {
4309                 temp_allocate_frame(s, ots);
4310             }
4311             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4312             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4313             ots->mem_coherent = 1;
4314             return;
4315         }
4316     }
4317     set_temp_val_reg(s, ots, oreg);
4318     ots->mem_coherent = 0;
4319 
4320     if (NEED_SYNC_ARG(0)) {
4321         temp_sync(s, ots, allocated_regs, 0, 0);
4322     }
4323 }
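
/*
 * For example (illustrative): "mov_i32 t0, t1" where t1 dies and is
 * not TEMP_FIXED emits no host move above: t1 is killed first so that
 * its register is unlinked from reg_to_temp[], and t0 simply takes
 * over that register (a store may still follow if the output must be
 * synced).
 */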
4324 
4325 /*
4326  * Specialized code generation for INDEX_op_dup_vec.
4327  */
4328 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4329 {
4330     const TCGLifeData arg_life = op->life;
4331     TCGRegSet dup_out_regs, dup_in_regs;
4332     TCGTemp *its, *ots;
4333     TCGType itype, vtype;
4334     unsigned vece;
4335     int lowpart_ofs;
4336     bool ok;
4337 
4338     ots = arg_temp(op->args[0]);
4339     its = arg_temp(op->args[1]);
4340 
4341     /* ENV should not be modified.  */
4342     tcg_debug_assert(!temp_readonly(ots));
4343 
4344     itype = its->type;
4345     vece = TCGOP_VECE(op);
4346     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4347 
4348     if (its->val_type == TEMP_VAL_CONST) {
4349         /* Propagate constant via movi -> dupi.  */
4350         tcg_target_ulong val = its->val;
4351         if (IS_DEAD_ARG(1)) {
4352             temp_dead(s, its);
4353         }
4354         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4355         return;
4356     }
4357 
4358     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4359     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4360 
4361     /* Allocate the output register now.  */
4362     if (ots->val_type != TEMP_VAL_REG) {
4363         TCGRegSet allocated_regs = s->reserved_regs;
4364         TCGReg oreg;
4365 
4366         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4367             /* Make sure to not spill the input register. */
4368             tcg_regset_set_reg(allocated_regs, its->reg);
4369         }
4370         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4371                              output_pref(op, 0), ots->indirect_base);
4372         set_temp_val_reg(s, ots, oreg);
4373     }
4374 
4375     switch (its->val_type) {
4376     case TEMP_VAL_REG:
4377         /*
4378          * The dup constraints must be broad, covering all possible VECE.
4379          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4380          * to fail, indicating that extra moves are required for that case.
4381          */
4382         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4383             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4384                 goto done;
4385             }
4386             /* Try again from memory or a vector input register.  */
4387         }
4388         if (!its->mem_coherent) {
4389             /*
4390              * The input register is not synced, and so an extra store
4391              * would be required to use memory.  Attempt an integer-vector
4392              * register move first.  We do not have a TCGRegSet for this.
4393              */
4394             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4395                 break;
4396             }
4397             /* Sync the temp back to its slot and load from there.  */
4398             temp_sync(s, its, s->reserved_regs, 0, 0);
4399         }
4400         /* fall through */
4401 
4402     case TEMP_VAL_MEM:
4403         lowpart_ofs = 0;
4404         if (HOST_BIG_ENDIAN) {
4405             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4406         }
4407         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4408                              its->mem_offset + lowpart_ofs)) {
4409             goto done;
4410         }
4411         /* Load the input into the destination vector register. */
4412         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4413         break;
4414 
4415     default:
4416         g_assert_not_reached();
4417     }
4418 
4419     /* We now have a vector input register, so dup must succeed. */
4420     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4421     tcg_debug_assert(ok);
4422 
4423  done:
4424     ots->mem_coherent = 0;
4425     if (IS_DEAD_ARG(1)) {
4426         temp_dead(s, its);
4427     }
4428     if (NEED_SYNC_ARG(0)) {
4429         temp_sync(s, ots, s->reserved_regs, 0, 0);
4430     }
4431     if (IS_DEAD_ARG(0)) {
4432         temp_dead(s, ots);
4433     }
4434 }
4435 
4436 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4437 {
4438     const TCGLifeData arg_life = op->life;
4439     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4440     TCGRegSet i_allocated_regs;
4441     TCGRegSet o_allocated_regs;
4442     int i, k, nb_iargs, nb_oargs;
4443     TCGReg reg;
4444     TCGArg arg;
4445     const TCGArgConstraint *arg_ct;
4446     TCGTemp *ts;
4447     TCGArg new_args[TCG_MAX_OP_ARGS];
4448     int const_args[TCG_MAX_OP_ARGS];
4449 
4450     nb_oargs = def->nb_oargs;
4451     nb_iargs = def->nb_iargs;
4452 
4453     /* copy constants */
4454     memcpy(new_args + nb_oargs + nb_iargs,
4455            op->args + nb_oargs + nb_iargs,
4456            sizeof(TCGArg) * def->nb_cargs);
4457 
4458     i_allocated_regs = s->reserved_regs;
4459     o_allocated_regs = s->reserved_regs;
4460 
4461     /* satisfy input constraints */
4462     for (k = 0; k < nb_iargs; k++) {
4463         TCGRegSet i_preferred_regs, i_required_regs;
4464         bool allocate_new_reg, copyto_new_reg;
4465         TCGTemp *ts2;
4466         int i1, i2;
4467 
4468         i = def->args_ct[nb_oargs + k].sort_index;
4469         arg = op->args[i];
4470         arg_ct = &def->args_ct[i];
4471         ts = arg_temp(arg);
4472 
4473         if (ts->val_type == TEMP_VAL_CONST
4474             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4475             /* constant is OK for instruction */
4476             const_args[i] = 1;
4477             new_args[i] = ts->val;
4478             continue;
4479         }
4480 
4481         reg = ts->reg;
4482         i_preferred_regs = 0;
4483         i_required_regs = arg_ct->regs;
4484         allocate_new_reg = false;
4485         copyto_new_reg = false;
4486 
4487         switch (arg_ct->pair) {
4488         case 0: /* not paired */
4489             if (arg_ct->ialias) {
4490                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4491 
4492                 /*
4493                  * If the input is readonly, then it cannot also be an
4494                  * output and aliased to itself.  If the input is not
4495                  * dead after the instruction, we must allocate a new
4496                  * register and move it.
4497                  */
4498                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4499                     allocate_new_reg = true;
4500                 } else if (ts->val_type == TEMP_VAL_REG) {
4501                     /*
4502                      * Check if the current register has already been
4503                      * allocated for another input.
4504                      */
4505                     allocate_new_reg =
4506                         tcg_regset_test_reg(i_allocated_regs, reg);
4507                 }
4508             }
4509             if (!allocate_new_reg) {
4510                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4511                           i_preferred_regs);
4512                 reg = ts->reg;
4513                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4514             }
4515             if (allocate_new_reg) {
4516                 /*
4517                  * Allocate a new register matching the constraint
4518                  * and move the temporary register into it.
4519                  */
4520                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4521                           i_allocated_regs, 0);
4522                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4523                                     i_preferred_regs, ts->indirect_base);
4524                 copyto_new_reg = true;
4525             }
4526             break;
4527 
4528         case 1:
4529             /* First of an input pair; if i1 == i2, the second is an output. */
4530             i1 = i;
4531             i2 = arg_ct->pair_index;
4532             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4533 
4534             /*
4535              * It is easier to default to allocating a new pair
4536              * and to identify a few cases where it's not required.
4537              */
4538             if (arg_ct->ialias) {
4539                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4540                 if (IS_DEAD_ARG(i1) &&
4541                     IS_DEAD_ARG(i2) &&
4542                     !temp_readonly(ts) &&
4543                     ts->val_type == TEMP_VAL_REG &&
4544                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4545                     tcg_regset_test_reg(i_required_regs, reg) &&
4546                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4547                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4548                     (ts2
4549                      ? ts2->val_type == TEMP_VAL_REG &&
4550                        ts2->reg == reg + 1 &&
4551                        !temp_readonly(ts2)
4552                      : s->reg_to_temp[reg + 1] == NULL)) {
4553                     break;
4554                 }
4555             } else {
4556                 /* Without aliasing, the pair must also be an input. */
4557                 tcg_debug_assert(ts2);
4558                 if (ts->val_type == TEMP_VAL_REG &&
4559                     ts2->val_type == TEMP_VAL_REG &&
4560                     ts2->reg == reg + 1 &&
4561                     tcg_regset_test_reg(i_required_regs, reg)) {
4562                     break;
4563                 }
4564             }
4565             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4566                                      0, ts->indirect_base);
4567             goto do_pair;
4568 
4569         case 2: /* pair second */
4570             reg = new_args[arg_ct->pair_index] + 1;
4571             goto do_pair;
4572 
4573         case 3: /* ialias with second output, no first input */
4574             tcg_debug_assert(arg_ct->ialias);
4575             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4576 
4577             if (IS_DEAD_ARG(i) &&
4578                 !temp_readonly(ts) &&
4579                 ts->val_type == TEMP_VAL_REG &&
4580                 reg > 0 &&
4581                 s->reg_to_temp[reg - 1] == NULL &&
4582                 tcg_regset_test_reg(i_required_regs, reg) &&
4583                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4584                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4585                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4586                 break;
4587             }
4588             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4589                                      i_allocated_regs, 0,
4590                                      ts->indirect_base);
4591             tcg_regset_set_reg(i_allocated_regs, reg);
4592             reg += 1;
4593             goto do_pair;
4594 
4595         do_pair:
4596             /*
4597              * If an aliased input is not dead after the instruction,
4598              * we must allocate a new register and move it.
4599              */
4600             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4601                 TCGRegSet t_allocated_regs = i_allocated_regs;
4602 
4603                 /*
4604                  * Because of the alias, and the continued life, make sure
4605                  * that the temp is somewhere *other* than the reg pair,
4606                  * and we get a copy in reg.
4607                  */
4608                 tcg_regset_set_reg(t_allocated_regs, reg);
4609                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4610                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4611                     /* If ts was already in reg, copy it somewhere else. */
4612                     TCGReg nr;
4613                     bool ok;
4614 
4615                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4616                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4617                                        t_allocated_regs, 0, ts->indirect_base);
4618                     ok = tcg_out_mov(s, ts->type, nr, reg);
4619                     tcg_debug_assert(ok);
4620 
4621                     set_temp_val_reg(s, ts, nr);
4622                 } else {
4623                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4624                               t_allocated_regs, 0);
4625                     copyto_new_reg = true;
4626                 }
4627             } else {
4628                 /* Preferably allocate to reg, otherwise copy. */
4629                 i_required_regs = (TCGRegSet)1 << reg;
4630                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4631                           i_preferred_regs);
4632                 copyto_new_reg = ts->reg != reg;
4633             }
4634             break;
4635 
4636         default:
4637             g_assert_not_reached();
4638         }
4639 
4640         if (copyto_new_reg) {
4641             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4642                 /*
4643                  * Cross register class move not supported.  Sync the
4644                  * temp back to its slot and load from there.
4645                  */
4646                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4647                 tcg_out_ld(s, ts->type, reg,
4648                            ts->mem_base->reg, ts->mem_offset);
4649             }
4650         }
4651         new_args[i] = reg;
4652         const_args[i] = 0;
4653         tcg_regset_set_reg(i_allocated_regs, reg);
4654     }
4655 
4656     /* mark dead temporaries and free the associated registers */
4657     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4658         if (IS_DEAD_ARG(i)) {
4659             temp_dead(s, arg_temp(op->args[i]));
4660         }
4661     }
4662 
4663     if (def->flags & TCG_OPF_COND_BRANCH) {
4664         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4665     } else if (def->flags & TCG_OPF_BB_END) {
4666         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4667     } else {
4668         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4669             /* XXX: permit generic clobber register list ? */
4670             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4671                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4672                     tcg_reg_free(s, i, i_allocated_regs);
4673                 }
4674             }
4675         }
4676         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4677             /* Sync globals if the op has side effects and might
4678                trigger an exception.  */
4679             sync_globals(s, i_allocated_regs);
4680         }
4681 
4682         /* satisfy the output constraints */
4683         for (k = 0; k < nb_oargs; k++) {
4684             i = def->args_ct[k].sort_index;
4685             arg = op->args[i];
4686             arg_ct = &def->args_ct[i];
4687             ts = arg_temp(arg);
4688 
4689             /* ENV should not be modified.  */
4690             tcg_debug_assert(!temp_readonly(ts));
4691 
4692             switch (arg_ct->pair) {
4693             case 0: /* not paired */
4694                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4695                     reg = new_args[arg_ct->alias_index];
4696                 } else if (arg_ct->newreg) {
4697                     reg = tcg_reg_alloc(s, arg_ct->regs,
4698                                         i_allocated_regs | o_allocated_regs,
4699                                         output_pref(op, k), ts->indirect_base);
4700                 } else {
4701                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4702                                         output_pref(op, k), ts->indirect_base);
4703                 }
4704                 break;
4705 
4706             case 1: /* first of pair */
4707                 tcg_debug_assert(!arg_ct->newreg);
4708                 if (arg_ct->oalias) {
4709                     reg = new_args[arg_ct->alias_index];
4710                     break;
4711                 }
4712                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4713                                          output_pref(op, k), ts->indirect_base);
4714                 break;
4715 
4716             case 2: /* second of pair */
4717                 tcg_debug_assert(!arg_ct->newreg);
4718                 if (arg_ct->oalias) {
4719                     reg = new_args[arg_ct->alias_index];
4720                 } else {
4721                     reg = new_args[arg_ct->pair_index] + 1;
4722                 }
4723                 break;
4724 
4725             case 3: /* first of pair, aliasing with a second input */
4726                 tcg_debug_assert(!arg_ct->newreg);
4727                 reg = new_args[arg_ct->pair_index] - 1;
4728                 break;
4729 
4730             default:
4731                 g_assert_not_reached();
4732             }
4733             tcg_regset_set_reg(o_allocated_regs, reg);
4734             set_temp_val_reg(s, ts, reg);
4735             ts->mem_coherent = 0;
4736             new_args[i] = reg;
4737         }
4738     }
4739 
4740     /* emit instruction */
4741     switch (op->opc) {
4742     case INDEX_op_ext8s_i32:
4743         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4744         break;
4745     case INDEX_op_ext8s_i64:
4746         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4747         break;
4748     case INDEX_op_ext8u_i32:
4749     case INDEX_op_ext8u_i64:
4750         tcg_out_ext8u(s, new_args[0], new_args[1]);
4751         break;
4752     case INDEX_op_ext16s_i32:
4753         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4754         break;
4755     case INDEX_op_ext16s_i64:
4756         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4757         break;
4758     case INDEX_op_ext16u_i32:
4759     case INDEX_op_ext16u_i64:
4760         tcg_out_ext16u(s, new_args[0], new_args[1]);
4761         break;
4762     case INDEX_op_ext32s_i64:
4763         tcg_out_ext32s(s, new_args[0], new_args[1]);
4764         break;
4765     case INDEX_op_ext32u_i64:
4766         tcg_out_ext32u(s, new_args[0], new_args[1]);
4767         break;
4768     case INDEX_op_ext_i32_i64:
4769         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4770         break;
4771     case INDEX_op_extu_i32_i64:
4772         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4773         break;
4774     case INDEX_op_extrl_i64_i32:
4775         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4776         break;
4777     default:
4778         if (def->flags & TCG_OPF_VECTOR) {
4779             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4780                            new_args, const_args);
4781         } else {
4782             tcg_out_op(s, op->opc, new_args, const_args);
4783         }
4784         break;
4785     }
4786 
4787     /* move the outputs in the correct register if needed */
4788     for (i = 0; i < nb_oargs; i++) {
4789         ts = arg_temp(op->args[i]);
4790 
4791         /* ENV should not be modified.  */
4792         tcg_debug_assert(!temp_readonly(ts));
4793 
4794         if (NEED_SYNC_ARG(i)) {
4795             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4796         } else if (IS_DEAD_ARG(i)) {
4797             temp_dead(s, ts);
4798         }
4799     }
4800 }
4801 
4802 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4803 {
4804     const TCGLifeData arg_life = op->life;
4805     TCGTemp *ots, *itsl, *itsh;
4806     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4807 
4808     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4809     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4810     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4811 
4812     ots = arg_temp(op->args[0]);
4813     itsl = arg_temp(op->args[1]);
4814     itsh = arg_temp(op->args[2]);
4815 
4816     /* ENV should not be modified.  */
4817     tcg_debug_assert(!temp_readonly(ots));
4818 
4819     /* Allocate the output register now.  */
4820     if (ots->val_type != TEMP_VAL_REG) {
4821         TCGRegSet allocated_regs = s->reserved_regs;
4822         TCGRegSet dup_out_regs =
4823             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4824         TCGReg oreg;
4825 
4826         /* Make sure to not spill the input registers. */
4827         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4828             tcg_regset_set_reg(allocated_regs, itsl->reg);
4829         }
4830         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4831             tcg_regset_set_reg(allocated_regs, itsh->reg);
4832         }
4833 
4834         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4835                              output_pref(op, 0), ots->indirect_base);
4836         set_temp_val_reg(s, ots, oreg);
4837     }
4838 
4839     /* Promote dup2 of immediates to dupi_vec. */
4840     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4841         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4842         MemOp vece = MO_64;
4843 
4844         if (val == dup_const(MO_8, val)) {
4845             vece = MO_8;
4846         } else if (val == dup_const(MO_16, val)) {
4847             vece = MO_16;
4848         } else if (val == dup_const(MO_32, val)) {
4849             vece = MO_32;
4850         }
4851 
4852         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4853         goto done;
4854     }
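    /*
     * In the promotion above, for example: 0x0101010101010101
     * replicates at MO_8, 0x1234123412341234 at MO_16, and
     * 0x1234567812345678 at MO_32; a value such as
     * 0x0123456789abcdef stays MO_64.
     */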
4855 
4856     /* If the two inputs form one 64-bit value, try dupm_vec. */
4857     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
4858         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
4859         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
4860         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
4861 
4862         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
4863         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
4864 
4865         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4866                              its->mem_base->reg, its->mem_offset)) {
4867             goto done;
4868         }
4869     }
4870 
4871     /* Fall back to generic expansion. */
4872     return false;
4873 
4874  done:
4875     ots->mem_coherent = 0;
4876     if (IS_DEAD_ARG(1)) {
4877         temp_dead(s, itsl);
4878     }
4879     if (IS_DEAD_ARG(2)) {
4880         temp_dead(s, itsh);
4881     }
4882     if (NEED_SYNC_ARG(0)) {
4883         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4884     } else if (IS_DEAD_ARG(0)) {
4885         temp_dead(s, ots);
4886     }
4887     return true;
4888 }
4889 
4890 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4891                          TCGRegSet allocated_regs)
4892 {
4893     if (ts->val_type == TEMP_VAL_REG) {
4894         if (ts->reg != reg) {
4895             tcg_reg_free(s, reg, allocated_regs);
4896             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4897                 /*
4898                  * Cross register class move not supported.  Sync the
4899                  * temp back to its slot and load from there.
4900                  */
4901                 temp_sync(s, ts, allocated_regs, 0, 0);
4902                 tcg_out_ld(s, ts->type, reg,
4903                            ts->mem_base->reg, ts->mem_offset);
4904             }
4905         }
4906     } else {
4907         TCGRegSet arg_set = 0;
4908 
4909         tcg_reg_free(s, reg, allocated_regs);
4910         tcg_regset_set_reg(arg_set, reg);
4911         temp_load(s, ts, arg_set, allocated_regs, 0);
4912     }
4913 }
4914 
4915 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
4916                          TCGRegSet allocated_regs)
4917 {
4918     /*
4919      * When the destination is on the stack, load up the temp and store.
4920      * If there are many call-saved registers, the temp might live to
4921      * see another use; otherwise it'll be discarded.
4922      */
4923     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4924     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4925                arg_slot_stk_ofs(arg_slot));
4926 }
4927 
4928 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4929                             TCGTemp *ts, TCGRegSet *allocated_regs)
4930 {
4931     if (arg_slot_reg_p(l->arg_slot)) {
4932         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4933         load_arg_reg(s, reg, ts, *allocated_regs);
4934         tcg_regset_set_reg(*allocated_regs, reg);
4935     } else {
4936         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
4937     }
4938 }
4939 
4940 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
4941                          intptr_t ref_off, TCGRegSet *allocated_regs)
4942 {
4943     TCGReg reg;
4944 
4945     if (arg_slot_reg_p(arg_slot)) {
4946         reg = tcg_target_call_iarg_regs[arg_slot];
4947         tcg_reg_free(s, reg, *allocated_regs);
4948         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4949         tcg_regset_set_reg(*allocated_regs, reg);
4950     } else {
4951         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4952                             *allocated_regs, 0, false);
4953         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4954         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4955                    arg_slot_stk_ofs(arg_slot));
4956     }
4957 }
4958 
4959 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4960 {
4961     const int nb_oargs = TCGOP_CALLO(op);
4962     const int nb_iargs = TCGOP_CALLI(op);
4963     const TCGLifeData arg_life = op->life;
4964     const TCGHelperInfo *info = tcg_call_info(op);
4965     TCGRegSet allocated_regs = s->reserved_regs;
4966     int i;
4967 
4968     /*
4969      * Move inputs into place in reverse order,
4970      * so that we place stacked arguments first.
4971      */
4972     for (i = nb_iargs - 1; i >= 0; --i) {
4973         const TCGCallArgumentLoc *loc = &info->in[i];
4974         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
4975 
4976         switch (loc->kind) {
4977         case TCG_CALL_ARG_NORMAL:
4978         case TCG_CALL_ARG_EXTEND_U:
4979         case TCG_CALL_ARG_EXTEND_S:
4980             load_arg_normal(s, loc, ts, &allocated_regs);
4981             break;
4982         case TCG_CALL_ARG_BY_REF:
4983             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4984             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
4985                          arg_slot_stk_ofs(loc->ref_slot),
4986                          &allocated_regs);
4987             break;
4988         case TCG_CALL_ARG_BY_REF_N:
4989             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4990             break;
4991         default:
4992             g_assert_not_reached();
4993         }
4994     }
4995 
4996     /* Mark dead temporaries and free the associated registers.  */
4997     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4998         if (IS_DEAD_ARG(i)) {
4999             temp_dead(s, arg_temp(op->args[i]));
5000         }
5001     }
5002 
5003     /* Clobber call registers.  */
5004     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5005         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5006             tcg_reg_free(s, i, allocated_regs);
5007         }
5008     }
5009 
5010     /*
5011      * Save globals if they might be written by the helper,
5012      * sync them if they might be read.
5013      */
5014     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5015         /* Nothing to do */
5016     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5017         sync_globals(s, allocated_regs);
5018     } else {
5019         save_globals(s, allocated_regs);
5020     }
5021 
5022     /*
5023      * If the ABI passes a pointer to the returned struct as the first
5024      * argument, load that now.  Pass a pointer to the output home slot.
5025      */
5026     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5027         TCGTemp *ts = arg_temp(op->args[0]);
5028 
5029         if (!ts->mem_allocated) {
5030             temp_allocate_frame(s, ts);
5031         }
5032         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5033     }
5034 
5035     tcg_out_call(s, tcg_call_func(op), info);
5036 
5037     /* Assign output registers and emit moves if needed.  */
5038     switch (info->out_kind) {
5039     case TCG_CALL_RET_NORMAL:
5040         for (i = 0; i < nb_oargs; i++) {
5041             TCGTemp *ts = arg_temp(op->args[i]);
5042             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5043 
5044             /* ENV should not be modified.  */
5045             tcg_debug_assert(!temp_readonly(ts));
5046 
5047             set_temp_val_reg(s, ts, reg);
5048             ts->mem_coherent = 0;
5049         }
5050         break;
5051 
5052     case TCG_CALL_RET_BY_VEC:
5053         {
5054             TCGTemp *ts = arg_temp(op->args[0]);
5055 
5056             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5057             tcg_debug_assert(ts->temp_subindex == 0);
5058             if (!ts->mem_allocated) {
5059                 temp_allocate_frame(s, ts);
5060             }
5061             tcg_out_st(s, TCG_TYPE_V128,
5062                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5063                        ts->mem_base->reg, ts->mem_offset);
5064         }
5065         /* fall through to mark all parts in memory */
5066 
5067     case TCG_CALL_RET_BY_REF:
5068         /* The callee has performed a write through the reference. */
5069         for (i = 0; i < nb_oargs; i++) {
5070             TCGTemp *ts = arg_temp(op->args[i]);
5071             ts->val_type = TEMP_VAL_MEM;
5072         }
5073         break;
5074 
5075     default:
5076         g_assert_not_reached();
5077     }
5078 
5079     /* Flush or discard output registers as needed. */
5080     for (i = 0; i < nb_oargs; i++) {
5081         TCGTemp *ts = arg_temp(op->args[i]);
5082         if (NEED_SYNC_ARG(i)) {
5083             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5084         } else if (IS_DEAD_ARG(i)) {
5085             temp_dead(s, ts);
5086         }
5087     }
5088 }
5089 
5090 /*
5091  * Similarly for qemu_ld/st slow path helpers.
5092  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5093  * using only the provided backend tcg_out_* functions.
5094  */
5095 
5096 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5097 {
5098     int ofs = arg_slot_stk_ofs(slot);
5099 
5100     /*
5101      * Each stack slot is TCG_TARGET_LONG_BITS wide.  If the host does
5102      * not require extension to uint64_t, adjust the address for uint32_t.
5103      */
5104     if (HOST_BIG_ENDIAN &&
5105         TCG_TARGET_REG_BITS == 64 &&
5106         type == TCG_TYPE_I32) {
5107         ofs += 4;
5108     }
5109     return ofs;
5110 }
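
/*
 * A worked example, assuming a 64-bit big-endian host: if a slot's
 * base offset comes out at byte 16, a TCG_TYPE_I32 argument must land
 * in the least-significant half of that 64-bit slot, which big-endian
 * places at the higher address; the "+ 4" above yields byte 20.
 */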
5111 
5112 static void tcg_out_helper_load_regs(TCGContext *s,
5113                                      unsigned nmov, TCGMovExtend *mov,
5114                                      unsigned ntmp, const int *tmp)
5115 {
5116     switch (nmov) {
5117     default:
5118         /* The backend must have provided enough temps for the worst case. */
5119         tcg_debug_assert(ntmp + 1 >= nmov);
5120 
5121         for (unsigned i = nmov - 1; i >= 2; --i) {
5122             TCGReg dst = mov[i].dst;
5123 
5124             for (unsigned j = 0; j < i; ++j) {
5125                 if (dst == mov[j].src) {
5126                     /*
5127                      * Conflict.
5128                      * Copy the source to a temporary, recurse for the
5129                      * remaining moves, perform the extension from our
5130                      * scratch on the way out.
5131                      */
5132                     TCGReg scratch = tmp[--ntmp];
5133                     tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
5134                     mov[i].src = scratch;
5135 
5136                     tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
5137                     tcg_out_movext1(s, &mov[i]);
5138                     return;
5139                 }
5140             }
5141 
5142             /* No conflicts: perform this move and continue. */
5143             tcg_out_movext1(s, &mov[i]);
5144         }
5145         /* fall through for the final two moves */
5146 
5147     case 2:
5148         tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
5149         return;
5150     case 1:
5151         tcg_out_movext1(s, mov);
5152         return;
5153     case 0:
5154         g_assert_not_reached();
5155     }
5156 }
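
/*
 * A worked example of the conflict case, with hypothetical registers:
 * given mov[0]: R0 <- R1, mov[1]: R1 <- R2, mov[2]: R2 <- R0, emitting
 * mov[2] first would clobber mov[1]'s source, while deferring it lets
 * mov[0] clobber R0.  So R0 is copied to a scratch, mov[0] and mov[1]
 * are emitted by the recursion (via tcg_out_movext2, which resolves
 * their own overlap), and R2 is finally extended from the scratch.
 */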
5157 
5158 static void tcg_out_helper_load_slots(TCGContext *s,
5159                                       unsigned nmov, TCGMovExtend *mov,
5160                                       const TCGLdstHelperParam *parm)
5161 {
5162     unsigned i;
5163 
5164     /*
5165      * Start from the end, storing to the stack first.
5166      * This frees those registers, so we need not consider overlap.
5167      */
5168     for (i = nmov; i-- > 0; ) {
5169         unsigned slot = mov[i].dst;
5170 
5171         if (arg_slot_reg_p(slot)) {
5172             goto found_reg;
5173         }
5174 
5175         TCGReg src = mov[i].src;
5176         TCGType dst_type = mov[i].dst_type;
5177         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5178 
5179         /* The argument is going onto the stack; extend into scratch. */
5180         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5181             tcg_debug_assert(parm->ntmp != 0);
5182             mov[i].dst = src = parm->tmp[0];
5183             tcg_out_movext1(s, &mov[i]);
5184         }
5185 
5186         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5187                    tcg_out_helper_stk_ofs(dst_type, slot));
5188     }
5189     return;
5190 
5191  found_reg:
5192     /*
5193      * The remaining arguments are in registers.
5194      * Convert slot numbers to argument registers.
5195      */
5196     nmov = i + 1;
5197     for (i = 0; i < nmov; ++i) {
5198         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5199     }
5200     tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
5201 }
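
/*
 * A worked example, assuming six argument registers and nmov == 8:
 * slots 7 and 6 are stored to the stack first, freeing their source
 * registers; slot 5 then tests as a register slot, so mov[0..5] have
 * their dst fields rewritten from slot numbers to the corresponding
 * tcg_target_call_iarg_regs[] entries and are emitted as moves.
 */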
5202 
5203 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5204                                     TCGType type, tcg_target_long imm,
5205                                     const TCGLdstHelperParam *parm)
5206 {
5207     if (arg_slot_reg_p(slot)) {
5208         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5209     } else {
5210         int ofs = tcg_out_helper_stk_ofs(type, slot);
5211         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5212             tcg_debug_assert(parm->ntmp != 0);
5213             tcg_out_movi(s, type, parm->tmp[0], imm);
5214             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5215         }
5216     }
5217 }
5218 
5219 static void tcg_out_helper_load_common_args(TCGContext *s,
5220                                             const TCGLabelQemuLdst *ldst,
5221                                             const TCGLdstHelperParam *parm,
5222                                             const TCGHelperInfo *info,
5223                                             unsigned next_arg)
5224 {
5225     TCGMovExtend ptr_mov = {
5226         .dst_type = TCG_TYPE_PTR,
5227         .src_type = TCG_TYPE_PTR,
5228         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5229     };
5230     const TCGCallArgumentLoc *loc = &info->in[0];
5231     TCGType type;
5232     unsigned slot;
5233     tcg_target_ulong imm;
5234 
5235     /*
5236      * Handle env, which is always first.
5237      */
5238     ptr_mov.dst = loc->arg_slot;
5239     ptr_mov.src = TCG_AREG0;
5240     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5241 
5242     /*
5243      * Handle oi.
5244      */
5245     imm = ldst->oi;
5246     loc = &info->in[next_arg];
5247     type = TCG_TYPE_I32;
5248     switch (loc->kind) {
5249     case TCG_CALL_ARG_NORMAL:
5250         break;
5251     case TCG_CALL_ARG_EXTEND_U:
5252     case TCG_CALL_ARG_EXTEND_S:
5253         /* No extension required for MemOpIdx. */
5254         tcg_debug_assert(imm <= INT32_MAX);
5255         type = TCG_TYPE_REG;
5256         break;
5257     default:
5258         g_assert_not_reached();
5259     }
5260     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5261     next_arg++;
5262 
5263     /*
5264      * Handle ra.
5265      */
5266     loc = &info->in[next_arg];
5267     slot = loc->arg_slot;
5268     if (parm->ra_gen) {
5269         int arg_reg = -1;
5270         TCGReg ra_reg;
5271 
5272         if (arg_slot_reg_p(slot)) {
5273             arg_reg = tcg_target_call_iarg_regs[slot];
5274         }
5275         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5276 
5277         ptr_mov.dst = slot;
5278         ptr_mov.src = ra_reg;
5279         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5280     } else {
5281         imm = (uintptr_t)ldst->raddr;
5282         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5283     }
5284 }
5285 
5286 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5287                                        const TCGCallArgumentLoc *loc,
5288                                        TCGType dst_type, TCGType src_type,
5289                                        TCGReg lo, TCGReg hi)
5290 {
5291     if (dst_type <= TCG_TYPE_REG) {
5292         MemOp src_ext;
5293 
5294         switch (loc->kind) {
5295         case TCG_CALL_ARG_NORMAL:
5296             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5297             break;
5298         case TCG_CALL_ARG_EXTEND_U:
5299             dst_type = TCG_TYPE_REG;
5300             src_ext = MO_UL;
5301             break;
5302         case TCG_CALL_ARG_EXTEND_S:
5303             dst_type = TCG_TYPE_REG;
5304             src_ext = MO_SL;
5305             break;
5306         default:
5307             g_assert_not_reached();
5308         }
5309 
5310         mov[0].dst = loc->arg_slot;
5311         mov[0].dst_type = dst_type;
5312         mov[0].src = lo;
5313         mov[0].src_type = src_type;
5314         mov[0].src_ext = src_ext;
5315         return 1;
5316     }
5317 
5318     assert(TCG_TARGET_REG_BITS == 32);
5319 
5320     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5321     mov[0].src = lo;
5322     mov[0].dst_type = TCG_TYPE_I32;
5323     mov[0].src_type = TCG_TYPE_I32;
5324     mov[0].src_ext = MO_32;
5325 
5326     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5327     mov[1].src = hi;
5328     mov[1].dst_type = TCG_TYPE_I32;
5329     mov[1].src_type = TCG_TYPE_I32;
5330     mov[1].src_ext = MO_32;
5331 
5332     return 2;
5333 }
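
/*
 * A worked example of the 32-bit-host path: a 64-bit value occupies
 * two consecutive slots, loc[0] and loc[1].  On a big-endian host,
 * mov[0] sends @lo to loc[1] and mov[1] sends @hi to loc[0]; on a
 * little-endian host the destinations swap, matching the ABI's memory
 * layout for a 64-bit argument.
 */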
5334 
5335 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5336                                    const TCGLdstHelperParam *parm)
5337 {
5338     const TCGHelperInfo *info;
5339     const TCGCallArgumentLoc *loc;
5340     TCGMovExtend mov[2];
5341     unsigned next_arg, nmov;
5342     MemOp mop = get_memop(ldst->oi);
5343 
5344     switch (mop & MO_SIZE) {
5345     case MO_8:
5346     case MO_16:
5347     case MO_32:
5348         info = &info_helper_ld32_mmu;
5349         break;
5350     case MO_64:
5351         info = &info_helper_ld64_mmu;
5352         break;
5353     default:
5354         g_assert_not_reached();
5355     }
5356 
5357     /* Defer env argument. */
5358     next_arg = 1;
5359 
5360     loc = &info->in[next_arg];
5361     nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
5362                                   ldst->addrlo_reg, ldst->addrhi_reg);
5363     next_arg += nmov;
5364 
5365     tcg_out_helper_load_slots(s, nmov, mov, parm);
5366 
5367     /* No special attention for 32- and 64-bit return values. */
5368     tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
5369 
5370     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5371 }
5372 
5373 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5374                                   bool load_sign,
5375                                   const TCGLdstHelperParam *parm)
5376 {
5377     TCGMovExtend mov[2];
5378 
5379     if (ldst->type <= TCG_TYPE_REG) {
5380         MemOp mop = get_memop(ldst->oi);
5381 
5382         mov[0].dst = ldst->datalo_reg;
5383         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5384         mov[0].dst_type = ldst->type;
5385         mov[0].src_type = TCG_TYPE_REG;
5386 
5387         /*
5388          * If load_sign, then we allowed the helper to perform the
5389          * appropriate sign extension to tcg_target_ulong, and all
5390          * we need now is a plain move.
5391          *
5392          * If not, then we expect the relevant extension
5393          * instruction to be no more expensive than a move, and
5394          * we thus save the icache etc by only using one of two
5395          * helper functions.
5396          */
5397         if (load_sign || !(mop & MO_SIGN)) {
5398             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5399                 mov[0].src_ext = MO_32;
5400             } else {
5401                 mov[0].src_ext = MO_64;
5402             }
5403         } else {
5404             mov[0].src_ext = mop & MO_SSIZE;
5405         }
5406         tcg_out_movext1(s, mov);
5407     } else {
5408         assert(TCG_TARGET_REG_BITS == 32);
5409 
5410         mov[0].dst = ldst->datalo_reg;
5411         mov[0].src =
5412             tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5413         mov[0].dst_type = TCG_TYPE_I32;
5414         mov[0].src_type = TCG_TYPE_I32;
5415         mov[0].src_ext = MO_32;
5416 
5417         mov[1].dst = ldst->datahi_reg;
5418         mov[1].src =
5419             tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5420         mov[1].dst_type = TCG_TYPE_REG;
5421         mov[1].src_type = TCG_TYPE_REG;
5422         mov[1].src_ext = MO_32;
5423 
5424         tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5425     }
5426 }
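
/*
 * A worked example: for an MO_SB load with !load_sign, src_ext becomes
 * MO_SB and tcg_out_movext1 emits the 8-bit sign extension here; with
 * load_sign, the helper already returned a sign-extended
 * tcg_target_ulong and a plain move suffices.
 */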
5427 
5428 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5429                                    const TCGLdstHelperParam *parm)
5430 {
5431     const TCGHelperInfo *info;
5432     const TCGCallArgumentLoc *loc;
5433     TCGMovExtend mov[4];
5434     TCGType data_type;
5435     unsigned next_arg, nmov, n;
5436     MemOp mop = get_memop(ldst->oi);
5437 
5438     switch (mop & MO_SIZE) {
5439     case MO_8:
5440     case MO_16:
5441     case MO_32:
5442         info = &info_helper_st32_mmu;
5443         data_type = TCG_TYPE_I32;
5444         break;
5445     case MO_64:
5446         info = &info_helper_st64_mmu;
5447         data_type = TCG_TYPE_I64;
5448         break;
5449     default:
5450         g_assert_not_reached();
5451     }
5452 
5453     /* Defer env argument. */
5454     next_arg = 1;
5455     nmov = 0;
5456 
5457     /* Handle addr argument. */
5458     loc = &info->in[next_arg];
5459     n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
5460                                ldst->addrlo_reg, ldst->addrhi_reg);
5461     next_arg += n;
5462     nmov += n;
5463 
5464     /* Handle data argument. */
5465     loc = &info->in[next_arg];
5466     n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5467                                ldst->datalo_reg, ldst->datahi_reg);
5468     next_arg += n;
5469     nmov += n;
5470     tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
5471 
5472     tcg_out_helper_load_slots(s, nmov, mov, parm);
5473     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5474 }
5475 
5476 #ifdef CONFIG_PROFILER
5477 
5478 /* Avoid copy/paste errors. */
5479 #define PROF_ADD(to, from, field)                       \
5480     do {                                                \
5481         (to)->field += qatomic_read(&((from)->field));  \
5482     } while (0)
5483 
5484 #define PROF_MAX(to, from, field)                                       \
5485     do {                                                                \
5486         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
5487         if (val__ > (to)->field) {                                      \
5488             (to)->field = val__;                                        \
5489         }                                                               \
5490     } while (0)
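
/*
 * Usage sketch: tcg_profile_snapshot below applies these field by
 * field, e.g. PROF_ADD(prof, orig, tb_count) accumulates a sum across
 * all contexts, while PROF_MAX(prof, orig, op_count_max) keeps the
 * largest value seen in any single context.
 */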
5491 
5492 /* Pass in a zeroed @prof */
5493 static inline
5494 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
5495 {
5496     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5497     unsigned int i;
5498 
5499     for (i = 0; i < n_ctxs; i++) {
5500         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5501         const TCGProfile *orig = &s->prof;
5502 
5503         if (counters) {
5504             PROF_ADD(prof, orig, cpu_exec_time);
5505             PROF_ADD(prof, orig, tb_count1);
5506             PROF_ADD(prof, orig, tb_count);
5507             PROF_ADD(prof, orig, op_count);
5508             PROF_MAX(prof, orig, op_count_max);
5509             PROF_ADD(prof, orig, temp_count);
5510             PROF_MAX(prof, orig, temp_count_max);
5511             PROF_ADD(prof, orig, del_op_count);
5512             PROF_ADD(prof, orig, code_in_len);
5513             PROF_ADD(prof, orig, code_out_len);
5514             PROF_ADD(prof, orig, search_out_len);
5515             PROF_ADD(prof, orig, interm_time);
5516             PROF_ADD(prof, orig, code_time);
5517             PROF_ADD(prof, orig, la_time);
5518             PROF_ADD(prof, orig, opt_time);
5519             PROF_ADD(prof, orig, restore_count);
5520             PROF_ADD(prof, orig, restore_time);
5521         }
5522         if (table) {
5523             int i;
5524 
5525             for (i = 0; i < NB_OPS; i++) {
5526                 PROF_ADD(prof, orig, table_op_count[i]);
5527             }
5528         }
5529     }
5530 }
5531 
5532 #undef PROF_ADD
5533 #undef PROF_MAX
5534 
5535 static void tcg_profile_snapshot_counters(TCGProfile *prof)
5536 {
5537     tcg_profile_snapshot(prof, true, false);
5538 }
5539 
5540 static void tcg_profile_snapshot_table(TCGProfile *prof)
5541 {
5542     tcg_profile_snapshot(prof, false, true);
5543 }
5544 
5545 void tcg_dump_op_count(GString *buf)
5546 {
5547     TCGProfile prof = {};
5548     int i;
5549 
5550     tcg_profile_snapshot_table(&prof);
5551     for (i = 0; i < NB_OPS; i++) {
5552         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5553                                prof.table_op_count[i]);
5554     }
5555 }
5556 
5557 int64_t tcg_cpu_exec_time(void)
5558 {
5559     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5560     unsigned int i;
5561     int64_t ret = 0;
5562 
5563     for (i = 0; i < n_ctxs; i++) {
5564         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5565         const TCGProfile *prof = &s->prof;
5566 
5567         ret += qatomic_read(&prof->cpu_exec_time);
5568     }
5569     return ret;
5570 }
5571 #else
5572 void tcg_dump_op_count(GString *buf)
5573 {
5574     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5575 }
5576 
5577 int64_t tcg_cpu_exec_time(void)
5578 {
5579     error_report("%s: TCG profiler not compiled", __func__);
5580     exit(EXIT_FAILURE);
5581 }
5582 #endif
5583 
5584 
5585 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
5586 {
5587 #ifdef CONFIG_PROFILER
5588     TCGProfile *prof = &s->prof;
5589 #endif
5590     int i, num_insns;
5591     TCGOp *op;
5592 
5593 #ifdef CONFIG_PROFILER
5594     {
5595         int n = 0;
5596 
5597         QTAILQ_FOREACH(op, &s->ops, link) {
5598             n++;
5599         }
5600         qatomic_set(&prof->op_count, prof->op_count + n);
5601         if (n > prof->op_count_max) {
5602             qatomic_set(&prof->op_count_max, n);
5603         }
5604 
5605         n = s->nb_temps;
5606         qatomic_set(&prof->temp_count, prof->temp_count + n);
5607         if (n > prof->temp_count_max) {
5608             qatomic_set(&prof->temp_count_max, n);
5609         }
5610     }
5611 #endif
5612 
5613 #ifdef DEBUG_DISAS
5614     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5615                  && qemu_log_in_addr_range(pc_start))) {
5616         FILE *logfile = qemu_log_trylock();
5617         if (logfile) {
5618             fprintf(logfile, "OP:\n");
5619             tcg_dump_ops(s, logfile, false);
5620             fprintf(logfile, "\n");
5621             qemu_log_unlock(logfile);
5622         }
5623     }
5624 #endif
5625 
5626 #ifdef CONFIG_DEBUG_TCG
5627     /* Ensure all labels referenced have been emitted.  */
5628     {
5629         TCGLabel *l;
5630         bool error = false;
5631 
5632         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5633             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5634                 qemu_log_mask(CPU_LOG_TB_OP,
5635                               "$L%d referenced but not present.\n", l->id);
5636                 error = true;
5637             }
5638         }
5639         assert(!error);
5640     }
5641 #endif
5642 
5643 #ifdef CONFIG_PROFILER
5644     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
5645 #endif
5646 
5647 #ifdef USE_TCG_OPTIMIZATIONS
5648     tcg_optimize(s);
5649 #endif
5650 
5651 #ifdef CONFIG_PROFILER
5652     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
5653     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
5654 #endif
5655 
5656     reachable_code_pass(s);
5657     liveness_pass_0(s);
5658     liveness_pass_1(s);
5659 
5660     if (s->nb_indirects > 0) {
5661 #ifdef DEBUG_DISAS
5662         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5663                      && qemu_log_in_addr_range(pc_start))) {
5664             FILE *logfile = qemu_log_trylock();
5665             if (logfile) {
5666                 fprintf(logfile, "OP before indirect lowering:\n");
5667                 tcg_dump_ops(s, logfile, false);
5668                 fprintf(logfile, "\n");
5669                 qemu_log_unlock(logfile);
5670             }
5671         }
5672 #endif
5673         /* Replace indirect temps with direct temps.  */
5674         if (liveness_pass_2(s)) {
5675             /* If changes were made, re-run liveness.  */
5676             liveness_pass_1(s);
5677         }
5678     }
5679 
5680 #ifdef CONFIG_PROFILER
5681     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
5682 #endif
5683 
5684 #ifdef DEBUG_DISAS
5685     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5686                  && qemu_log_in_addr_range(pc_start))) {
5687         FILE *logfile = qemu_log_trylock();
5688         if (logfile) {
5689             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5690             tcg_dump_ops(s, logfile, true);
5691             fprintf(logfile, "\n");
5692             qemu_log_unlock(logfile);
5693         }
5694     }
5695 #endif
5696 
5697     /* Initialize goto_tb jump offsets. */
5698     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5699     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5700     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5701     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5702 
5703     tcg_reg_alloc_start(s);
5704 
5705     /*
5706      * Reset the buffer pointers when restarting after overflow.
5707      * TODO: Move this into translate-all.c with the rest of the
5708      * buffer management.  Having only this done here is confusing.
5709      */
5710     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
5711     s->code_ptr = s->code_buf;
5712 
5713 #ifdef TCG_TARGET_NEED_LDST_LABELS
5714     QSIMPLEQ_INIT(&s->ldst_labels);
5715 #endif
5716 #ifdef TCG_TARGET_NEED_POOL_LABELS
5717     s->pool_labels = NULL;
5718 #endif
5719 
5720     num_insns = -1;
5721     QTAILQ_FOREACH(op, &s->ops, link) {
5722         TCGOpcode opc = op->opc;
5723 
5724 #ifdef CONFIG_PROFILER
5725         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
5726 #endif
5727 
5728         switch (opc) {
5729         case INDEX_op_mov_i32:
5730         case INDEX_op_mov_i64:
5731         case INDEX_op_mov_vec:
5732             tcg_reg_alloc_mov(s, op);
5733             break;
5734         case INDEX_op_dup_vec:
5735             tcg_reg_alloc_dup(s, op);
5736             break;
5737         case INDEX_op_insn_start:
5738             if (num_insns >= 0) {
5739                 size_t off = tcg_current_code_size(s);
5740                 s->gen_insn_end_off[num_insns] = off;
5741                 /* Assert that we do not overflow our stored offset.  */
5742                 assert(s->gen_insn_end_off[num_insns] == off);
5743             }
5744             num_insns++;
5745             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
5746                 target_ulong a;
5747 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
5748                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
5749 #else
5750                 a = op->args[i];
5751 #endif
5752                 s->gen_insn_data[num_insns][i] = a;
5753             }
5754             break;
5755         case INDEX_op_discard:
5756             temp_dead(s, arg_temp(op->args[0]));
5757             break;
5758         case INDEX_op_set_label:
5759             tcg_reg_alloc_bb_end(s, s->reserved_regs);
5760             tcg_out_label(s, arg_label(op->args[0]));
5761             break;
5762         case INDEX_op_call:
5763             tcg_reg_alloc_call(s, op);
5764             break;
5765         case INDEX_op_exit_tb:
5766             tcg_out_exit_tb(s, op->args[0]);
5767             break;
5768         case INDEX_op_goto_tb:
5769             tcg_out_goto_tb(s, op->args[0]);
5770             break;
5771         case INDEX_op_dup2_vec:
5772             if (tcg_reg_alloc_dup2(s, op)) {
5773                 break;
5774             }
5775             /* fall through */
5776         default:
5777             /* Sanity check that we've not introduced any unhandled opcodes. */
5778             tcg_debug_assert(tcg_op_supported(opc));
5779             /* Note: it would be much faster to have specialized
5780                register allocator functions for some common
5781                argument patterns.  */
5782             tcg_reg_alloc_op(s, op);
5783             break;
5784         }
5785         /* Test for (pending) buffer overflow.  The assumption is that any
5786            one operation beginning below the high water mark cannot overrun
5787            the buffer completely.  Thus we can test for overflow after
5788            generating code without having to check during generation.  */
5789         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
5790             return -1;
5791         }
5792         /* Test for TB overflow, as seen by gen_insn_end_off.  */
5793         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
5794             return -2;
5795         }
5796     }
5797     tcg_debug_assert(num_insns >= 0);
5798     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
5799 
5800     /* Generate TB finalization at the end of block */
5801 #ifdef TCG_TARGET_NEED_LDST_LABELS
5802     i = tcg_out_ldst_finalize(s);
5803     if (i < 0) {
5804         return i;
5805     }
5806 #endif
5807 #ifdef TCG_TARGET_NEED_POOL_LABELS
5808     i = tcg_out_pool_finalize(s);
5809     if (i < 0) {
5810         return i;
5811     }
5812 #endif
5813     if (!tcg_resolve_relocs(s)) {
5814         return -2;
5815     }
5816 
5817 #ifndef CONFIG_TCG_INTERPRETER
5818     /* flush instruction cache */
5819     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
5820                         (uintptr_t)s->code_buf,
5821                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
5822 #endif
5823 
5824     return tcg_current_code_size(s);
5825 }
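
/*
 * A minimal caller sketch; the real handling lives in tb_gen_code()
 * (accel/tcg/translate-all.c).  A negative return requests a restart
 * rather than reporting an error.
 */
#if 0
    gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
    if (unlikely(gen_code_size < 0)) {
        /*
         * -1: an op began above code_gen_highwater; get a fresh buffer
         *     (or flush) and retranslate.
         * -2: the TB outgrew what gen_insn_end_off can record;
         *     retranslate with fewer guest instructions.
         */
    }
#endif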
5826 
5827 #ifdef CONFIG_PROFILER
5828 void tcg_dump_info(GString *buf)
5829 {
5830     TCGProfile prof = {};
5831     const TCGProfile *s;
5832     int64_t tb_count;
5833     int64_t tb_div_count;
5834     int64_t tot;
5835 
5836     tcg_profile_snapshot_counters(&prof);
5837     s = &prof;
5838     tb_count = s->tb_count;
5839     tb_div_count = tb_count ? tb_count : 1;
5840     tot = s->interm_time + s->code_time;
5841 
5842     g_string_append_printf(buf, "JIT cycles          %" PRId64
5843                            " (%0.3f s at 2.4 GHz)\n",
5844                            tot, tot / 2.4e9);
5845     g_string_append_printf(buf, "translated TBs      %" PRId64
5846                            " (aborted=%" PRId64 " %0.1f%%)\n",
5847                            tb_count, s->tb_count1 - tb_count,
5848                            (double)(s->tb_count1 - s->tb_count)
5849                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
5850     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
5851                            (double)s->op_count / tb_div_count, s->op_count_max);
5852     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
5853                            (double)s->del_op_count / tb_div_count);
5854     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
5855                            (double)s->temp_count / tb_div_count,
5856                            s->temp_count_max);
5857     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
5858                            (double)s->code_out_len / tb_div_count);
5859     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
5860                            (double)s->search_out_len / tb_div_count);
5861 
5862     g_string_append_printf(buf, "cycles/op           %0.1f\n",
5863                            s->op_count ? (double)tot / s->op_count : 0);
5864     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
5865                            s->code_in_len ? (double)tot / s->code_in_len : 0);
5866     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
5867                            s->code_out_len ? (double)tot / s->code_out_len : 0);
5868     g_string_append_printf(buf, "cycles/search byte  %0.1f\n",
5869                            s->search_out_len ?
5870                            (double)tot / s->search_out_len : 0);
5871     if (tot == 0) {
5872         tot = 1;
5873     }
5874     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
5875                            (double)s->interm_time / tot * 100.0);
5876     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
5877                            (double)s->code_time / tot * 100.0);
5878     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
5879                            (double)s->opt_time / (s->code_time ?
5880                                                   s->code_time : 1)
5881                            * 100.0);
5882     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
5883                            (double)s->la_time / (s->code_time ?
5884                                                  s->code_time : 1) * 100.0);
5885     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
5886                            s->restore_count);
5887     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
5888                            s->restore_count ?
5889                            (double)s->restore_time / s->restore_count : 0);
5890 }
5891 #else
5892 void tcg_dump_info(GString *buf)
5893 {
5894     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5895 }
5896 #endif
5897 
5898 #ifdef ELF_HOST_MACHINE
5899 /* In order to use this feature, the backend needs to do three things:
5900 
5901    (1) Define ELF_HOST_MACHINE to indicate both what value to
5902        put into the ELF image and to indicate support for the feature.
5903 
5904    (2) Define tcg_register_jit.  This should create a buffer containing
5905        the contents of a .debug_frame section that describes the post-
5906        prologue unwind info for the tcg machine.
5907 
5908    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5909 */
5910 
5911 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
5912 typedef enum {
5913     JIT_NOACTION = 0,
5914     JIT_REGISTER_FN,
5915     JIT_UNREGISTER_FN
5916 } jit_actions_t;
5917 
5918 struct jit_code_entry {
5919     struct jit_code_entry *next_entry;
5920     struct jit_code_entry *prev_entry;
5921     const void *symfile_addr;
5922     uint64_t symfile_size;
5923 };
5924 
5925 struct jit_descriptor {
5926     uint32_t version;
5927     uint32_t action_flag;
5928     struct jit_code_entry *relevant_entry;
5929     struct jit_code_entry *first_entry;
5930 };
5931 
5932 void __jit_debug_register_code(void) __attribute__((noinline));
5933 void __jit_debug_register_code(void)
5934 {
5935     asm("");
5936 }
5937 
5938 /* Must statically initialize the version, because GDB may check
5939    the version before we can set it.  */
5940 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
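
/*
 * Protocol sketch, per the GDB JIT interface documentation: the
 * debugger plants a breakpoint inside __jit_debug_register_code; the
 * JIT links a new jit_code_entry into the descriptor, sets action_flag
 * to JIT_REGISTER_FN, and calls the function, at which point the
 * debugger reads the in-memory ELF image.  See tcg_register_jit_int
 * below for the actual registration.
 */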
5941 
5942 /* End GDB interface.  */
5943 
5944 static int find_string(const char *strtab, const char *str)
5945 {
5946     const char *p = strtab + 1;
5947 
5948     while (1) {
5949         if (strcmp(p, str) == 0) {
5950             return p - strtab;
5951         }
5952         p += strlen(p) + 1;
5953     }
5954 }
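
/*
 * Note that @str must be present in @strtab, or the loop above never
 * terminates.  For the .str table used below, for example,
 * find_string(img->str, ".text") returns 1, the offset just past the
 * leading NUL.
 */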
5955 
5956 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5957                                  const void *debug_frame,
5958                                  size_t debug_frame_size)
5959 {
5960     struct __attribute__((packed)) DebugInfo {
5961         uint32_t  len;
5962         uint16_t  version;
5963         uint32_t  abbrev;
5964         uint8_t   ptr_size;
5965         uint8_t   cu_die;
5966         uint16_t  cu_lang;
5967         uintptr_t cu_low_pc;
5968         uintptr_t cu_high_pc;
5969         uint8_t   fn_die;
5970         char      fn_name[16];
5971         uintptr_t fn_low_pc;
5972         uintptr_t fn_high_pc;
5973         uint8_t   cu_eoc;
5974     };
5975 
5976     struct ElfImage {
5977         ElfW(Ehdr) ehdr;
5978         ElfW(Phdr) phdr;
5979         ElfW(Shdr) shdr[7];
5980         ElfW(Sym)  sym[2];
5981         struct DebugInfo di;
5982         uint8_t    da[24];
5983         char       str[80];
5984     };
5985 
5986     struct ElfImage *img;
5987 
5988     static const struct ElfImage img_template = {
5989         .ehdr = {
5990             .e_ident[EI_MAG0] = ELFMAG0,
5991             .e_ident[EI_MAG1] = ELFMAG1,
5992             .e_ident[EI_MAG2] = ELFMAG2,
5993             .e_ident[EI_MAG3] = ELFMAG3,
5994             .e_ident[EI_CLASS] = ELF_CLASS,
5995             .e_ident[EI_DATA] = ELF_DATA,
5996             .e_ident[EI_VERSION] = EV_CURRENT,
5997             .e_type = ET_EXEC,
5998             .e_machine = ELF_HOST_MACHINE,
5999             .e_version = EV_CURRENT,
6000             .e_phoff = offsetof(struct ElfImage, phdr),
6001             .e_shoff = offsetof(struct ElfImage, shdr),
6002             .e_ehsize = sizeof(ElfW(Ehdr)),
6003             .e_phentsize = sizeof(ElfW(Phdr)),
6004             .e_phnum = 1,
6005             .e_shentsize = sizeof(ElfW(Shdr)),
6006             .e_shnum = ARRAY_SIZE(img->shdr),
6007             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6008 #ifdef ELF_HOST_FLAGS
6009             .e_flags = ELF_HOST_FLAGS,
6010 #endif
6011 #ifdef ELF_OSABI
6012             .e_ident[EI_OSABI] = ELF_OSABI,
6013 #endif
6014         },
6015         .phdr = {
6016             .p_type = PT_LOAD,
6017             .p_flags = PF_X,
6018         },
6019         .shdr = {
6020             [0] = { .sh_type = SHT_NULL },
6021             /* Trick: The contents of code_gen_buffer are not present in
6022                this fake ELF file; that got allocated elsewhere.  Therefore
6023                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6024                will not look for contents.  We can record any address.  */
6025             [1] = { /* .text */
6026                 .sh_type = SHT_NOBITS,
6027                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6028             },
6029             [2] = { /* .debug_info */
6030                 .sh_type = SHT_PROGBITS,
6031                 .sh_offset = offsetof(struct ElfImage, di),
6032                 .sh_size = sizeof(struct DebugInfo),
6033             },
6034             [3] = { /* .debug_abbrev */
6035                 .sh_type = SHT_PROGBITS,
6036                 .sh_offset = offsetof(struct ElfImage, da),
6037                 .sh_size = sizeof(img->da),
6038             },
6039             [4] = { /* .debug_frame */
6040                 .sh_type = SHT_PROGBITS,
6041                 .sh_offset = sizeof(struct ElfImage),
6042             },
6043             [5] = { /* .symtab */
6044                 .sh_type = SHT_SYMTAB,
6045                 .sh_offset = offsetof(struct ElfImage, sym),
6046                 .sh_size = sizeof(img->sym),
6047                 .sh_info = 1,
6048                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6049                 .sh_entsize = sizeof(ElfW(Sym)),
6050             },
6051             [6] = { /* .strtab */
6052                 .sh_type = SHT_STRTAB,
6053                 .sh_offset = offsetof(struct ElfImage, str),
6054                 .sh_size = sizeof(img->str),
6055             }
6056         },
6057         .sym = {
6058             [1] = { /* code_gen_buffer */
6059                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6060                 .st_shndx = 1,
6061             }
6062         },
6063         .di = {
6064             .len = sizeof(struct DebugInfo) - 4,
6065             .version = 2,
6066             .ptr_size = sizeof(void *),
6067             .cu_die = 1,
6068             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6069             .fn_die = 2,
6070             .fn_name = "code_gen_buffer"
6071         },
6072         .da = {
6073             1,          /* abbrev number (the cu) */
6074             0x11, 1,    /* DW_TAG_compile_unit, has children */
6075             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6076             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6077             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6078             0, 0,       /* end of abbrev */
6079             2,          /* abbrev number (the fn) */
6080             0x2e, 0,    /* DW_TAG_subprogram, no children */
6081             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6082             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6083             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6084             0, 0,       /* end of abbrev */
6085             0           /* no more abbrev */
6086         },
6087         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6088                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6089     };
6090 
6091     /* We only need a single jit entry; statically allocate it.  */
6092     static struct jit_code_entry one_entry;
6093 
6094     uintptr_t buf = (uintptr_t)buf_ptr;
6095     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6096     DebugFrameHeader *dfh;
6097 
6098     img = g_malloc(img_size);
6099     *img = img_template;
6100 
6101     img->phdr.p_vaddr = buf;
6102     img->phdr.p_paddr = buf;
6103     img->phdr.p_memsz = buf_size;
6104 
6105     img->shdr[1].sh_name = find_string(img->str, ".text");
6106     img->shdr[1].sh_addr = buf;
6107     img->shdr[1].sh_size = buf_size;
6108 
6109     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6110     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6111 
6112     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6113     img->shdr[4].sh_size = debug_frame_size;
6114 
6115     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6116     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6117 
6118     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6119     img->sym[1].st_value = buf;
6120     img->sym[1].st_size = buf_size;
6121 
6122     img->di.cu_low_pc = buf;
6123     img->di.cu_high_pc = buf + buf_size;
6124     img->di.fn_low_pc = buf;
6125     img->di.fn_high_pc = buf + buf_size;
6126 
6127     dfh = (DebugFrameHeader *)(img + 1);
6128     memcpy(dfh, debug_frame, debug_frame_size);
6129     dfh->fde.func_start = buf;
6130     dfh->fde.func_len = buf_size;
6131 
6132 #ifdef DEBUG_JIT
6133     /* Enable this block to be able to debug the ELF image file creation.
6134        One can use readelf, objdump, or other inspection utilities.  */
6135     {
6136         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6137         FILE *f = fopen(jit, "w+b");
6138         if (f) {
6139             if (fwrite(img, img_size, 1, f) != 1) {
6140                 /* Avoid the unused return value warning for fwrite.  */
6141             }
6142             fclose(f);
6143         }
6144     }
6145 #endif
6146 
6147     one_entry.symfile_addr = img;
6148     one_entry.symfile_size = img_size;
6149 
6150     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6151     __jit_debug_descriptor.relevant_entry = &one_entry;
6152     __jit_debug_descriptor.first_entry = &one_entry;
6153     __jit_debug_register_code();
6154 }
6155 #else
6156 /* No support for the feature.  Provide the entry point expected by exec.c,
6157    and implement the internal function we declared earlier.  */
6158 
6159 static void tcg_register_jit_int(const void *buf, size_t size,
6160                                  const void *debug_frame,
6161                                  size_t debug_frame_size)
6162 {
6163 }
6164 
6165 void tcg_register_jit(const void *buf, size_t buf_size)
6166 {
6167 }
6168 #endif /* ELF_HOST_MACHINE */
6169 
6170 #if !TCG_TARGET_MAYBE_vec
6171 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6172 {
6173     g_assert_not_reached();
6174 }
6175 #endif
6176