xref: /openbmc/qemu/tcg/tcg.c (revision c0177f91)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 #include "qemu/timer.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg/tcg-temp-internal.h"
64 #include "tcg-internal.h"
65 #include "accel/tcg/perf.h"
66 
67 /* Forward declarations for functions declared in tcg-target.c.inc and
68    used here. */
69 static void tcg_target_init(TCGContext *s);
70 static void tcg_target_qemu_prologue(TCGContext *s);
71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
72                         intptr_t value, intptr_t addend);
73 
/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF .debug_frame Common Information Entry header. */
typedef struct {
    /* Aligned so the structure can be emitted directly into the image. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* Frame Description Entry header, referring back to its CIE. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE followed immediately by one FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
96 
97 static void tcg_register_jit_int(const void *buf, size_t size,
98                                  const void *debug_frame,
99                                  size_t debug_frame_size)
100     __attribute__((unused));
101 
102 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
104                        intptr_t arg2);
105 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
106 static void tcg_out_movi(TCGContext *s, TCGType type,
107                          TCGReg ret, tcg_target_long arg);
108 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
109 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
110 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
111 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
112 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
113 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
114 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
115 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
116 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
117 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
118 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
119 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
120 static void tcg_out_goto_tb(TCGContext *s, int which);
121 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
122                        const TCGArg args[TCG_MAX_OP_ARGS],
123                        const int const_args[TCG_MAX_OP_ARGS]);
124 #if TCG_TARGET_MAYBE_vec
125 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126                             TCGReg dst, TCGReg src);
127 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
128                              TCGReg dst, TCGReg base, intptr_t offset);
129 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
130                              TCGReg dst, int64_t arg);
131 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
132                            unsigned vecl, unsigned vece,
133                            const TCGArg args[TCG_MAX_OP_ARGS],
134                            const int const_args[TCG_MAX_OP_ARGS]);
135 #else
136 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
137                                    TCGReg dst, TCGReg src)
138 {
139     g_assert_not_reached();
140 }
141 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
142                                     TCGReg dst, TCGReg base, intptr_t offset)
143 {
144     g_assert_not_reached();
145 }
146 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
147                                     TCGReg dst, int64_t arg)
148 {
149     g_assert_not_reached();
150 }
151 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
152                                   unsigned vecl, unsigned vece,
153                                   const TCGArg args[TCG_MAX_OP_ARGS],
154                                   const int const_args[TCG_MAX_OP_ARGS])
155 {
156     g_assert_not_reached();
157 }
158 #endif
159 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
160                        intptr_t arg2);
161 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
162                         TCGReg base, intptr_t ofs);
163 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
164                          const TCGHelperInfo *info);
165 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
166 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
167 #ifdef TCG_TARGET_NEED_LDST_LABELS
168 static int tcg_out_ldst_finalize(TCGContext *s);
169 #endif
170 
171 TCGContext tcg_init_ctx;
172 __thread TCGContext *tcg_ctx;
173 
174 TCGContext **tcg_ctxs;
175 unsigned int tcg_cur_ctxs;
176 unsigned int tcg_max_ctxs;
177 TCGv_env cpu_env = 0;
178 const void *tcg_code_gen_epilogue;
179 uintptr_t tcg_splitwx_diff;
180 
181 #ifndef CONFIG_TCG_INTERPRETER
182 tcg_prologue_fn *tcg_qemu_tb_exec;
183 #endif
184 
185 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
186 static TCGRegSet tcg_target_call_clobber_regs;
187 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte of code at the current output position. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one byte of already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/*
 * Emit a 16-bit value.  When the insn unit is narrower than 16 bits,
 * memcpy avoids any alignment assumption and code_ptr is advanced by
 * the equivalent number of units.
 */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 16-bit value in already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value; see tcg_out16 for the narrow-unit handling. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 32-bit value in already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value; see tcg_out16 for the narrow-unit handling. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 64-bit value in already-emitted code at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
269 
/* label relocation processing */

/*
 * Record a relocation of kind @type at @code_ptr against label @l,
 * with constant @addend.  The fixup itself is deferred: it is applied
 * via patch_reloc() once the label's value is known (see
 * tcg_resolve_relocs).
 */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}
282 
/*
 * Bind label @l to the current output position.  The stored value is
 * the read-execute alias of the write pointer, since branches target
 * the executable mapping.  A label may be bound only once.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
289 
290 TCGLabel *gen_new_label(void)
291 {
292     TCGContext *s = tcg_ctx;
293     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
294 
295     memset(l, 0, sizeof(TCGLabel));
296     l->id = s->nb_labels++;
297     QSIMPLEQ_INIT(&l->branches);
298     QSIMPLEQ_INIT(&l->relocs);
299 
300     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
301 
302     return l;
303 }
304 
305 static bool tcg_resolve_relocs(TCGContext *s)
306 {
307     TCGLabel *l;
308 
309     QSIMPLEQ_FOREACH(l, &s->labels, next) {
310         TCGRelocation *r;
311         uintptr_t value = l->u.value;
312 
313         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
314             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
315                 return false;
316             }
317         }
318     }
319     return true;
320 }
321 
/*
 * Record the current code offset as the jump-reset point for outgoing
 * jump @which of the TB under translation.
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
330 
/*
 * Record the current code offset as the location of the patchable
 * jump instruction for outgoing jump @which of the TB.
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
339 
/*
 * Address of the jmp_target_addr[@which] slot of the TB under
 * translation, as seen from the executable mapping.
 */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
348 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind non-locally to the sigsetjmp(s->jmp_trans) guarding
       translation; -2 distinguishes this cause to that handler. */
    siglongjmp(s->jmp_trans, -2);
}
355 
/* Description of one move-and-extend operation; see tcg_out_movext. */
typedef struct TCGMovExtend {
    TCGReg dst;         /* destination register */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
363 
/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        /* 32-bit payload: choose move/truncate/extend by the types. */
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                /* Truncate the 64-bit source to a 32-bit destination. */
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen a 32-bit source into a 64-bit destination. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* 64-bit to 64-bit: extend the low 32 bits of the source. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* Full 64-bit payload; only meaningful on a 64-bit host. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
425 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, but with the source register overridden by @src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, taking all operands from the descriptor @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
437 
/**
 * tcg_out_movext2 -- move and extend two pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void __attribute__((unused))
tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* No overlap: emitting i1 first cannot clobber i2's source. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full swap: each destination is the other's source. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No xchg available: stage src1 through the scratch reg. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i1's destination overlaps i2's source, so emit i2 first. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
478 
479 #define C_PFX1(P, A)                    P##A
480 #define C_PFX2(P, A, B)                 P##A##_##B
481 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
482 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
483 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
484 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
485 
486 /* Define an enumeration for the various combinations. */
487 
488 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
489 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
490 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
491 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
492 
493 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
494 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
495 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
496 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
497 
498 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
499 
500 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
501 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
502 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
503 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
504 
505 typedef enum {
506 #include "tcg-target-con-set.h"
507 } TCGConstraintSetIndex;
508 
509 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
510 
511 #undef C_O0_I1
512 #undef C_O0_I2
513 #undef C_O0_I3
514 #undef C_O0_I4
515 #undef C_O1_I1
516 #undef C_O1_I2
517 #undef C_O1_I3
518 #undef C_O1_I4
519 #undef C_N1_I2
520 #undef C_O2_I1
521 #undef C_O2_I2
522 #undef C_O2_I3
523 #undef C_O2_I4
524 
525 /* Put all of the constraint sets into an array, indexed by the enum. */
526 
527 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
528 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
529 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
530 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
531 
532 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
533 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
534 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
535 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
536 
537 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
538 
539 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
540 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
541 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
542 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
543 
544 static const TCGTargetOpDef constraint_sets[] = {
545 #include "tcg-target-con-set.h"
546 };
547 
548 
549 #undef C_O0_I1
550 #undef C_O0_I2
551 #undef C_O0_I3
552 #undef C_O0_I4
553 #undef C_O1_I1
554 #undef C_O1_I2
555 #undef C_O1_I3
556 #undef C_O1_I4
557 #undef C_N1_I2
558 #undef C_O2_I1
559 #undef C_O2_I2
560 #undef C_O2_I3
561 #undef C_O2_I4
562 
563 /* Expand the enumerator to be returned from tcg_target_op_def(). */
564 
565 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
566 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
567 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
568 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
569 
570 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
571 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
572 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
573 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
574 
575 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
576 
577 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
578 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
579 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
580 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
581 
582 #include "tcg-target.c.inc"
583 
/* Allocate per-context plugin state (no-op without CONFIG_PLUGIN). */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
592 
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a copy of the fully-initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* mem_base pointed into the parent's temps[]; retarget it
               to the same index within our copy. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* Secondary contexts get their own plugin state and region. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
643 
/* pool based memory allocation */

/*
 * Slow path of pool allocation: carve @size bytes out of the context's
 * chunked pool, growing it as needed.
 *
 * Oversized requests (> TCG_POOL_CHUNK_SIZE) get a dedicated pool on
 * the pool_first_large list, which tcg_pool_reset() frees; regular
 * chunks stay linked from pool_first and are reused across resets.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                /* No reusable chunk left: allocate and link a new one. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                /* Advance to the next already-allocated chunk. */
                p = p->next;
            }
        }
    }
    /* Serve the request from the start of the (new) current chunk. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
685 
686 void tcg_pool_reset(TCGContext *s)
687 {
688     TCGPool *p, *t;
689     for (p = s->pool_first_large; p; p = t) {
690         t = p->next;
691         g_free(p);
692     }
693     s->pool_first_large = NULL;
694     s->pool_cur = s->pool_end = NULL;
695     s->pool_current = NULL;
696 }
697 
698 #include "exec/helper-proto.h"
699 
700 static TCGHelperInfo all_helpers[] = {
701 #include "exec/helper-tcg.h"
702 };
703 static GHashTable *helper_table;
704 
705 #ifdef CONFIG_TCG_INTERPRETER
/* Map a 3-bit dh_typecode value to its libffi type descriptor. */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
742 
/*
 * Build an ffi_cif call descriptor for every helper in all_helpers[],
 * memoized by typemask so helpers sharing a signature share one cif.
 * The typemask packs 3-bit typecodes: return type in bits [2:0],
 * argument j in bits starting at (j + 1) * 3.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Reuse a previously-built cif for an identical signature. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        /* cif and its argument-type array in a single allocation. */
        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    /* Only the memoization table is freed; the cifs stay referenced
       from all_helpers[] for the life of the process. */
    g_hash_table_destroy(ffi_table);
}
794 #endif /* CONFIG_TCG_INTERPRETER */
795 
/* Running totals used while laying out helper call arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
802 
803 static void layout_arg_even(TCGCumulativeArgs *cum)
804 {
805     cum->arg_slot += cum->arg_slot & 1;
806 }
807 
/*
 * Lay out one single-slot argument of kind @kind: record its location
 * in info->in[] and consume one argument slot.
 */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}
821 
/*
 * Lay out one argument that occupies @n contiguous slots, all sharing
 * the same arg_idx and distinguished only by tmp_subindex.
 */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
839 
/*
 * Lay out a 128-bit argument passed by reference.  The first entry
 * (TCG_CALL_ARG_BY_REF, placed by layout_arg_1) occupies a regular
 * argument slot and carries the pointer; the following words
 * (TCG_CALL_ARG_BY_REF_N) describe the stack copy, allocated from
 * ref_slot, that the pointer will address.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
871 
/*
 * Decode the helper signature packed into info->typemask and record,
 * once per helper, where the return value and each input argument will
 * live for the host calling convention: registers, stack slots, and/or
 * by-reference copies.  Fills in info->nr_out, info->out_kind,
 * info->nr_in and info->in[].
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;    /* low 3 bits encode the return type */
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One host register on 64-bit hosts, a pair on 32-bit hosts. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    /* Each subsequent 3-bit field in typemask encodes one argument. */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Dispatch on the target's calling-convention choice per type. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Bit 0 of the typecode distinguishes signed vs unsigned. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Keep Int128 copies aligned on the stack. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            /* Shift all by-reference slots past the stacked arguments. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1051 
1052 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1053 static void process_op_defs(TCGContext *s);
1054 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1055                                             TCGReg reg, const char *name);
1056 
/*
 * One-time initialization of the primary TCG context (tcg_init_ctx):
 * allocate per-opcode constraint storage, register all helpers and
 * compute their call layouts, let the target backend configure itself,
 * and create the fixed "env" global.  Called once from tcg_init().
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode constraint slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    /* Backend init must precede constraint parsing in process_op_defs. */
    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* n is left at the index of the first call-clobbered register. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* TCG_AREG0 must not have been reserved by the backend. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1140 
/*
 * Public entry point for TCG startup: initialize the shared context,
 * then set up the code-generation region(s) sized for tb_size and
 * split-w^x mode, shared among up to max_cpus translation threads.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1146 
1147 /*
1148  * Allocate TBs right before their corresponding translated code, making
1149  * sure that TBs and code are on different cache lines.
1150  */
1151 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1152 {
1153     uintptr_t align = qemu_icache_linesize;
1154     TranslationBlock *tb;
1155     void *next;
1156 
1157  retry:
1158     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1159     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1160 
1161     if (unlikely(next > s->code_gen_highwater)) {
1162         if (tcg_region_alloc(s)) {
1163             return NULL;
1164         }
1165         goto retry;
1166     }
1167     qatomic_set(&s->code_gen_ptr, next);
1168     s->data_gen_ptr = NULL;
1169     return tb;
1170 }
1171 
/*
 * Emit the host prologue/epilogue at the start of the code buffer and
 * publish tcg_qemu_tb_exec as its entry point.  Also flushes the
 * icache over the generated bytes and optionally disassembles the
 * prologue to the log.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point is the (executable-mapped) start of the buffer. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble code, then dump the trailing data pool. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1253 
1254 void tcg_func_start(TCGContext *s)
1255 {
1256     tcg_pool_reset(s);
1257     s->nb_temps = s->nb_globals;
1258 
1259     /* No temps have been previously allocated for size or locality.  */
1260     memset(s->free_temps, 0, sizeof(s->free_temps));
1261 
1262     /* No constant temps have been previously allocated. */
1263     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1264         if (s->const_table[i]) {
1265             g_hash_table_remove_all(s->const_table[i]);
1266         }
1267     }
1268 
1269     s->nb_ops = 0;
1270     s->nb_labels = 0;
1271     s->current_frame_offset = s->frame_start;
1272 
1273 #ifdef CONFIG_DEBUG_TCG
1274     s->goto_tb_issue_mask = 0;
1275 #endif
1276 
1277     QTAILQ_INIT(&s->ops);
1278     QTAILQ_INIT(&s->free_ops);
1279     QSIMPLEQ_INIT(&s->labels);
1280 }
1281 
1282 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1283 {
1284     int n = s->nb_temps++;
1285 
1286     if (n >= TCG_MAX_TEMPS) {
1287         tcg_raise_tb_overflow(s);
1288     }
1289     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1290 }
1291 
1292 static TCGTemp *tcg_global_alloc(TCGContext *s)
1293 {
1294     TCGTemp *ts;
1295 
1296     tcg_debug_assert(s->nb_globals == s->nb_temps);
1297     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1298     s->nb_globals++;
1299     ts = tcg_temp_alloc(s);
1300     ts->kind = TEMP_GLOBAL;
1301 
1302     return ts;
1303 }
1304 
1305 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1306                                             TCGReg reg, const char *name)
1307 {
1308     TCGTemp *ts;
1309 
1310     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1311 
1312     ts = tcg_global_alloc(s);
1313     ts->base_type = type;
1314     ts->type = type;
1315     ts->kind = TEMP_FIXED;
1316     ts->reg = reg;
1317     ts->name = name;
1318     tcg_regset_set_reg(s->reserved_regs, reg);
1319 
1320     return ts;
1321 }
1322 
1323 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1324 {
1325     s->frame_start = start;
1326     s->frame_end = start + size;
1327     s->frame_temp
1328         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1329 }
1330 
1331 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1332                                      intptr_t offset, const char *name)
1333 {
1334     TCGContext *s = tcg_ctx;
1335     TCGTemp *base_ts = tcgv_ptr_temp(base);
1336     TCGTemp *ts = tcg_global_alloc(s);
1337     int indirect_reg = 0;
1338 
1339     switch (base_ts->kind) {
1340     case TEMP_FIXED:
1341         break;
1342     case TEMP_GLOBAL:
1343         /* We do not support double-indirect registers.  */
1344         tcg_debug_assert(!base_ts->indirect_reg);
1345         base_ts->indirect_base = 1;
1346         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1347                             ? 2 : 1);
1348         indirect_reg = 1;
1349         break;
1350     default:
1351         g_assert_not_reached();
1352     }
1353 
1354     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1355         TCGTemp *ts2 = tcg_global_alloc(s);
1356         char buf[64];
1357 
1358         ts->base_type = TCG_TYPE_I64;
1359         ts->type = TCG_TYPE_I32;
1360         ts->indirect_reg = indirect_reg;
1361         ts->mem_allocated = 1;
1362         ts->mem_base = base_ts;
1363         ts->mem_offset = offset;
1364         pstrcpy(buf, sizeof(buf), name);
1365         pstrcat(buf, sizeof(buf), "_0");
1366         ts->name = strdup(buf);
1367 
1368         tcg_debug_assert(ts2 == ts + 1);
1369         ts2->base_type = TCG_TYPE_I64;
1370         ts2->type = TCG_TYPE_I32;
1371         ts2->indirect_reg = indirect_reg;
1372         ts2->mem_allocated = 1;
1373         ts2->mem_base = base_ts;
1374         ts2->mem_offset = offset + 4;
1375         ts2->temp_subindex = 1;
1376         pstrcpy(buf, sizeof(buf), name);
1377         pstrcat(buf, sizeof(buf), "_1");
1378         ts2->name = strdup(buf);
1379     } else {
1380         ts->base_type = type;
1381         ts->type = type;
1382         ts->indirect_reg = indirect_reg;
1383         ts->mem_allocated = 1;
1384         ts->mem_base = base_ts;
1385         ts->mem_offset = offset;
1386         ts->name = name;
1387     }
1388     return ts;
1389 }
1390 
/*
 * Allocate a temporary of the given type and lifetime kind.
 * TEMP_EBB temps are recycled through the per-type free list;
 * TEMP_TB temps are always freshly allocated.  Types wider than a
 * host register are represented as consecutive TCG_TYPE_REG temps
 * sharing base_type, distinguished by temp_subindex.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* n = number of host-register-sized pieces for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* The remaining pieces must be allocated contiguously. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1454 
1455 TCGv_vec tcg_temp_new_vec(TCGType type)
1456 {
1457     TCGTemp *t;
1458 
1459 #ifdef CONFIG_DEBUG_TCG
1460     switch (type) {
1461     case TCG_TYPE_V64:
1462         assert(TCG_TARGET_HAS_v64);
1463         break;
1464     case TCG_TYPE_V128:
1465         assert(TCG_TARGET_HAS_v128);
1466         break;
1467     case TCG_TYPE_V256:
1468         assert(TCG_TARGET_HAS_v256);
1469         break;
1470     default:
1471         g_assert_not_reached();
1472     }
1473 #endif
1474 
1475     t = tcg_temp_new_internal(type, TEMP_EBB);
1476     return temp_tcgv_vec(t);
1477 }
1478 
1479 /* Create a new temp of the same type as an existing temp.  */
1480 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1481 {
1482     TCGTemp *t = tcgv_vec_temp(match);
1483 
1484     tcg_debug_assert(t->temp_allocated != 0);
1485 
1486     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1487     return temp_tcgv_vec(t);
1488 }
1489 
1490 void tcg_temp_free_internal(TCGTemp *ts)
1491 {
1492     TCGContext *s = tcg_ctx;
1493 
1494     switch (ts->kind) {
1495     case TEMP_CONST:
1496     case TEMP_TB:
1497         /* Silently ignore free. */
1498         break;
1499     case TEMP_EBB:
1500         tcg_debug_assert(ts->temp_allocated != 0);
1501         ts->temp_allocated = 0;
1502         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1503         break;
1504     default:
1505         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1506         g_assert_not_reached();
1507     }
1508 }
1509 
/*
 * Return the interned TEMP_CONST temp for (type, val), creating it on
 * first use.  Constants are cached per-type in s->const_table, keyed
 * by a pointer to the 64-bit value stored inside the temp itself.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type intern table. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constant on a 32-bit host: a pair of I32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* Key points into the temp itself, which lives forever. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1564 
1565 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1566 {
1567     val = dup_const(vece, val);
1568     return temp_tcgv_vec(tcg_constant_internal(type, val));
1569 }
1570 
1571 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1572 {
1573     TCGTemp *t = tcgv_vec_temp(match);
1574 
1575     tcg_debug_assert(t->temp_allocated != 0);
1576     return tcg_constant_vec(t->base_type, vece, val);
1577 }
1578 
1579 /* Return true if OP may appear in the opcode stream.
1580    Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control-flow and memory-access ops every backend implements. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-backend capability macros. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops, present only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops (the macros are 0 on 32-bit hosts). */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: require some vector width plus per-op capability. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes are assumed present by construction. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1873 
1874 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1875 
/*
 * Emit a call to a helper function as a single INDEX_op_call op.
 *
 * @func: host address of the helper; also the key into helper_table,
 *        from which the pre-computed call layout (TCGHelperInfo) comes.
 * @ret:  output temp, or first part of a multi-part output; NULL when
 *        the helper returns nothing.
 * @args: caller-visible input temps, indexed via info->in[].arg_idx.
 *
 * NOTE(review): the @nargs parameter is not referenced in this body;
 * the input count is taken from info->nr_in instead.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Outputs + inputs + two trailing slots for func and info (below). */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Fill in the output arguments first. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /*
         * Multi-part return: RET must be the first of N consecutive
         * sub-temps of a single value of the doubled (n == 2) or
         * quadrupled (n == 4) base type.
         */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Fill in the inputs, following the per-argument location info. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            /*
             * The ABI requires this 32-bit value widened to 64 bits:
             * extend into a fresh i64 temp and pass that instead.
             * The temp is released once the op has been emitted, below.
             */
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* Trailing constant slots: the function pointer and its info. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* Free the temporaries created for argument extension above. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1962 
1963 static void tcg_reg_alloc_start(TCGContext *s)
1964 {
1965     int i, n;
1966 
1967     for (i = 0, n = s->nb_temps; i < n; i++) {
1968         TCGTemp *ts = &s->temps[i];
1969         TCGTempVal val = TEMP_VAL_MEM;
1970 
1971         switch (ts->kind) {
1972         case TEMP_CONST:
1973             val = TEMP_VAL_CONST;
1974             break;
1975         case TEMP_FIXED:
1976             val = TEMP_VAL_REG;
1977             break;
1978         case TEMP_GLOBAL:
1979             break;
1980         case TEMP_EBB:
1981             val = TEMP_VAL_DEAD;
1982             /* fall through */
1983         case TEMP_TB:
1984             ts->mem_allocated = 0;
1985             break;
1986         default:
1987             g_assert_not_reached();
1988         }
1989         ts->val_type = val;
1990     }
1991 
1992     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1993 }
1994 
1995 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1996                                  TCGTemp *ts)
1997 {
1998     int idx = temp_idx(ts);
1999 
2000     switch (ts->kind) {
2001     case TEMP_FIXED:
2002     case TEMP_GLOBAL:
2003         pstrcpy(buf, buf_size, ts->name);
2004         break;
2005     case TEMP_TB:
2006         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2007         break;
2008     case TEMP_EBB:
2009         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2010         break;
2011     case TEMP_CONST:
2012         switch (ts->type) {
2013         case TCG_TYPE_I32:
2014             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2015             break;
2016 #if TCG_TARGET_REG_BITS > 32
2017         case TCG_TYPE_I64:
2018             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2019             break;
2020 #endif
2021         case TCG_TYPE_V64:
2022         case TCG_TYPE_V128:
2023         case TCG_TYPE_V256:
2024             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2025                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2026             break;
2027         default:
2028             g_assert_not_reached();
2029         }
2030         break;
2031     }
2032     return buf;
2033 }
2034 
2035 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2036                              int buf_size, TCGArg arg)
2037 {
2038     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2039 }
2040 
/* Human-readable names for TCGCond values, used by tcg_dump_ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2056 
/*
 * Names for the size/sign/endianness part of a MemOp, indexed by
 * (op & (MO_BSWAP | MO_SSIZE)); used when dumping qemu_ld/st ops.
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
2072 
/*
 * Names for the alignment part of a MemOp, indexed by
 * (op & MO_AMASK) >> MO_ASHIFT.  The "default" alignment (whichever of
 * unaligned/aligned the target implies) prints as the empty string.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2088 
/*
 * Names for TCG_BSWAP_* flag combinations on bswap ops; entries not
 * listed here print as a raw hex value in tcg_dump_ops.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2096 
2097 static inline bool tcg_regset_single(TCGRegSet d)
2098 {
2099     return (d & (d - 1)) == 0;
2100 }
2101 
2102 static inline TCGReg tcg_regset_first(TCGRegSet d)
2103 {
2104     if (TCG_TARGET_NB_REGS <= 32) {
2105         return ctz32(d);
2106     } else {
2107         return ctz64(d);
2108     }
2109 }
2110 
/*
 * fprintf wrapper that returns only the number of characters output --
 * a negative (error) return from fprintf is reported as 0, so callers
 * can accumulate column widths without checking for errors.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2114 
/*
 * Print every op in S to F, one per line, for -d op style logging.
 * Output args, input args, then constant args are printed in order;
 * selected opcodes decode their constant args symbolically (conditions,
 * memory ops, bswap flags, labels, barriers).  When have_prefs is true,
 * the register-allocation output preferences are appended as well.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;    /* characters printed so far, for alignment below */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each target word is stored as two 32-bit op args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /* k is the running cursor into op->args; i counts cargs. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* First pass: decode the leading constant arg symbolically. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    /*
                     * NOTE(review): this MemOp shadows the outer
                     * TCGOp *op for the rest of this scope.
                     */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    /* Unknown bits set: dump the raw value instead. */
                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    /* Unnamed combinations fall back to raw hex. */
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Second pass: labels and memory barriers. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength: acquire/release/sequential. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Ordered access pair(s): r=load, w=store. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant args are dumped as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before liveness/preference info. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Which of the first two args must be synced to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Which args are dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2418 
2419 /* we give more priority to constraints with less registers */
2420 static int get_constraint_priority(const TCGOpDef *def, int k)
2421 {
2422     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2423     int n = ctpop64(arg_ct->regs);
2424 
2425     /*
2426      * Sort constraints of a single register first, which includes output
2427      * aliases (which must exactly match the input already allocated).
2428      */
2429     if (n == 1 || arg_ct->oalias) {
2430         return INT_MAX;
2431     }
2432 
2433     /*
2434      * Sort register pairs next, first then second immediately after.
2435      * Arbitrarily sort multiple pairs by the index of the first reg;
2436      * there shouldn't be many pairs.
2437      */
2438     switch (arg_ct->pair) {
2439     case 1:
2440     case 3:
2441         return (k + 1) * 2;
2442     case 2:
2443         return (arg_ct->pair_index + 1) * 2 - 1;
2444     }
2445 
2446     /* Finally, sort by decreasing register count. */
2447     assert(n > 1);
2448     return -n;
2449 }
2450 
2451 /* sort from highest priority to lowest */
2452 static void sort_constraints(TCGOpDef *def, int start, int n)
2453 {
2454     int i, j;
2455     TCGArgConstraint *a = def->args_ct;
2456 
2457     for (i = 0; i < n; i++) {
2458         a[start + i].sort_index = start + i;
2459     }
2460     if (n <= 1) {
2461         return;
2462     }
2463     for (i = 0; i < n - 1; i++) {
2464         for (j = i + 1; j < n; j++) {
2465             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2466             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2467             if (p1 < p2) {
2468                 int tmp = a[start + i].sort_index;
2469                 a[start + i].sort_index = a[start + j].sort_index;
2470                 a[start + j].sort_index = tmp;
2471             }
2472         }
2473     }
2474 }
2475 
/*
 * Translate the per-target constraint strings for every opcode into
 * the binary TCGArgConstraint representation in tcg_op_defs: resolve
 * output/input aliases ("0".."9"), new-register outputs ("&"), and
 * register pairs ("p"/"m"); then sort each op's constraints by
 * allocation priority.  Run once at startup.
 *
 * NOTE(review): the TCGContext parameter S is not referenced in this
 * body; the function operates on the global tcg_op_defs.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in args_ct; the rest are inputs. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output O: copy its constraint. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining characters are register/constant constraints. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2680 
2681 static void remove_label_use(TCGOp *op, int idx)
2682 {
2683     TCGLabel *label = arg_label(op->args[idx]);
2684     TCGLabelUse *use;
2685 
2686     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2687         if (use->op == op) {
2688             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2689             return;
2690         }
2691     }
2692     g_assert_not_reached();
2693 }
2694 
2695 void tcg_op_remove(TCGContext *s, TCGOp *op)
2696 {
2697     switch (op->opc) {
2698     case INDEX_op_br:
2699         remove_label_use(op, 0);
2700         break;
2701     case INDEX_op_brcond_i32:
2702     case INDEX_op_brcond_i64:
2703         remove_label_use(op, 3);
2704         break;
2705     case INDEX_op_brcond2_i32:
2706         remove_label_use(op, 5);
2707         break;
2708     default:
2709         break;
2710     }
2711 
2712     QTAILQ_REMOVE(&s->ops, op, link);
2713     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2714     s->nb_ops--;
2715 
2716 #ifdef CONFIG_PROFILER
2717     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2718 #endif
2719 }
2720 
2721 void tcg_remove_ops_after(TCGOp *op)
2722 {
2723     TCGContext *s = tcg_ctx;
2724 
2725     while (true) {
2726         TCGOp *last = tcg_last_op();
2727         if (last == op) {
2728             return;
2729         }
2730         tcg_op_remove(s, last);
2731     }
2732 }
2733 
2734 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2735 {
2736     TCGContext *s = tcg_ctx;
2737     TCGOp *op = NULL;
2738 
2739     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2740         QTAILQ_FOREACH(op, &s->free_ops, link) {
2741             if (nargs <= op->nargs) {
2742                 QTAILQ_REMOVE(&s->free_ops, op, link);
2743                 nargs = op->nargs;
2744                 goto found;
2745             }
2746         }
2747     }
2748 
2749     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2750     nargs = MAX(4, nargs);
2751     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2752 
2753  found:
2754     memset(op, 0, offsetof(TCGOp, link));
2755     op->opc = opc;
2756     op->nargs = nargs;
2757 
2758     /* Check for bitfield overflow. */
2759     tcg_debug_assert(op->nargs == nargs);
2760 
2761     s->nb_ops++;
2762     return op;
2763 }
2764 
2765 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2766 {
2767     TCGOp *op = tcg_op_alloc(opc, nargs);
2768     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2769     return op;
2770 }
2771 
2772 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2773                             TCGOpcode opc, unsigned nargs)
2774 {
2775     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2776     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2777     return new_op;
2778 }
2779 
2780 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2781                            TCGOpcode opc, unsigned nargs)
2782 {
2783     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2784     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2785     return new_op;
2786 }
2787 
2788 static void move_label_uses(TCGLabel *to, TCGLabel *from)
2789 {
2790     TCGLabelUse *u;
2791 
2792     QSIMPLEQ_FOREACH(u, &from->branches, next) {
2793         TCGOp *op = u->op;
2794         switch (op->opc) {
2795         case INDEX_op_br:
2796             op->args[0] = label_arg(to);
2797             break;
2798         case INDEX_op_brcond_i32:
2799         case INDEX_op_brcond_i64:
2800             op->args[3] = label_arg(to);
2801             break;
2802         case INDEX_op_brcond2_i32:
2803             op->args[5] = label_arg(to);
2804             break;
2805         default:
2806             g_assert_not_reached();
2807         }
2808     }
2809 
2810     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
2811 }
2812 
/*
 * Reachable analysis : remove unreachable code.
 *
 * Single forward pass over the op stream.  "dead" tracks whether the
 * current op follows an unconditional control transfer (and is thus
 * unreachable); "remove" is the per-op decision, defaulting to "dead"
 * and overridden by the cases below.  Also merges adjacent labels and
 * deletes branches that merely fall through to the next label.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    /* SAFE variant: the current op may be removed during iteration. */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                /* Recompute the predecessor: it just changed. */
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2904 
/*
 * Per-temp liveness state bits (used via TCGTemp.state in the
 * liveness passes below):
 *   TS_DEAD -- the value is dead; its register contents are not needed.
 *   TS_MEM  -- the value is synced to (present in) its memory slot.
 */
#define TS_DEAD  1
#define TS_MEM   2

/* Query the per-argument bits of an op's arg_life bitmask
   (arg_life must be in scope at the point of use). */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2910 
2911 /* For liveness_pass_1, the register preferences for a given temp.  */
2912 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2913 {
2914     return ts->state_ptr;
2915 }
2916 
2917 /* For liveness_pass_1, reset the preferences for a given temp to the
2918  * maximal regset for its type.
2919  */
2920 static inline void la_reset_pref(TCGTemp *ts)
2921 {
2922     *la_temp_pref(ts)
2923         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2924 }
2925 
2926 /* liveness analysis: end of function: all temps are dead, and globals
2927    should be in memory. */
2928 static void la_func_end(TCGContext *s, int ng, int nt)
2929 {
2930     int i;
2931 
2932     for (i = 0; i < ng; ++i) {
2933         s->temps[i].state = TS_DEAD | TS_MEM;
2934         la_reset_pref(&s->temps[i]);
2935     }
2936     for (i = ng; i < nt; ++i) {
2937         s->temps[i].state = TS_DEAD;
2938         la_reset_pref(&s->temps[i]);
2939     }
2940 }
2941 
2942 /* liveness analysis: end of basic block: all temps are dead, globals
2943    and local temps should be in memory. */
2944 static void la_bb_end(TCGContext *s, int ng, int nt)
2945 {
2946     int i;
2947 
2948     for (i = 0; i < nt; ++i) {
2949         TCGTemp *ts = &s->temps[i];
2950         int state;
2951 
2952         switch (ts->kind) {
2953         case TEMP_FIXED:
2954         case TEMP_GLOBAL:
2955         case TEMP_TB:
2956             state = TS_DEAD | TS_MEM;
2957             break;
2958         case TEMP_EBB:
2959         case TEMP_CONST:
2960             state = TS_DEAD;
2961             break;
2962         default:
2963             g_assert_not_reached();
2964         }
2965         ts->state = state;
2966         la_reset_pref(ts);
2967     }
2968 }
2969 
2970 /* liveness analysis: sync globals back to memory.  */
2971 static void la_global_sync(TCGContext *s, int ng)
2972 {
2973     int i;
2974 
2975     for (i = 0; i < ng; ++i) {
2976         int state = s->temps[i].state;
2977         s->temps[i].state = state | TS_MEM;
2978         if (state == TS_DEAD) {
2979             /* If the global was previously dead, reset prefs.  */
2980             la_reset_pref(&s->temps[i]);
2981         }
2982     }
2983 }
2984 
2985 /*
2986  * liveness analysis: conditional branch: all temps are dead unless
2987  * explicitly live-across-conditional-branch, globals and local temps
2988  * should be synced.
2989  */
2990 static void la_bb_sync(TCGContext *s, int ng, int nt)
2991 {
2992     la_global_sync(s, ng);
2993 
2994     for (int i = ng; i < nt; ++i) {
2995         TCGTemp *ts = &s->temps[i];
2996         int state;
2997 
2998         switch (ts->kind) {
2999         case TEMP_TB:
3000             state = ts->state;
3001             ts->state = state | TS_MEM;
3002             if (state != TS_DEAD) {
3003                 continue;
3004             }
3005             break;
3006         case TEMP_EBB:
3007         case TEMP_CONST:
3008             continue;
3009         default:
3010             g_assert_not_reached();
3011         }
3012         la_reset_pref(&s->temps[i]);
3013     }
3014 }
3015 
3016 /* liveness analysis: sync globals back to memory and kill.  */
3017 static void la_global_kill(TCGContext *s, int ng)
3018 {
3019     int i;
3020 
3021     for (i = 0; i < ng; i++) {
3022         s->temps[i].state = TS_DEAD | TS_MEM;
3023         la_reset_pref(&s->temps[i]);
3024     }
3025 }
3026 
3027 /* liveness analysis: note live globals crossing calls.  */
3028 static void la_cross_call(TCGContext *s, int nt)
3029 {
3030     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3031     int i;
3032 
3033     for (i = 0; i < nt; i++) {
3034         TCGTemp *ts = &s->temps[i];
3035         if (!(ts->state & TS_DEAD)) {
3036             TCGRegSet *pset = la_temp_pref(ts);
3037             TCGRegSet set = *pset;
3038 
3039             set &= mask;
3040             /* If the combination is not possible, restart.  */
3041             if (set == 0) {
3042                 set = tcg_target_available_regs[ts->type] & mask;
3043             }
3044             *pset = set;
3045         }
3046     }
3047 }
3048 
3049 /*
3050  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3051  * to TEMP_EBB, if possible.
3052  */
3053 static void __attribute__((noinline))
3054 liveness_pass_0(TCGContext *s)
3055 {
3056     void * const multiple_ebb = (void *)(uintptr_t)-1;
3057     int nb_temps = s->nb_temps;
3058     TCGOp *op, *ebb;
3059 
3060     for (int i = s->nb_globals; i < nb_temps; ++i) {
3061         s->temps[i].state_ptr = NULL;
3062     }
3063 
3064     /*
3065      * Represent each EBB by the op at which it begins.  In the case of
3066      * the first EBB, this is the first op, otherwise it is a label.
3067      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3068      * within a single EBB, else MULTIPLE_EBB.
3069      */
3070     ebb = QTAILQ_FIRST(&s->ops);
3071     QTAILQ_FOREACH(op, &s->ops, link) {
3072         const TCGOpDef *def;
3073         int nb_oargs, nb_iargs;
3074 
3075         switch (op->opc) {
3076         case INDEX_op_set_label:
3077             ebb = op;
3078             continue;
3079         case INDEX_op_discard:
3080             continue;
3081         case INDEX_op_call:
3082             nb_oargs = TCGOP_CALLO(op);
3083             nb_iargs = TCGOP_CALLI(op);
3084             break;
3085         default:
3086             def = &tcg_op_defs[op->opc];
3087             nb_oargs = def->nb_oargs;
3088             nb_iargs = def->nb_iargs;
3089             break;
3090         }
3091 
3092         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3093             TCGTemp *ts = arg_temp(op->args[i]);
3094 
3095             if (ts->kind != TEMP_TB) {
3096                 continue;
3097             }
3098             if (ts->state_ptr == NULL) {
3099                 ts->state_ptr = ebb;
3100             } else if (ts->state_ptr != ebb) {
3101                 ts->state_ptr = multiple_ebb;
3102             }
3103         }
3104     }
3105 
3106     /*
3107      * For TEMP_TB that turned out not to be used beyond one EBB,
3108      * reduce the liveness to TEMP_EBB.
3109      */
3110     for (int i = s->nb_globals; i < nb_temps; ++i) {
3111         TCGTemp *ts = &s->temps[i];
3112         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3113             ts->kind = TEMP_EBB;
3114         }
3115     }
3116 }
3117 
/*
 * Liveness analysis : update the opc_arg_life array to tell if a
 * given input arguments is dead. Instructions updating dead
 * temporaries are removed.
 *
 * Walks the op stream BACKWARD, so at each op the temp states reflect
 * all later uses.  For each op it records per-argument DEAD/SYNC bits
 * into op->life, removes side-effect-free ops whose outputs are all
 * dead, narrows double-word add/sub/mul ops whose high half is dead,
 * and propagates register preferences into each temp's pref set
 * (reachable via state_ptr).
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, addressed through state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* SAFE variant: the current op may be removed during iteration. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    /* Defined here: dead for all preceding ops. */
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        /* Publish the per-argument DEAD/SYNC bits for later passes. */
        op->life = arg_life;
    }
}
3460 
3461 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3462 static bool __attribute__((noinline))
3463 liveness_pass_2(TCGContext *s)
3464 {
3465     int nb_globals = s->nb_globals;
3466     int nb_temps, i;
3467     bool changes = false;
3468     TCGOp *op, *op_next;
3469 
3470     /* Create a temporary for each indirect global.  */
3471     for (i = 0; i < nb_globals; ++i) {
3472         TCGTemp *its = &s->temps[i];
3473         if (its->indirect_reg) {
3474             TCGTemp *dts = tcg_temp_alloc(s);
3475             dts->type = its->type;
3476             dts->base_type = its->base_type;
3477             dts->temp_subindex = its->temp_subindex;
3478             dts->kind = TEMP_EBB;
3479             its->state_ptr = dts;
3480         } else {
3481             its->state_ptr = NULL;
3482         }
3483         /* All globals begin dead.  */
3484         its->state = TS_DEAD;
3485     }
3486     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3487         TCGTemp *its = &s->temps[i];
3488         its->state_ptr = NULL;
3489         its->state = TS_DEAD;
3490     }
3491 
3492     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3493         TCGOpcode opc = op->opc;
3494         const TCGOpDef *def = &tcg_op_defs[opc];
3495         TCGLifeData arg_life = op->life;
3496         int nb_iargs, nb_oargs, call_flags;
3497         TCGTemp *arg_ts, *dir_ts;
3498 
3499         if (opc == INDEX_op_call) {
3500             nb_oargs = TCGOP_CALLO(op);
3501             nb_iargs = TCGOP_CALLI(op);
3502             call_flags = tcg_call_flags(op);
3503         } else {
3504             nb_iargs = def->nb_iargs;
3505             nb_oargs = def->nb_oargs;
3506 
3507             /* Set flags similar to how calls require.  */
3508             if (def->flags & TCG_OPF_COND_BRANCH) {
3509                 /* Like reading globals: sync_globals */
3510                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3511             } else if (def->flags & TCG_OPF_BB_END) {
3512                 /* Like writing globals: save_globals */
3513                 call_flags = 0;
3514             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3515                 /* Like reading globals: sync_globals */
3516                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3517             } else {
3518                 /* No effect on globals.  */
3519                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3520                               TCG_CALL_NO_WRITE_GLOBALS);
3521             }
3522         }
3523 
3524         /* Make sure that input arguments are available.  */
3525         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3526             arg_ts = arg_temp(op->args[i]);
3527             dir_ts = arg_ts->state_ptr;
3528             if (dir_ts && arg_ts->state == TS_DEAD) {
3529                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3530                                   ? INDEX_op_ld_i32
3531                                   : INDEX_op_ld_i64);
3532                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3533 
3534                 lop->args[0] = temp_arg(dir_ts);
3535                 lop->args[1] = temp_arg(arg_ts->mem_base);
3536                 lop->args[2] = arg_ts->mem_offset;
3537 
3538                 /* Loaded, but synced with memory.  */
3539                 arg_ts->state = TS_MEM;
3540             }
3541         }
3542 
3543         /* Perform input replacement, and mark inputs that became dead.
3544            No action is required except keeping temp_state up to date
3545            so that we reload when needed.  */
3546         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3547             arg_ts = arg_temp(op->args[i]);
3548             dir_ts = arg_ts->state_ptr;
3549             if (dir_ts) {
3550                 op->args[i] = temp_arg(dir_ts);
3551                 changes = true;
3552                 if (IS_DEAD_ARG(i)) {
3553                     arg_ts->state = TS_DEAD;
3554                 }
3555             }
3556         }
3557 
3558         /* Liveness analysis should ensure that the following are
3559            all correct, for call sites and basic block end points.  */
3560         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3561             /* Nothing to do */
3562         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3563             for (i = 0; i < nb_globals; ++i) {
3564                 /* Liveness should see that globals are synced back,
3565                    that is, either TS_DEAD or TS_MEM.  */
3566                 arg_ts = &s->temps[i];
3567                 tcg_debug_assert(arg_ts->state_ptr == 0
3568                                  || arg_ts->state != 0);
3569             }
3570         } else {
3571             for (i = 0; i < nb_globals; ++i) {
3572                 /* Liveness should see that globals are saved back,
3573                    that is, TS_DEAD, waiting to be reloaded.  */
3574                 arg_ts = &s->temps[i];
3575                 tcg_debug_assert(arg_ts->state_ptr == 0
3576                                  || arg_ts->state == TS_DEAD);
3577             }
3578         }
3579 
3580         /* Outputs become available.  */
3581         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3582             arg_ts = arg_temp(op->args[0]);
3583             dir_ts = arg_ts->state_ptr;
3584             if (dir_ts) {
3585                 op->args[0] = temp_arg(dir_ts);
3586                 changes = true;
3587 
3588                 /* The output is now live and modified.  */
3589                 arg_ts->state = 0;
3590 
3591                 if (NEED_SYNC_ARG(0)) {
3592                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3593                                       ? INDEX_op_st_i32
3594                                       : INDEX_op_st_i64);
3595                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3596                     TCGTemp *out_ts = dir_ts;
3597 
3598                     if (IS_DEAD_ARG(0)) {
3599                         out_ts = arg_temp(op->args[1]);
3600                         arg_ts->state = TS_DEAD;
3601                         tcg_op_remove(s, op);
3602                     } else {
3603                         arg_ts->state = TS_MEM;
3604                     }
3605 
3606                     sop->args[0] = temp_arg(out_ts);
3607                     sop->args[1] = temp_arg(arg_ts->mem_base);
3608                     sop->args[2] = arg_ts->mem_offset;
3609                 } else {
3610                     tcg_debug_assert(!IS_DEAD_ARG(0));
3611                 }
3612             }
3613         } else {
3614             for (i = 0; i < nb_oargs; i++) {
3615                 arg_ts = arg_temp(op->args[i]);
3616                 dir_ts = arg_ts->state_ptr;
3617                 if (!dir_ts) {
3618                     continue;
3619                 }
3620                 op->args[i] = temp_arg(dir_ts);
3621                 changes = true;
3622 
3623                 /* The output is now live and modified.  */
3624                 arg_ts->state = 0;
3625 
3626                 /* Sync outputs upon their last write.  */
3627                 if (NEED_SYNC_ARG(i)) {
3628                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3629                                       ? INDEX_op_st_i32
3630                                       : INDEX_op_st_i64);
3631                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3632 
3633                     sop->args[0] = temp_arg(dir_ts);
3634                     sop->args[1] = temp_arg(arg_ts->mem_base);
3635                     sop->args[2] = arg_ts->mem_offset;
3636 
3637                     arg_ts->state = TS_MEM;
3638                 }
3639                 /* Drop outputs that are dead.  */
3640                 if (IS_DEAD_ARG(i)) {
3641                     arg_ts->state = TS_DEAD;
3642                 }
3643             }
3644         }
3645     }
3646 
3647     return changes;
3648 }
3649 
/*
 * Assign a stack-frame slot to @ts: compute size/alignment from the
 * temp's base type, bump s->current_frame_offset, and record the
 * resulting mem_base/mem_offset on the temp (and on all of its parts,
 * if the base type was subdivided).  If the frame is exhausted,
 * restart code generation with a smaller TB via tcg_raise_tb_overflow.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC frame addressing includes a bias; fold it into the offset. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
3718 
3719 /* Assign @reg to @ts, and update reg_to_temp[]. */
3720 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3721 {
3722     if (ts->val_type == TEMP_VAL_REG) {
3723         TCGReg old = ts->reg;
3724         tcg_debug_assert(s->reg_to_temp[old] == ts);
3725         if (old == reg) {
3726             return;
3727         }
3728         s->reg_to_temp[old] = NULL;
3729     }
3730     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3731     s->reg_to_temp[reg] = ts;
3732     ts->val_type = TEMP_VAL_REG;
3733     ts->reg = reg;
3734 }
3735 
3736 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3737 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3738 {
3739     tcg_debug_assert(type != TEMP_VAL_REG);
3740     if (ts->val_type == TEMP_VAL_REG) {
3741         TCGReg reg = ts->reg;
3742         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3743         s->reg_to_temp[reg] = NULL;
3744     }
3745     ts->val_type = type;
3746 }
3747 
3748 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3749 
3750 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3751    mark it free; otherwise mark it dead.  */
3752 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3753 {
3754     TCGTempVal new_type;
3755 
3756     switch (ts->kind) {
3757     case TEMP_FIXED:
3758         return;
3759     case TEMP_GLOBAL:
3760     case TEMP_TB:
3761         new_type = TEMP_VAL_MEM;
3762         break;
3763     case TEMP_EBB:
3764         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3765         break;
3766     case TEMP_CONST:
3767         new_type = TEMP_VAL_CONST;
3768         break;
3769     default:
3770         g_assert_not_reached();
3771     }
3772     set_temp_val_nonreg(s, ts, new_type);
3773 }
3774 
3775 /* Mark a temporary as dead.  */
3776 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3777 {
3778     temp_free_or_dead(s, ts, 1);
3779 }
3780 
/*
 * Sync a temporary to memory.  'allocated_regs' is used in case a
 * temporary register needs to be allocated to store a constant.
 * If 'free_or_dead' is non-zero, subsequently release the temporary;
 * if it is positive, the temp is dead; if it is negative, the temp
 * is free.
 */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; coherent temps already match. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register and store. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3824 
3825 /* free register 'reg' by spilling the corresponding temporary if necessary */
3826 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3827 {
3828     TCGTemp *ts = s->reg_to_temp[reg];
3829     if (ts != NULL) {
3830         temp_sync(s, ts, allocated_regs, 0, -1);
3831     }
3832 }
3833 
3834 /**
3835  * tcg_reg_alloc:
3836  * @required_regs: Set of registers in which we must allocate.
3837  * @allocated_regs: Set of registers which must be avoided.
3838  * @preferred_regs: Set of registers we should prefer.
3839  * @rev: True if we search the registers in "indirect" order.
3840  *
3841  * The allocated register must be in @required_regs & ~@allocated_regs,
3842  * but if we can put it in @preferred_regs we may save a move later.
3843  */
3844 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3845                             TCGRegSet allocated_regs,
3846                             TCGRegSet preferred_regs, bool rev)
3847 {
3848     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3849     TCGRegSet reg_ct[2];
3850     const int *order;
3851 
3852     reg_ct[1] = required_regs & ~allocated_regs;
3853     tcg_debug_assert(reg_ct[1] != 0);
3854     reg_ct[0] = reg_ct[1] & preferred_regs;
3855 
3856     /* Skip the preferred_regs option if it cannot be satisfied,
3857        or if the preference made no difference.  */
3858     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3859 
3860     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3861 
3862     /* Try free registers, preferences first.  */
3863     for (j = f; j < 2; j++) {
3864         TCGRegSet set = reg_ct[j];
3865 
3866         if (tcg_regset_single(set)) {
3867             /* One register in the set.  */
3868             TCGReg reg = tcg_regset_first(set);
3869             if (s->reg_to_temp[reg] == NULL) {
3870                 return reg;
3871             }
3872         } else {
3873             for (i = 0; i < n; i++) {
3874                 TCGReg reg = order[i];
3875                 if (s->reg_to_temp[reg] == NULL &&
3876                     tcg_regset_test_reg(set, reg)) {
3877                     return reg;
3878                 }
3879             }
3880         }
3881     }
3882 
3883     /* We must spill something.  */
3884     for (j = f; j < 2; j++) {
3885         TCGRegSet set = reg_ct[j];
3886 
3887         if (tcg_regset_single(set)) {
3888             /* One register in the set.  */
3889             TCGReg reg = tcg_regset_first(set);
3890             tcg_reg_free(s, reg, allocated_regs);
3891             return reg;
3892         } else {
3893             for (i = 0; i < n; i++) {
3894                 TCGReg reg = order[i];
3895                 if (tcg_regset_test_reg(set, reg)) {
3896                     tcg_reg_free(s, reg, allocated_regs);
3897                     return reg;
3898                 }
3899             }
3900         }
3901     }
3902 
3903     g_assert_not_reached();
3904 }
3905 
/*
 * As tcg_reg_alloc, but allocate two adjacent registers (reg, reg + 1),
 * returning the first of the pair.  Registers already holding temps are
 * spilled with tcg_reg_free() as needed.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are currently free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* The fmin == 0 pass accepts any candidate, so we cannot get here. */
    g_assert_not_reached();
}
3951 
/*
 * Make sure the temporary is in a register.  If needed, allocate the
 * register from DESIRED while avoiding ALLOCATED.  Constants are
 * materialized with movi (integer types) or dupi (vector types);
 * memory-resident temps are loaded and marked coherent.
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* A freshly materialized constant is not yet stored to memory. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4000 
/*
 * "Save" a temporary to memory.  Liveness analysis has already
 * guaranteed the value is back in memory, so nothing is emitted here;
 * the function only asserts that invariant.  'allocated_regs' is
 * unused but kept for interface symmetry with callers.
 */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4009 
4010 /* save globals to their canonical location and assume they can be
4011    modified be the following code. 'allocated_regs' is used in case a
4012    temporary registers needs to be allocated to store a constant. */
4013 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4014 {
4015     int i, n;
4016 
4017     for (i = 0, n = s->nb_globals; i < n; i++) {
4018         temp_save(s, &s->temps[i], allocated_regs);
4019     }
4020 }
4021 
4022 /* sync globals to their canonical location and assume they can be
4023    read by the following code. 'allocated_regs' is used in case a
4024    temporary registers needs to be allocated to store a constant. */
4025 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4026 {
4027     int i, n;
4028 
4029     for (i = 0, n = s->nb_globals; i < n; i++) {
4030         TCGTemp *ts = &s->temps[i];
4031         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4032                          || ts->kind == TEMP_FIXED
4033                          || ts->mem_coherent);
4034     }
4035 }
4036 
4037 /* at the end of a basic block, we assume all temporaries are dead and
4038    all globals are stored at their canonical location. */
4039 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4040 {
4041     int i;
4042 
4043     for (i = s->nb_globals; i < s->nb_temps; i++) {
4044         TCGTemp *ts = &s->temps[i];
4045 
4046         switch (ts->kind) {
4047         case TEMP_TB:
4048             temp_save(s, ts, allocated_regs);
4049             break;
4050         case TEMP_EBB:
4051             /* The liveness analysis already ensures that temps are dead.
4052                Keep an tcg_debug_assert for safety. */
4053             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4054             break;
4055         case TEMP_CONST:
4056             /* Similarly, we should have freed any allocated register. */
4057             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4058             break;
4059         default:
4060             g_assert_not_reached();
4061         }
4062     }
4063 
4064     save_globals(s, allocated_regs);
4065 }
4066 
4067 /*
4068  * At a conditional branch, we assume all temporaries are dead unless
4069  * explicitly live-across-conditional-branch; all globals and local
4070  * temps are synced to their location.
4071  */
4072 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4073 {
4074     sync_globals(s, allocated_regs);
4075 
4076     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4077         TCGTemp *ts = &s->temps[i];
4078         /*
4079          * The liveness analysis already ensures that temps are dead.
4080          * Keep tcg_debug_asserts for safety.
4081          */
4082         switch (ts->kind) {
4083         case TEMP_TB:
4084             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4085             break;
4086         case TEMP_EBB:
4087         case TEMP_CONST:
4088             break;
4089         default:
4090             g_assert_not_reached();
4091         }
4092     }
4093 }
4094 
4095 /*
4096  * Specialized code generation for INDEX_op_mov_* with a constant.
4097  */
4098 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4099                                   tcg_target_ulong val, TCGLifeData arg_life,
4100                                   TCGRegSet preferred_regs)
4101 {
4102     /* ENV should not be modified.  */
4103     tcg_debug_assert(!temp_readonly(ots));
4104 
4105     /* The movi is not explicitly generated here.  */
4106     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4107     ots->val = val;
4108     ots->mem_coherent = 0;
4109     if (NEED_SYNC_ARG(0)) {
4110         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4111     } else if (IS_DEAD_ARG(0)) {
4112         temp_dead(s, ots);
4113     }
4114 }
4115 
4116 /*
4117  * Specialized code generation for INDEX_op_mov_*.
4118  */
4119 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4120 {
4121     const TCGLifeData arg_life = op->life;
4122     TCGRegSet allocated_regs, preferred_regs;
4123     TCGTemp *ts, *ots;
4124     TCGType otype, itype;
4125     TCGReg oreg, ireg;
4126 
4127     allocated_regs = s->reserved_regs;
4128     preferred_regs = output_pref(op, 0);
4129     ots = arg_temp(op->args[0]);
4130     ts = arg_temp(op->args[1]);
4131 
4132     /* ENV should not be modified.  */
4133     tcg_debug_assert(!temp_readonly(ots));
4134 
4135     /* Note that otype != itype for no-op truncation.  */
4136     otype = ots->type;
4137     itype = ts->type;
4138 
4139     if (ts->val_type == TEMP_VAL_CONST) {
4140         /* propagate constant or generate sti */
4141         tcg_target_ulong val = ts->val;
4142         if (IS_DEAD_ARG(1)) {
4143             temp_dead(s, ts);
4144         }
4145         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4146         return;
4147     }
4148 
4149     /* If the source value is in memory we're going to be forced
4150        to have it in a register in order to perform the copy.  Copy
4151        the SOURCE value into its own register first, that way we
4152        don't have to reload SOURCE the next time it is used. */
4153     if (ts->val_type == TEMP_VAL_MEM) {
4154         temp_load(s, ts, tcg_target_available_regs[itype],
4155                   allocated_regs, preferred_regs);
4156     }
4157     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4158     ireg = ts->reg;
4159 
4160     if (IS_DEAD_ARG(0)) {
4161         /* mov to a non-saved dead register makes no sense (even with
4162            liveness analysis disabled). */
4163         tcg_debug_assert(NEED_SYNC_ARG(0));
4164         if (!ots->mem_allocated) {
4165             temp_allocate_frame(s, ots);
4166         }
4167         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4168         if (IS_DEAD_ARG(1)) {
4169             temp_dead(s, ts);
4170         }
4171         temp_dead(s, ots);
4172         return;
4173     }
4174 
4175     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4176         /*
4177          * The mov can be suppressed.  Kill input first, so that it
4178          * is unlinked from reg_to_temp, then set the output to the
4179          * reg that we saved from the input.
4180          */
4181         temp_dead(s, ts);
4182         oreg = ireg;
4183     } else {
4184         if (ots->val_type == TEMP_VAL_REG) {
4185             oreg = ots->reg;
4186         } else {
4187             /* Make sure to not spill the input register during allocation. */
4188             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4189                                  allocated_regs | ((TCGRegSet)1 << ireg),
4190                                  preferred_regs, ots->indirect_base);
4191         }
4192         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4193             /*
4194              * Cross register class move not supported.
4195              * Store the source register into the destination slot
4196              * and leave the destination temp as TEMP_VAL_MEM.
4197              */
4198             assert(!temp_readonly(ots));
4199             if (!ts->mem_allocated) {
4200                 temp_allocate_frame(s, ots);
4201             }
4202             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4203             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4204             ots->mem_coherent = 1;
4205             return;
4206         }
4207     }
4208     set_temp_val_reg(s, ots, oreg);
4209     ots->mem_coherent = 0;
4210 
4211     if (NEED_SYNC_ARG(0)) {
4212         temp_sync(s, ots, allocated_regs, 0, 0);
4213     }
4214 }
4215 
4216 /*
4217  * Specialized code generation for INDEX_op_dup_vec.
4218  */
4219 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4220 {
4221     const TCGLifeData arg_life = op->life;
4222     TCGRegSet dup_out_regs, dup_in_regs;
4223     TCGTemp *its, *ots;
4224     TCGType itype, vtype;
4225     unsigned vece;
4226     int lowpart_ofs;
4227     bool ok;
4228 
4229     ots = arg_temp(op->args[0]);
4230     its = arg_temp(op->args[1]);
4231 
4232     /* ENV should not be modified.  */
4233     tcg_debug_assert(!temp_readonly(ots));
4234 
4235     itype = its->type;
4236     vece = TCGOP_VECE(op);
4237     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4238 
4239     if (its->val_type == TEMP_VAL_CONST) {
4240         /* Propagate constant via movi -> dupi.  */
4241         tcg_target_ulong val = its->val;
4242         if (IS_DEAD_ARG(1)) {
4243             temp_dead(s, its);
4244         }
4245         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4246         return;
4247     }
4248 
4249     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4250     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4251 
4252     /* Allocate the output register now.  */
4253     if (ots->val_type != TEMP_VAL_REG) {
4254         TCGRegSet allocated_regs = s->reserved_regs;
4255         TCGReg oreg;
4256 
4257         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4258             /* Make sure to not spill the input register. */
4259             tcg_regset_set_reg(allocated_regs, its->reg);
4260         }
4261         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4262                              output_pref(op, 0), ots->indirect_base);
4263         set_temp_val_reg(s, ots, oreg);
4264     }
4265 
4266     switch (its->val_type) {
4267     case TEMP_VAL_REG:
4268         /*
4269          * The dup constriaints must be broad, covering all possible VECE.
4270          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4271          * to fail, indicating that extra moves are required for that case.
4272          */
4273         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4274             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4275                 goto done;
4276             }
4277             /* Try again from memory or a vector input register.  */
4278         }
4279         if (!its->mem_coherent) {
4280             /*
4281              * The input register is not synced, and so an extra store
4282              * would be required to use memory.  Attempt an integer-vector
4283              * register move first.  We do not have a TCGRegSet for this.
4284              */
4285             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4286                 break;
4287             }
4288             /* Sync the temp back to its slot and load from there.  */
4289             temp_sync(s, its, s->reserved_regs, 0, 0);
4290         }
4291         /* fall through */
4292 
4293     case TEMP_VAL_MEM:
4294         lowpart_ofs = 0;
4295         if (HOST_BIG_ENDIAN) {
4296             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4297         }
4298         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4299                              its->mem_offset + lowpart_ofs)) {
4300             goto done;
4301         }
4302         /* Load the input into the destination vector register. */
4303         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4304         break;
4305 
4306     default:
4307         g_assert_not_reached();
4308     }
4309 
4310     /* We now have a vector input register, so dup must succeed. */
4311     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4312     tcg_debug_assert(ok);
4313 
4314  done:
4315     ots->mem_coherent = 0;
4316     if (IS_DEAD_ARG(1)) {
4317         temp_dead(s, its);
4318     }
4319     if (NEED_SYNC_ARG(0)) {
4320         temp_sync(s, ots, s->reserved_regs, 0, 0);
4321     }
4322     if (IS_DEAD_ARG(0)) {
4323         temp_dead(s, ots);
4324     }
4325 }
4326 
/*
 * Register-allocate a generic TCG opcode: satisfy the operand
 * constraints from tcg_op_defs (loading or moving temps into suitable
 * host registers, materializing matching constants inline), then emit
 * the target instruction and sync/kill outputs per the liveness data.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Reserved registers may never be allocated for operands. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Visit inputs in the order given by sort_index. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* The loaded register may still not satisfy the constraint. */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                /*
                 * The pair may stay where it is only when both halves are
                 * dead, writable, in adjacent registers that satisfy the
                 * constraint, and the adjacent registers are free.
                 */
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half already chose the pair base register. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /*
             * The input may stay in place if it is dead, writable, in a
             * register satisfying the constraint, and the register below
             * it (the pair's first half) is free.
             */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* New register must not overlap any input register. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    /* Adjacent to the first half chosen above. */
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            /* Register now holds the newest value; memory copy is stale. */
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    /* Extension opcodes are emitted via dedicated backend helpers. */
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4692 
/*
 * Register-allocate a dup2 vector op: combine the two 32-bit inputs
 * into replicated 64-bit vector elements.  Attempts the cheaper
 * dupi_vec (immediate) and dupm_vec (memory) forms first; returns
 * false to request generic expansion when neither applies.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);      /* output vector */
    itsl = arg_temp(op->args[1]);     /* low 32 bits */
    itsh = arg_temp(op->args[2]);     /* high 32 bits */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the smallest element size that replicates to @val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Ensure both halves are current in their backing slots. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* The register now holds the newest value; memory copy is stale. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4780 
4781 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4782                          TCGRegSet allocated_regs)
4783 {
4784     if (ts->val_type == TEMP_VAL_REG) {
4785         if (ts->reg != reg) {
4786             tcg_reg_free(s, reg, allocated_regs);
4787             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4788                 /*
4789                  * Cross register class move not supported.  Sync the
4790                  * temp back to its slot and load from there.
4791                  */
4792                 temp_sync(s, ts, allocated_regs, 0, 0);
4793                 tcg_out_ld(s, ts->type, reg,
4794                            ts->mem_base->reg, ts->mem_offset);
4795             }
4796         }
4797     } else {
4798         TCGRegSet arg_set = 0;
4799 
4800         tcg_reg_free(s, reg, allocated_regs);
4801         tcg_regset_set_reg(arg_set, reg);
4802         temp_load(s, ts, arg_set, allocated_regs, 0);
4803     }
4804 }
4805 
4806 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4807                          TCGRegSet allocated_regs)
4808 {
4809     /*
4810      * When the destination is on the stack, load up the temp and store.
4811      * If there are many call-saved registers, the temp might live to
4812      * see another use; otherwise it'll be discarded.
4813      */
4814     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4815     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4816                TCG_TARGET_CALL_STACK_OFFSET +
4817                stk_slot * sizeof(tcg_target_long));
4818 }
4819 
4820 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4821                             TCGTemp *ts, TCGRegSet *allocated_regs)
4822 {
4823     if (REG_P(l)) {
4824         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4825         load_arg_reg(s, reg, ts, *allocated_regs);
4826         tcg_regset_set_reg(*allocated_regs, reg);
4827     } else {
4828         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4829                      ts, *allocated_regs);
4830     }
4831 }
4832 
4833 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4834                          intptr_t ref_off, TCGRegSet *allocated_regs)
4835 {
4836     TCGReg reg;
4837     int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4838 
4839     if (stk_slot < 0) {
4840         reg = tcg_target_call_iarg_regs[arg_slot];
4841         tcg_reg_free(s, reg, *allocated_regs);
4842         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4843         tcg_regset_set_reg(*allocated_regs, reg);
4844     } else {
4845         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4846                             *allocated_regs, 0, false);
4847         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4848         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4849                    TCG_TARGET_CALL_STACK_OFFSET
4850                    + stk_slot * sizeof(tcg_target_long));
4851     }
4852 }
4853 
/*
 * Register-allocate a helper call: marshal inputs into ABI locations,
 * clobber the call-clobbered registers, save/sync globals as the
 * helper's flags require, emit the call, then bind the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to the stack, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference argument: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            /* Register holds the newest value; memory copy is stale. */
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            /* Spill the vector return register into the home slot. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4985 
#ifdef CONFIG_PROFILER

/* Accumulation helpers for TCGProfile fields; avoid copy/paste errors. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Fold @field into the running maximum, reading the source atomically. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
5001 
5002 /* Pass in a zero'ed @prof */
5003 static inline
5004 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
5005 {
5006     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5007     unsigned int i;
5008 
5009     for (i = 0; i < n_ctxs; i++) {
5010         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5011         const TCGProfile *orig = &s->prof;
5012 
5013         if (counters) {
5014             PROF_ADD(prof, orig, cpu_exec_time);
5015             PROF_ADD(prof, orig, tb_count1);
5016             PROF_ADD(prof, orig, tb_count);
5017             PROF_ADD(prof, orig, op_count);
5018             PROF_MAX(prof, orig, op_count_max);
5019             PROF_ADD(prof, orig, temp_count);
5020             PROF_MAX(prof, orig, temp_count_max);
5021             PROF_ADD(prof, orig, del_op_count);
5022             PROF_ADD(prof, orig, code_in_len);
5023             PROF_ADD(prof, orig, code_out_len);
5024             PROF_ADD(prof, orig, search_out_len);
5025             PROF_ADD(prof, orig, interm_time);
5026             PROF_ADD(prof, orig, code_time);
5027             PROF_ADD(prof, orig, la_time);
5028             PROF_ADD(prof, orig, opt_time);
5029             PROF_ADD(prof, orig, restore_count);
5030             PROF_ADD(prof, orig, restore_time);
5031         }
5032         if (table) {
5033             int i;
5034 
5035             for (i = 0; i < NB_OPS; i++) {
5036                 PROF_ADD(prof, orig, table_op_count[i]);
5037             }
5038         }
5039     }
5040 }
5041 
#undef PROF_ADD
#undef PROF_MAX

/* Snapshot only the scalar counters from all TCG contexts into @prof. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

/* Snapshot only the per-opcode table counts from all TCG contexts. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
5054 
5055 void tcg_dump_op_count(GString *buf)
5056 {
5057     TCGProfile prof = {};
5058     int i;
5059 
5060     tcg_profile_snapshot_table(&prof);
5061     for (i = 0; i < NB_OPS; i++) {
5062         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5063                                prof.table_op_count[i]);
5064     }
5065 }
5066 
5067 int64_t tcg_cpu_exec_time(void)
5068 {
5069     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5070     unsigned int i;
5071     int64_t ret = 0;
5072 
5073     for (i = 0; i < n_ctxs; i++) {
5074         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5075         const TCGProfile *prof = &s->prof;
5076 
5077         ret += qatomic_read(&prof->cpu_exec_time);
5078     }
5079     return ret;
5080 }
#else
/* Stub implementations used when built without CONFIG_PROFILER. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    /* No timing data is collected without the profiler. */
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif
5093 
5094 
5095 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
5096 {
5097 #ifdef CONFIG_PROFILER
5098     TCGProfile *prof = &s->prof;
5099 #endif
5100     int i, num_insns;
5101     TCGOp *op;
5102 
5103 #ifdef CONFIG_PROFILER
5104     {
5105         int n = 0;
5106 
5107         QTAILQ_FOREACH(op, &s->ops, link) {
5108             n++;
5109         }
5110         qatomic_set(&prof->op_count, prof->op_count + n);
5111         if (n > prof->op_count_max) {
5112             qatomic_set(&prof->op_count_max, n);
5113         }
5114 
5115         n = s->nb_temps;
5116         qatomic_set(&prof->temp_count, prof->temp_count + n);
5117         if (n > prof->temp_count_max) {
5118             qatomic_set(&prof->temp_count_max, n);
5119         }
5120     }
5121 #endif
5122 
5123 #ifdef DEBUG_DISAS
5124     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5125                  && qemu_log_in_addr_range(pc_start))) {
5126         FILE *logfile = qemu_log_trylock();
5127         if (logfile) {
5128             fprintf(logfile, "OP:\n");
5129             tcg_dump_ops(s, logfile, false);
5130             fprintf(logfile, "\n");
5131             qemu_log_unlock(logfile);
5132         }
5133     }
5134 #endif
5135 
5136 #ifdef CONFIG_DEBUG_TCG
5137     /* Ensure all labels referenced have been emitted.  */
5138     {
5139         TCGLabel *l;
5140         bool error = false;
5141 
5142         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5143             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5144                 qemu_log_mask(CPU_LOG_TB_OP,
5145                               "$L%d referenced but not present.\n", l->id);
5146                 error = true;
5147             }
5148         }
5149         assert(!error);
5150     }
5151 #endif
5152 
5153 #ifdef CONFIG_PROFILER
5154     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
5155 #endif
5156 
5157 #ifdef USE_TCG_OPTIMIZATIONS
5158     tcg_optimize(s);
5159 #endif
5160 
5161 #ifdef CONFIG_PROFILER
5162     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
5163     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
5164 #endif
5165 
5166     reachable_code_pass(s);
5167     liveness_pass_0(s);
5168     liveness_pass_1(s);
5169 
5170     if (s->nb_indirects > 0) {
5171 #ifdef DEBUG_DISAS
5172         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5173                      && qemu_log_in_addr_range(pc_start))) {
5174             FILE *logfile = qemu_log_trylock();
5175             if (logfile) {
5176                 fprintf(logfile, "OP before indirect lowering:\n");
5177                 tcg_dump_ops(s, logfile, false);
5178                 fprintf(logfile, "\n");
5179                 qemu_log_unlock(logfile);
5180             }
5181         }
5182 #endif
5183         /* Replace indirect temps with direct temps.  */
5184         if (liveness_pass_2(s)) {
5185             /* If changes were made, re-run liveness.  */
5186             liveness_pass_1(s);
5187         }
5188     }
5189 
5190 #ifdef CONFIG_PROFILER
5191     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
5192 #endif
5193 
5194 #ifdef DEBUG_DISAS
5195     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5196                  && qemu_log_in_addr_range(pc_start))) {
5197         FILE *logfile = qemu_log_trylock();
5198         if (logfile) {
5199             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5200             tcg_dump_ops(s, logfile, true);
5201             fprintf(logfile, "\n");
5202             qemu_log_unlock(logfile);
5203         }
5204     }
5205 #endif
5206 
5207     /* Initialize goto_tb jump offsets. */
5208     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5209     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5210     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5211     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5212 
5213     tcg_reg_alloc_start(s);
5214 
5215     /*
5216      * Reset the buffer pointers when restarting after overflow.
5217      * TODO: Move this into translate-all.c with the rest of the
5218      * buffer management.  Having only this done here is confusing.
5219      */
5220     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
5221     s->code_ptr = s->code_buf;
5222 
5223 #ifdef TCG_TARGET_NEED_LDST_LABELS
5224     QSIMPLEQ_INIT(&s->ldst_labels);
5225 #endif
5226 #ifdef TCG_TARGET_NEED_POOL_LABELS
5227     s->pool_labels = NULL;
5228 #endif
5229 
5230     num_insns = -1;
5231     QTAILQ_FOREACH(op, &s->ops, link) {
5232         TCGOpcode opc = op->opc;
5233 
5234 #ifdef CONFIG_PROFILER
5235         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
5236 #endif
5237 
5238         switch (opc) {
5239         case INDEX_op_mov_i32:
5240         case INDEX_op_mov_i64:
5241         case INDEX_op_mov_vec:
5242             tcg_reg_alloc_mov(s, op);
5243             break;
5244         case INDEX_op_dup_vec:
5245             tcg_reg_alloc_dup(s, op);
5246             break;
5247         case INDEX_op_insn_start:
5248             if (num_insns >= 0) {
5249                 size_t off = tcg_current_code_size(s);
5250                 s->gen_insn_end_off[num_insns] = off;
5251                 /* Assert that we do not overflow our stored offset.  */
5252                 assert(s->gen_insn_end_off[num_insns] == off);
5253             }
5254             num_insns++;
5255             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
5256                 target_ulong a;
5257 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
5258                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
5259 #else
5260                 a = op->args[i];
5261 #endif
5262                 s->gen_insn_data[num_insns][i] = a;
5263             }
5264             break;
5265         case INDEX_op_discard:
5266             temp_dead(s, arg_temp(op->args[0]));
5267             break;
5268         case INDEX_op_set_label:
5269             tcg_reg_alloc_bb_end(s, s->reserved_regs);
5270             tcg_out_label(s, arg_label(op->args[0]));
5271             break;
5272         case INDEX_op_call:
5273             tcg_reg_alloc_call(s, op);
5274             break;
5275         case INDEX_op_exit_tb:
5276             tcg_out_exit_tb(s, op->args[0]);
5277             break;
5278         case INDEX_op_goto_tb:
5279             tcg_out_goto_tb(s, op->args[0]);
5280             break;
5281         case INDEX_op_dup2_vec:
5282             if (tcg_reg_alloc_dup2(s, op)) {
5283                 break;
5284             }
5285             /* fall through */
5286         default:
5287             /* Sanity check that we've not introduced any unhandled opcodes. */
5288             tcg_debug_assert(tcg_op_supported(opc));
5289             /* Note: in order to speed up the code, it would be much
5290                faster to have specialized register allocator functions for
5291                some common argument patterns */
5292             tcg_reg_alloc_op(s, op);
5293             break;
5294         }
5295         /* Test for (pending) buffer overflow.  The assumption is that any
5296            one operation beginning below the high water mark cannot overrun
5297            the buffer completely.  Thus we can test for overflow after
5298            generating code without having to check during generation.  */
5299         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
5300             return -1;
5301         }
5302         /* Test for TB overflow, as seen by gen_insn_end_off.  */
5303         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
5304             return -2;
5305         }
5306     }
5307     tcg_debug_assert(num_insns >= 0);
5308     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
5309 
5310     /* Generate TB finalization at the end of block */
5311 #ifdef TCG_TARGET_NEED_LDST_LABELS
5312     i = tcg_out_ldst_finalize(s);
5313     if (i < 0) {
5314         return i;
5315     }
5316 #endif
5317 #ifdef TCG_TARGET_NEED_POOL_LABELS
5318     i = tcg_out_pool_finalize(s);
5319     if (i < 0) {
5320         return i;
5321     }
5322 #endif
5323     if (!tcg_resolve_relocs(s)) {
5324         return -2;
5325     }
5326 
5327 #ifndef CONFIG_TCG_INTERPRETER
5328     /* flush instruction cache */
5329     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
5330                         (uintptr_t)s->code_buf,
5331                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
5332 #endif
5333 
5334     return tcg_current_code_size(s);
5335 }
5336 
5337 #ifdef CONFIG_PROFILER
5338 void tcg_dump_info(GString *buf)
5339 {
5340     TCGProfile prof = {};
5341     const TCGProfile *s;
5342     int64_t tb_count;
5343     int64_t tb_div_count;
5344     int64_t tot;
5345 
5346     tcg_profile_snapshot_counters(&prof);
5347     s = &prof;
5348     tb_count = s->tb_count;
5349     tb_div_count = tb_count ? tb_count : 1;
5350     tot = s->interm_time + s->code_time;
5351 
5352     g_string_append_printf(buf, "JIT cycles          %" PRId64
5353                            " (%0.3f s at 2.4 GHz)\n",
5354                            tot, tot / 2.4e9);
5355     g_string_append_printf(buf, "translated TBs      %" PRId64
5356                            " (aborted=%" PRId64 " %0.1f%%)\n",
5357                            tb_count, s->tb_count1 - tb_count,
5358                            (double)(s->tb_count1 - s->tb_count)
5359                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
5360     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
5361                            (double)s->op_count / tb_div_count, s->op_count_max);
5362     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
5363                            (double)s->del_op_count / tb_div_count);
5364     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
5365                            (double)s->temp_count / tb_div_count,
5366                            s->temp_count_max);
5367     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
5368                            (double)s->code_out_len / tb_div_count);
5369     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
5370                            (double)s->search_out_len / tb_div_count);
5371 
5372     g_string_append_printf(buf, "cycles/op           %0.1f\n",
5373                            s->op_count ? (double)tot / s->op_count : 0);
5374     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
5375                            s->code_in_len ? (double)tot / s->code_in_len : 0);
5376     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
5377                            s->code_out_len ? (double)tot / s->code_out_len : 0);
5378     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
5379                            s->search_out_len ?
5380                            (double)tot / s->search_out_len : 0);
5381     if (tot == 0) {
5382         tot = 1;
5383     }
5384     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
5385                            (double)s->interm_time / tot * 100.0);
5386     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
5387                            (double)s->code_time / tot * 100.0);
5388     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
5389                            (double)s->opt_time / (s->code_time ?
5390                                                   s->code_time : 1)
5391                            * 100.0);
5392     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
5393                            (double)s->la_time / (s->code_time ?
5394                                                  s->code_time : 1) * 100.0);
5395     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
5396                            s->restore_count);
5397     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
5398                            s->restore_count ?
5399                            (double)s->restore_time / s->restore_count : 0);
5400 }
5401 #else
5402 void tcg_dump_info(GString *buf)
5403 {
5404     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5405 }
5406 #endif
5407 
5408 #ifdef ELF_HOST_MACHINE
5409 /* In order to use this feature, the backend needs to do three things:
5410 
5411    (1) Define ELF_HOST_MACHINE to indicate both what value to
5412        put into the ELF image and to indicate support for the feature.
5413 
5414    (2) Define tcg_register_jit.  This should create a buffer containing
5415        the contents of a .debug_frame section that describes the post-
5416        prologue unwind info for the tcg machine.
5417 
5418    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5419 */
5420 
5421 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Pending-action values for jit_descriptor.action_flag; the numeric
   values are fixed by the GDB JIT interface and must not change.  */
typedef enum {
    JIT_NOACTION = 0,   /* no request outstanding */
    JIT_REGISTER_FN,    /* relevant_entry was just added */
    JIT_UNREGISTER_FN   /* relevant_entry is being removed */
} jit_actions_t;
5427 
/* One node in GDB's doubly-linked list of in-memory symbol files.
   Layout is dictated by the GDB JIT interface.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
5434 
/* Root descriptor that GDB inspects to discover JIT'd code.
   Layout is dictated by the GDB JIT interface.  */
struct jit_descriptor {
    uint32_t version;                        /* interface version, always 1 */
    uint32_t action_flag;                    /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;   /* entry the action applies to */
    struct jit_code_entry *first_entry;      /* head of the entry list */
};
5441 
/* GDB sets a breakpoint on this function; calling it after updating
   __jit_debug_descriptor notifies an attached debugger of the change.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    /* Empty asm keeps the compiler from discarding this otherwise
       empty function, so the debugger's breakpoint stays valid.  */
    asm("");
}
5447 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
/* { version = 1, action_flag = JIT_NOACTION, no entries yet }  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5451 
5452 /* End GDB interface.  */
5453 
/* Return the byte offset of STR within the NUL-separated string table
   STRTAB.  STR must be present: the scan does not otherwise terminate
   (all callers look up names that are known to be in the table).  */
static int find_string(const char *strtab, const char *str)
{
    /* Entry 0 is the mandatory empty string; real names start at 1.  */
    for (const char *cur = strtab + 1; ; cur += strlen(cur) + 1) {
        if (strcmp(cur, str) == 0) {
            return cur - strtab;
        }
    }
}
5465 
5466 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5467                                  const void *debug_frame,
5468                                  size_t debug_frame_size)
5469 {
5470     struct __attribute__((packed)) DebugInfo {
5471         uint32_t  len;
5472         uint16_t  version;
5473         uint32_t  abbrev;
5474         uint8_t   ptr_size;
5475         uint8_t   cu_die;
5476         uint16_t  cu_lang;
5477         uintptr_t cu_low_pc;
5478         uintptr_t cu_high_pc;
5479         uint8_t   fn_die;
5480         char      fn_name[16];
5481         uintptr_t fn_low_pc;
5482         uintptr_t fn_high_pc;
5483         uint8_t   cu_eoc;
5484     };
5485 
5486     struct ElfImage {
5487         ElfW(Ehdr) ehdr;
5488         ElfW(Phdr) phdr;
5489         ElfW(Shdr) shdr[7];
5490         ElfW(Sym)  sym[2];
5491         struct DebugInfo di;
5492         uint8_t    da[24];
5493         char       str[80];
5494     };
5495 
5496     struct ElfImage *img;
5497 
5498     static const struct ElfImage img_template = {
5499         .ehdr = {
5500             .e_ident[EI_MAG0] = ELFMAG0,
5501             .e_ident[EI_MAG1] = ELFMAG1,
5502             .e_ident[EI_MAG2] = ELFMAG2,
5503             .e_ident[EI_MAG3] = ELFMAG3,
5504             .e_ident[EI_CLASS] = ELF_CLASS,
5505             .e_ident[EI_DATA] = ELF_DATA,
5506             .e_ident[EI_VERSION] = EV_CURRENT,
5507             .e_type = ET_EXEC,
5508             .e_machine = ELF_HOST_MACHINE,
5509             .e_version = EV_CURRENT,
5510             .e_phoff = offsetof(struct ElfImage, phdr),
5511             .e_shoff = offsetof(struct ElfImage, shdr),
5512             .e_ehsize = sizeof(ElfW(Shdr)),
5513             .e_phentsize = sizeof(ElfW(Phdr)),
5514             .e_phnum = 1,
5515             .e_shentsize = sizeof(ElfW(Shdr)),
5516             .e_shnum = ARRAY_SIZE(img->shdr),
5517             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
5518 #ifdef ELF_HOST_FLAGS
5519             .e_flags = ELF_HOST_FLAGS,
5520 #endif
5521 #ifdef ELF_OSABI
5522             .e_ident[EI_OSABI] = ELF_OSABI,
5523 #endif
5524         },
5525         .phdr = {
5526             .p_type = PT_LOAD,
5527             .p_flags = PF_X,
5528         },
5529         .shdr = {
5530             [0] = { .sh_type = SHT_NULL },
5531             /* Trick: The contents of code_gen_buffer are not present in
5532                this fake ELF file; that got allocated elsewhere.  Therefore
5533                we mark .text as SHT_NOBITS (similar to .bss) so that readers
5534                will not look for contents.  We can record any address.  */
5535             [1] = { /* .text */
5536                 .sh_type = SHT_NOBITS,
5537                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5538             },
5539             [2] = { /* .debug_info */
5540                 .sh_type = SHT_PROGBITS,
5541                 .sh_offset = offsetof(struct ElfImage, di),
5542                 .sh_size = sizeof(struct DebugInfo),
5543             },
5544             [3] = { /* .debug_abbrev */
5545                 .sh_type = SHT_PROGBITS,
5546                 .sh_offset = offsetof(struct ElfImage, da),
5547                 .sh_size = sizeof(img->da),
5548             },
5549             [4] = { /* .debug_frame */
5550                 .sh_type = SHT_PROGBITS,
5551                 .sh_offset = sizeof(struct ElfImage),
5552             },
5553             [5] = { /* .symtab */
5554                 .sh_type = SHT_SYMTAB,
5555                 .sh_offset = offsetof(struct ElfImage, sym),
5556                 .sh_size = sizeof(img->sym),
5557                 .sh_info = 1,
5558                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5559                 .sh_entsize = sizeof(ElfW(Sym)),
5560             },
5561             [6] = { /* .strtab */
5562                 .sh_type = SHT_STRTAB,
5563                 .sh_offset = offsetof(struct ElfImage, str),
5564                 .sh_size = sizeof(img->str),
5565             }
5566         },
5567         .sym = {
5568             [1] = { /* code_gen_buffer */
5569                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5570                 .st_shndx = 1,
5571             }
5572         },
5573         .di = {
5574             .len = sizeof(struct DebugInfo) - 4,
5575             .version = 2,
5576             .ptr_size = sizeof(void *),
5577             .cu_die = 1,
5578             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5579             .fn_die = 2,
5580             .fn_name = "code_gen_buffer"
5581         },
5582         .da = {
5583             1,          /* abbrev number (the cu) */
5584             0x11, 1,    /* DW_TAG_compile_unit, has children */
5585             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5586             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5587             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5588             0, 0,       /* end of abbrev */
5589             2,          /* abbrev number (the fn) */
5590             0x2e, 0,    /* DW_TAG_subprogram, no children */
5591             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5592             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5593             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5594             0, 0,       /* end of abbrev */
5595             0           /* no more abbrev */
5596         },
5597         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5598                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5599     };
5600 
5601     /* We only need a single jit entry; statically allocate it.  */
5602     static struct jit_code_entry one_entry;
5603 
5604     uintptr_t buf = (uintptr_t)buf_ptr;
5605     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5606     DebugFrameHeader *dfh;
5607 
5608     img = g_malloc(img_size);
5609     *img = img_template;
5610 
5611     img->phdr.p_vaddr = buf;
5612     img->phdr.p_paddr = buf;
5613     img->phdr.p_memsz = buf_size;
5614 
5615     img->shdr[1].sh_name = find_string(img->str, ".text");
5616     img->shdr[1].sh_addr = buf;
5617     img->shdr[1].sh_size = buf_size;
5618 
5619     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5620     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5621 
5622     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5623     img->shdr[4].sh_size = debug_frame_size;
5624 
5625     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5626     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5627 
5628     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5629     img->sym[1].st_value = buf;
5630     img->sym[1].st_size = buf_size;
5631 
5632     img->di.cu_low_pc = buf;
5633     img->di.cu_high_pc = buf + buf_size;
5634     img->di.fn_low_pc = buf;
5635     img->di.fn_high_pc = buf + buf_size;
5636 
5637     dfh = (DebugFrameHeader *)(img + 1);
5638     memcpy(dfh, debug_frame, debug_frame_size);
5639     dfh->fde.func_start = buf;
5640     dfh->fde.func_len = buf_size;
5641 
5642 #ifdef DEBUG_JIT
5643     /* Enable this block to be able to debug the ELF image file creation.
5644        One can use readelf, objdump, or other inspection utilities.  */
5645     {
5646         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5647         FILE *f = fopen(jit, "w+b");
5648         if (f) {
5649             if (fwrite(img, img_size, 1, f) != img_size) {
5650                 /* Avoid stupid unused return value warning for fwrite.  */
5651             }
5652             fclose(f);
5653         }
5654     }
5655 #endif
5656 
5657     one_entry.symfile_addr = img;
5658     one_entry.symfile_size = img_size;
5659 
5660     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5661     __jit_debug_descriptor.relevant_entry = &one_entry;
5662     __jit_debug_descriptor.first_entry = &one_entry;
5663     __jit_debug_register_code();
5664 }
5665 #else
5666 /* No support for the feature.  Provide the entry point expected by exec.c,
5667    and implement the internal function we declared earlier.  */
5668 
/* ELF_HOST_MACHINE is not defined for this host: debugger
   registration is a no-op; all arguments are ignored.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
5674 
/* Public entry point expected by exec.c; no-op without ELF_HOST_MACHINE.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5678 #endif /* ELF_HOST_MACHINE */
5679 
5680 #if !TCG_TARGET_MAYBE_vec
/* Stub for hosts without vector support: expansion of vector ops
   must never be requested when TCG_TARGET_MAYBE_vec is 0.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5685 #endif
5686