xref: /openbmc/qemu/tcg/tcg.c (revision 36bc99bc)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #ifdef CONFIG_USER_ONLY
60 #include "user/guest-base.h"
61 #endif
62 
63 /* Forward declarations for functions declared in tcg-target.c.inc and
64    used here. */
65 static void tcg_target_init(TCGContext *s);
66 static void tcg_target_qemu_prologue(TCGContext *s);
67 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
68                         intptr_t value, intptr_t addend);
69 
/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF Common Information Entry header, emitted into the in-memory
   debug-frame image that is shared with GDB (see DEBUG_JIT above). */
typedef struct {
    /* Pointer-aligned so the record can be laid out directly in memory. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* Frame Description Entry header: offset of the owning CIE plus the
   covered code range (field names suggest start address and length). */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE followed immediately by a single FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
92 
/*
 * Per-access record used by backends to fix up a qemu_ld/qemu_st:
 * remembers the registers involved and the patch points needed to
 * stitch the out-of-line code back into the main instruction stream.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;            /* memory operation + mmu index of the access */
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
105 
106 static void tcg_register_jit_int(const void *buf, size_t size,
107                                  const void *debug_frame,
108                                  size_t debug_frame_size)
109     __attribute__((unused));
110 
111 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
112 static void tcg_out_tb_start(TCGContext *s);
113 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
114                        intptr_t arg2);
115 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
116 static void tcg_out_movi(TCGContext *s, TCGType type,
117                          TCGReg ret, tcg_target_long arg);
118 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
128 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
129 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
130 static void tcg_out_goto_tb(TCGContext *s, int which);
131 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
132                        const TCGArg args[TCG_MAX_OP_ARGS],
133                        const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * Stub versions for hosts without vector support.  Common code should
 * never emit vector ops in that case, so reaching any of these aborts.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
169 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
170                        intptr_t arg2);
171 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
172                         TCGReg base, intptr_t ofs);
173 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
174                          const TCGHelperInfo *info);
175 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
176 static bool tcg_target_const_match(int64_t val, int ct,
177                                    TCGType type, TCGCond cond, int vece);
178 #ifdef TCG_TARGET_NEED_LDST_LABELS
179 static int tcg_out_ldst_finalize(TCGContext *s);
180 #endif
181 
182 #ifndef CONFIG_USER_ONLY
183 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
184 #endif
185 
/*
 * Backend-supplied parameters for marshalling a qemu_ld/qemu_st
 * helper call (see tcg_out_ld_helper_args and friends below).
 */
typedef struct TCGLdstHelperParam {
    /* Hook producing the register that will carry the return-address
       argument for the helper.  NOTE(review): exact contract is defined
       by the backend callers -- confirm against tcg-target.c.inc. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;   /* number of valid entries in tmp[] */
    int tmp[3];      /* scratch registers available while marshalling */
} TCGLdstHelperParam;
191 
192 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
193                                    const TCGLdstHelperParam *p)
194     __attribute__((unused));
195 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
196                                   bool load_sign, const TCGLdstHelperParam *p)
197     __attribute__((unused));
198 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 
/*
 * MMU load helpers indexed by MemOp size|sign bits.  Entries that only
 * make sense on a 64-bit host are compiled out on 32-bit hosts.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
214 
/*
 * MMU store helpers indexed by MemOp size bits; the 128-bit entry is
 * only present on 64-bit hosts.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
224 
225 typedef struct {
226     MemOp atom;   /* lg2 bits of atomicity required */
227     MemOp align;  /* lg2 bits of alignment to use */
228 } TCGAtomAlign;
229 
230 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
231                                            MemOp host_atom, bool allow_two_ops)
232     __attribute__((unused));
233 
234 #ifdef CONFIG_USER_ONLY
235 bool tcg_use_softmmu;
236 #endif
237 
238 TCGContext tcg_init_ctx;
239 __thread TCGContext *tcg_ctx;
240 
241 TCGContext **tcg_ctxs;
242 unsigned int tcg_cur_ctxs;
243 unsigned int tcg_max_ctxs;
244 TCGv_env tcg_env;
245 const void *tcg_code_gen_epilogue;
246 uintptr_t tcg_splitwx_diff;
247 
248 #ifndef CONFIG_TCG_INTERPRETER
249 tcg_prologue_fn *tcg_qemu_tb_exec;
250 #endif
251 
252 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
253 static TCGRegSet tcg_target_call_clobber_regs;
254 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte to the code buffer. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Rewrite one already-emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
267 
268 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
269 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
270 {
271     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
272         *s->code_ptr++ = v;
273     } else {
274         tcg_insn_unit *p = s->code_ptr;
275         memcpy(p, &v, sizeof(v));
276         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
277     }
278 }
279 
280 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
281                                                        uint16_t v)
282 {
283     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
284         *p = v;
285     } else {
286         memcpy(p, &v, sizeof(v));
287     }
288 }
289 #endif
290 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Append a 32-bit value to the code buffer. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        /* Insn units are narrower than 4 bytes: emit bytewise and
           advance by the equivalent number of units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Rewrite an already-emitted 32-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
313 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Append a 64-bit value to the code buffer. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        /* Insn units are narrower than 8 bytes: emit bytewise and
           advance by the equivalent number of units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Rewrite an already-emitted 64-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
336 
337 /* label relocation processing */
338 
339 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
340                           TCGLabel *l, intptr_t addend)
341 {
342     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
343 
344     r->type = type;
345     r->ptr = code_ptr;
346     r->addend = addend;
347     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
348 }
349 
/* Bind label @l to the current code emission point. */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    /* Store the read-execute alias of the write pointer. */
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
356 
357 TCGLabel *gen_new_label(void)
358 {
359     TCGContext *s = tcg_ctx;
360     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
361 
362     memset(l, 0, sizeof(TCGLabel));
363     l->id = s->nb_labels++;
364     QSIMPLEQ_INIT(&l->branches);
365     QSIMPLEQ_INIT(&l->relocs);
366 
367     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
368 
369     return l;
370 }
371 
372 static bool tcg_resolve_relocs(TCGContext *s)
373 {
374     TCGLabel *l;
375 
376     QSIMPLEQ_FOREACH(l, &s->labels, next) {
377         TCGRelocation *r;
378         uintptr_t value = l->u.value;
379 
380         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
381             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
382                 return false;
383             }
384         }
385     }
386     return true;
387 }
388 
/* Record the current code size as jump-reset offset @which of the TB. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
397 
/* Record the current code size as jump-insn offset @which of the TB. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
406 
/* Address of the TB's jump-target slot @which, as seen by executing code. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
415 
/*
 * Byte offset of the CPUTLBDescFast entry for mmu index @which,
 * relative to the end of CPUNegativeOffsetState.
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
422 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit; -2 identifies the overflow case to whoever
       armed s->jmp_trans with sigsetjmp. */
    siglongjmp(s->jmp_trans, -2);
}
429 
/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate a argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination TCGReg, or argument slot (see above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
446 
/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            /* 32-bit destination: a move or truncation suffices; the
               sign of the extension is irrelevant. */
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen i32 to i64 with the requested signedness. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* i64 to i64: (re-)extend the low 32 bits of the source. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* Full 64-bit data requires a 64-bit host. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
508 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, but take the source register from @src rather
   than @i->src (used when the value has been staged elsewhere). */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, with all parameters taken from @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
520 
/**
 * tcg_out_movext2 -- move and extend two pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* Emitting i1 first cannot clobber i2's input. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full cycle: each destination is the other's source. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No xchg on this host: stage src1 in the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i1->dst overlaps src2, so emit i2 first, then i1 from its
       (possibly relocated) source. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
560 
/**
 * tcg_out_movext3 -- move and extend three pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If some destination clobbers no other source, emit that move
       first and reduce to the two-operand case. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: park src1 in scratch, emit the chain in reverse. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: park src1 in scratch, emit the chain forwards. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
636 
637 #define C_PFX1(P, A)                    P##A
638 #define C_PFX2(P, A, B)                 P##A##_##B
639 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
640 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
641 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
642 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
643 
644 /* Define an enumeration for the various combinations. */
645 
646 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
647 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
648 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
649 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
650 
651 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
652 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
653 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
654 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
655 
656 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
657 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
658 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
659 
660 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
661 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
662 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
663 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
664 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
665 
666 typedef enum {
667 #include "tcg-target-con-set.h"
668 } TCGConstraintSetIndex;
669 
670 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
671 
672 #undef C_O0_I1
673 #undef C_O0_I2
674 #undef C_O0_I3
675 #undef C_O0_I4
676 #undef C_O1_I1
677 #undef C_O1_I2
678 #undef C_O1_I3
679 #undef C_O1_I4
680 #undef C_N1_I2
681 #undef C_N1O1_I1
682 #undef C_N2_I1
683 #undef C_O2_I1
684 #undef C_O2_I2
685 #undef C_O2_I3
686 #undef C_O2_I4
687 #undef C_N1_O1_I4
688 
689 /* Put all of the constraint sets into an array, indexed by the enum. */
690 
691 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
692 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
693 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
694 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
695 
696 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
697 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
698 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
699 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
700 
701 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
702 #define C_N1O1_I1(O1, O2, I1)           { .args_ct_str = { "&" #O1, #O2, #I1 } },
703 #define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },
704 
705 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
706 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
707 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
708 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
709 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
710 
711 static const TCGTargetOpDef constraint_sets[] = {
712 #include "tcg-target-con-set.h"
713 };
714 
715 
716 #undef C_O0_I1
717 #undef C_O0_I2
718 #undef C_O0_I3
719 #undef C_O0_I4
720 #undef C_O1_I1
721 #undef C_O1_I2
722 #undef C_O1_I3
723 #undef C_O1_I4
724 #undef C_N1_I2
725 #undef C_N1O1_I1
726 #undef C_N2_I1
727 #undef C_O2_I1
728 #undef C_O2_I2
729 #undef C_O2_I3
730 #undef C_O2_I4
731 #undef C_N1_O1_I4
732 
733 /* Expand the enumerator to be returned from tcg_target_op_def(). */
734 
735 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
736 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
737 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
738 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
739 
740 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
741 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
742 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
743 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
744 
745 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
746 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
747 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
748 
749 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
750 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
751 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
752 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
753 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
754 
755 #include "tcg-target.c.inc"
756 
757 #ifndef CONFIG_TCG_INTERPRETER
758 /* Validate CPUTLBDescFast placement. */
759 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
760                         sizeof(CPUNegativeOffsetState))
761                   < MIN_TLB_MASK_TABLE_OFS);
762 #endif
763 
/* Allocate per-context plugin state; no-op when plugins are compiled out. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns = g_ptr_array_new();
#endif
}
771 
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a copy of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* mem_base points into the parent's temps[]; translate the
               index so it points into our copy instead. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* Secondary threads get their own plugin state and code region. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
823 
824 /* pool based memory allocation */
/*
 * Slow path of pool allocation: return @size bytes from context @s.
 * Oversized requests get a dedicated entry on the "large" list;
 * everything else is carved from a chain of fixed-size chunk pools
 * that grows on demand.  Memory is reclaimed wholesale by
 * tcg_pool_reset() (which frees only the large entries and rewinds
 * the chunk chain for reuse), never individually.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                /* No chunk available: append a fresh one to the chain. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                /* Reuse the next chunk retained from a previous run. */
                p = p->next;
            }
        }
    }
    /* Current request occupies the start of the (new) current chunk. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
865 
866 void tcg_pool_reset(TCGContext *s)
867 {
868     TCGPool *p, *t;
869     for (p = s->pool_first_large; p; p = t) {
870         t = p->next;
871         g_free(p);
872     }
873     s->pool_first_large = NULL;
874     s->pool_cur = s->pool_end = NULL;
875     s->pool_current = NULL;
876 }
877 
878 /*
879  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
880  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
881  * We only use these for layout in tcg_out_ld_helper_ret and
882  * tcg_out_st_helper_args, and share them between several of
883  * the helpers, with the end result that it's easier to build manually.
884  */
885 
/* "ttl" == tcg_target_ulong: 32 or 64 bits depending on the host. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

/* Load helpers: (env, addr, oi, ra) -> value. */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Store helpers: (env, addr, data, oi, ra) -> void. */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
948 
949 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map one 3-bit dh_typecode value to the matching libffi type
 * descriptor, used to describe helper signatures to the interpreter.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    /* All helper typecodes are covered above. */
    g_assert_not_reached();
}
986 
/*
 * Build the libffi call interface (cif) for one helper from its
 * typemask.  The allocation is made once per helper and retained
 * for the life of the process; it is never freed.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    /* Each argument occupies 3 bits in the mask. */
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    /* Bits 0..2 of the mask encode the return type. */
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1020 
/*
 * HELPER_INFO_INIT names the field that records whether a helper's
 * call layout has been initialized; HELPER_INFO_INIT_VAL is the
 * non-zero value stored there.  With the interpreter, the ffi_cif
 * pointer doubles as that marker.
 */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1027 
1028 static inline bool arg_slot_reg_p(unsigned arg_slot)
1029 {
1030     /*
1031      * Split the sizeof away from the comparison to avoid Werror from
1032      * "unsigned < 0 is always false", when iarg_regs is empty.
1033      */
1034     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1035     return arg_slot < nreg;
1036 }
1037 
1038 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1039 {
1040     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1041     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1042 
1043     tcg_debug_assert(stk_slot < max);
1044     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1045 }
1046 
/* Running state while laying out a helper's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1053 
1054 static void layout_arg_even(TCGCumulativeArgs *cum)
1055 {
1056     cum->arg_slot += cum->arg_slot & 1;
1057 }
1058 
1059 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1060                          TCGCallArgumentKind kind)
1061 {
1062     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1063 
1064     *loc = (TCGCallArgumentLoc){
1065         .kind = kind,
1066         .arg_idx = cum->arg_idx,
1067         .arg_slot = cum->arg_slot,
1068     };
1069     cum->info_in_idx++;
1070     cum->arg_slot++;
1071 }
1072 
1073 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1074                                 TCGHelperInfo *info, int n)
1075 {
1076     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1077 
1078     for (int i = 0; i < n; ++i) {
1079         /* Layout all using the same arg_idx, adjusting the subindex. */
1080         loc[i] = (TCGCallArgumentLoc){
1081             .kind = TCG_CALL_ARG_NORMAL,
1082             .arg_idx = cum->arg_idx,
1083             .tmp_subindex = i,
1084             .arg_slot = cum->arg_slot + i,
1085         };
1086     }
1087     cum->info_in_idx += n;
1088     cum->arg_slot += n;
1089 }
1090 
/*
 * Record a 128-bit argument passed by reference: one pointer slot in
 * the normal argument sequence, plus n copy slots in the ref area.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    /* Capture the entry before layout_arg_1 advances info_in_idx. */
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1122 
/*
 * Compute the call layout for one helper from its typemask: how the
 * return value comes back, and where each input argument lives
 * (register, stack slot, or by-reference copy area), following the
 * target backend's calling-convention macros.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Signedness of the extension follows bit 0 of the code. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1303 
1304 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1305 static void process_op_defs(TCGContext *s);
1306 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1307                                             TCGReg reg, const char *name);
1308 
/*
 * One-time initialization of the shared tcg_init_ctx: op constraint
 * storage, helper call layouts, backend init, the indirect register
 * allocation order, and the "env" fixed global.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Hand each op def its slice of the single constraint array. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of call-saved registers at the front. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1385 
/* One-time TCG startup: initialize the context, then the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1391 
1392 /*
1393  * Allocate TBs right before their corresponding translated code, making
1394  * sure that TBs and code are on different cache lines.
1395  */
1396 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1397 {
1398     uintptr_t align = qemu_icache_linesize;
1399     TranslationBlock *tb;
1400     void *next;
1401 
1402  retry:
1403     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1404     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1405 
1406     if (unlikely(next > s->code_gen_highwater)) {
1407         if (tcg_region_alloc(s)) {
1408             return NULL;
1409         }
1410         goto retry;
1411     }
1412     qatomic_set(&s->code_gen_ptr, next);
1413     s->data_gen_ptr = NULL;
1414     return tb;
1415 }
1416 
/*
 * Generate the host prologue/epilogue at the start of the code buffer
 * and publish it (tcg_qemu_tb_exec, region bookkeeping, optional
 * disassembly logging).
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point into generated code is the prologue itself. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the generated prologue visible to the execution view. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble the code part, then dump the data pool. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1497 
/* Reset per-translation state in @s before generating a new TB. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Drop all non-global temps from the previous translation. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    /* The frontend must have configured these before translating. */
    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

    tcg_debug_assert(s->insn_start_words > 0);
}
1531 
1532 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1533 {
1534     int n = s->nb_temps++;
1535 
1536     if (n >= TCG_MAX_TEMPS) {
1537         tcg_raise_tb_overflow(s);
1538     }
1539     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1540 }
1541 
1542 static TCGTemp *tcg_global_alloc(TCGContext *s)
1543 {
1544     TCGTemp *ts;
1545 
1546     tcg_debug_assert(s->nb_globals == s->nb_temps);
1547     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1548     s->nb_globals++;
1549     ts = tcg_temp_alloc(s);
1550     ts->kind = TEMP_GLOBAL;
1551 
1552     return ts;
1553 }
1554 
1555 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1556                                             TCGReg reg, const char *name)
1557 {
1558     TCGTemp *ts;
1559 
1560     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1561 
1562     ts = tcg_global_alloc(s);
1563     ts->base_type = type;
1564     ts->type = type;
1565     ts->kind = TEMP_FIXED;
1566     ts->reg = reg;
1567     ts->name = name;
1568     tcg_regset_set_reg(s->reserved_regs, reg);
1569 
1570     return ts;
1571 }
1572 
/*
 * Define the spill frame: @size bytes at @start offset from register
 * @reg, exposed as the fixed global "_frame".
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1580 
1581 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1582                                             const char *name, TCGType type)
1583 {
1584     TCGContext *s = tcg_ctx;
1585     TCGTemp *base_ts = tcgv_ptr_temp(base);
1586     TCGTemp *ts = tcg_global_alloc(s);
1587     int indirect_reg = 0;
1588 
1589     switch (base_ts->kind) {
1590     case TEMP_FIXED:
1591         break;
1592     case TEMP_GLOBAL:
1593         /* We do not support double-indirect registers.  */
1594         tcg_debug_assert(!base_ts->indirect_reg);
1595         base_ts->indirect_base = 1;
1596         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1597                             ? 2 : 1);
1598         indirect_reg = 1;
1599         break;
1600     default:
1601         g_assert_not_reached();
1602     }
1603 
1604     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1605         TCGTemp *ts2 = tcg_global_alloc(s);
1606         char buf[64];
1607 
1608         ts->base_type = TCG_TYPE_I64;
1609         ts->type = TCG_TYPE_I32;
1610         ts->indirect_reg = indirect_reg;
1611         ts->mem_allocated = 1;
1612         ts->mem_base = base_ts;
1613         ts->mem_offset = offset;
1614         pstrcpy(buf, sizeof(buf), name);
1615         pstrcat(buf, sizeof(buf), "_0");
1616         ts->name = strdup(buf);
1617 
1618         tcg_debug_assert(ts2 == ts + 1);
1619         ts2->base_type = TCG_TYPE_I64;
1620         ts2->type = TCG_TYPE_I32;
1621         ts2->indirect_reg = indirect_reg;
1622         ts2->mem_allocated = 1;
1623         ts2->mem_base = base_ts;
1624         ts2->mem_offset = offset + 4;
1625         ts2->temp_subindex = 1;
1626         pstrcpy(buf, sizeof(buf), name);
1627         pstrcat(buf, sizeof(buf), "_1");
1628         ts2->name = strdup(buf);
1629     } else {
1630         ts->base_type = type;
1631         ts->type = type;
1632         ts->indirect_reg = indirect_reg;
1633         ts->mem_allocated = 1;
1634         ts->mem_base = base_ts;
1635         ts->mem_offset = offset;
1636         ts->name = name;
1637     }
1638     return ts;
1639 }
1640 
/* Typed wrappers over tcg_global_mem_new_internal(). */

TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
    return temp_tcgv_i32(ts);
}

TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
    return temp_tcgv_i64(ts);
}

TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
{
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
    return temp_tcgv_ptr(ts);
}
1658 
/*
 * Allocate a temp of @type and lifetime @kind.  TEMP_EBB temps are
 * recycled through the per-type free list; TEMP_TB temps are always
 * freshly allocated.  Types wider than the host register become a
 * run of adjacent single-register temps sharing a base_type.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for @type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* The pieces must occupy consecutive temp slots. */
            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1722 
/*
 * Public temp constructors: one TB-lifetime (TEMP_TB) and one
 * EBB-lifetime (TEMP_EBB) variant per value type.
 */

TCGv_i32 tcg_temp_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
}

TCGv_i32 tcg_temp_ebb_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
}

TCGv_i64 tcg_temp_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
}

TCGv_i64 tcg_temp_ebb_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
}

TCGv_ptr tcg_temp_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
}

TCGv_ptr tcg_temp_ebb_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
}

TCGv_i128 tcg_temp_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
}

TCGv_i128 tcg_temp_ebb_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
}
1762 
/*
 * Allocate an EBB-lifetime vector temp of the given vector @type.
 * Debug builds verify the backend actually supports that width.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
1786 
/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    /* The template temp must itself be live. */
    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
1797 
1798 void tcg_temp_free_internal(TCGTemp *ts)
1799 {
1800     TCGContext *s = tcg_ctx;
1801 
1802     switch (ts->kind) {
1803     case TEMP_CONST:
1804     case TEMP_TB:
1805         /* Silently ignore free. */
1806         break;
1807     case TEMP_EBB:
1808         tcg_debug_assert(ts->temp_allocated != 0);
1809         ts->temp_allocated = 0;
1810         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1811         break;
1812     default:
1813         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1814         g_assert_not_reached();
1815     }
1816 }
1817 
1818 void tcg_temp_free_i32(TCGv_i32 arg)
1819 {
1820     tcg_temp_free_internal(tcgv_i32_temp(arg));
1821 }
1822 
1823 void tcg_temp_free_i64(TCGv_i64 arg)
1824 {
1825     tcg_temp_free_internal(tcgv_i64_temp(arg));
1826 }
1827 
1828 void tcg_temp_free_i128(TCGv_i128 arg)
1829 {
1830     tcg_temp_free_internal(tcgv_i128_temp(arg));
1831 }
1832 
1833 void tcg_temp_free_ptr(TCGv_ptr arg)
1834 {
1835     tcg_temp_free_internal(tcgv_ptr_temp(arg));
1836 }
1837 
1838 void tcg_temp_free_vec(TCGv_vec arg)
1839 {
1840     tcg_temp_free_internal(tcgv_vec_temp(arg));
1841 }
1842 
/*
 * Return the interned TEMP_CONST temporary holding VAL of the given TYPE.
 * Constants are deduplicated per (type, value) through a lazily created
 * per-type hash table in the current TCGContext, so repeated requests for
 * the same constant return the same TCGTemp.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* First constant of this type: create the lookup table,
           keyed by the full 64-bit value. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* A 64-bit constant on a 32-bit host occupies a pair of
               adjacent temps, one per 32-bit half. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* The allocator must have handed back consecutive slots. */
            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* The key points into the temp itself, which is never freed. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1897 
1898 TCGv_i32 tcg_constant_i32(int32_t val)
1899 {
1900     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1901 }
1902 
1903 TCGv_i64 tcg_constant_i64(int64_t val)
1904 {
1905     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1906 }
1907 
1908 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1909 {
1910     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1911 }
1912 
1913 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1914 {
1915     val = dup_const(vece, val);
1916     return temp_tcgv_vec(tcg_constant_internal(type, val));
1917 }
1918 
1919 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1920 {
1921     TCGTemp *t = tcgv_vec_temp(match);
1922 
1923     tcg_debug_assert(t->temp_allocated != 0);
1924     return tcg_constant_vec(t->base_type, vece, val);
1925 }
1926 
#ifdef CONFIG_DEBUG_TCG
/* Return the index of TS within tcg_ctx->temps[], with bounds checking. */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    /* Catch stray pointers that do not come from the temps array. */
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}

/* Convert a TCGv_i32 handle back to its TCGTemp, with validation.
   The handle encodes the byte offset of the temp within TCGContext. */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    /* The offset must land inside temps[] and on a TCGTemp boundary. */
    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
#endif /* CONFIG_DEBUG_TCG */
1945 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* Any supported vector width implies basic vector ops are usable. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Opcodes every backend must implement. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Mandatory 32-bit integer operations. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer operations, per-backend. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer operations require a 64-bit host. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer operations, per-backend. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector operations: all gated on some vector width being present. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything else must be a target-specific opcode. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2250 
2251 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2252 
/*
 * Emit an INDEX_op_call op invoking FUNC, described by INFO, with output
 * RET (may be NULL) and inputs ARGS.  The op's argument vector is laid
 * out as: outputs, inputs, function pointer, info pointer.  32-bit
 * inputs needing promotion to 64-bit are extended through scratch EBB
 * temps, which are freed after the op has been emitted.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout once per helper, thread-safely. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* +2 for the trailing func and info words. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Fill in the output arguments. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-word result: RET is the first of N consecutive temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Fill in the input arguments, per the precomputed layout. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI requires the 32-bit input widened to 64 bits;
                   do so via a scratch temp, freed below. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    /* Insert either before the requested op or at the stream tail. */
    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* Release the extension scratch temps now that the op is emitted. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2345 
2346 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2347 {
2348     tcg_gen_callN(func, info, ret, NULL);
2349 }
2350 
2351 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2352 {
2353     tcg_gen_callN(func, info, ret, &t1);
2354 }
2355 
2356 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2357                    TCGTemp *t1, TCGTemp *t2)
2358 {
2359     TCGTemp *args[2] = { t1, t2 };
2360     tcg_gen_callN(func, info, ret, args);
2361 }
2362 
2363 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2364                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2365 {
2366     TCGTemp *args[3] = { t1, t2, t3 };
2367     tcg_gen_callN(func, info, ret, args);
2368 }
2369 
2370 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2371                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2372 {
2373     TCGTemp *args[4] = { t1, t2, t3, t4 };
2374     tcg_gen_callN(func, info, ret, args);
2375 }
2376 
2377 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2378                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2379 {
2380     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2381     tcg_gen_callN(func, info, ret, args);
2382 }
2383 
2384 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2385                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2386                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2387 {
2388     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2389     tcg_gen_callN(func, info, ret, args);
2390 }
2391 
2392 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2393                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2394                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2395 {
2396     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2397     tcg_gen_callN(func, info, ret, args);
2398 }
2399 
/*
 * Reset the value-location state of every temp, and clear the
 * register-to-temp map, prior to register allocation.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            /* Fixed temps permanently live in their register. */
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            /* Globals start out in memory (TEMP_VAL_MEM). */
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* Both kinds get a fresh stack slot on demand. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2431 
/*
 * Format a human-readable name for temp TS into BUF and return BUF:
 * the temp's own name for globals/fixed, "locN"/"tmpN" for TB/EBB
 * temps, or a "$value" form for constants.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        /* Number local temps relative to the first non-global. */
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Prefix vector constants with their width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2471 
2472 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2473                              int buf_size, TCGArg arg)
2474 {
2475     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2476 }
2477 
/* Printable names for TCGCond values, used by tcg_dump_ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};

/* Printable names for MemOp size/sign/endian combinations; entries not
   listed stay NULL, which the dump code treats as "print numerically". */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};

/* Printable names for MemOp alignment requirements. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* Printable names for MemOp atomicity variants. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};

/* Printable names for valid TCG_BSWAP_* flag combinations. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

#ifdef CONFIG_PLUGIN
/* Printable names for plugin callback emission points. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2550 
2551 static inline bool tcg_regset_single(TCGRegSet d)
2552 {
2553     return (d & (d - 1)) == 0;
2554 }
2555 
2556 static inline TCGReg tcg_regset_first(TCGRegSet d)
2557 {
2558     if (TCG_TARGET_NB_REGS <= 32) {
2559         return ctz32(d);
2560     } else {
2561         return ctz64(d);
2562     }
2563 }
2564 
/* Return only the number of characters output -- no error return.
   fprintf reports errors as a negative count; clamp that to 0 so the
   column accounting in tcg_dump_ops never goes backwards. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2568 
2569 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2570 {
2571     char buf[128];
2572     TCGOp *op;
2573 
2574     QTAILQ_FOREACH(op, &s->ops, link) {
2575         int i, k, nb_oargs, nb_iargs, nb_cargs;
2576         const TCGOpDef *def;
2577         TCGOpcode c;
2578         int col = 0;
2579 
2580         c = op->opc;
2581         def = &tcg_op_defs[c];
2582 
2583         if (c == INDEX_op_insn_start) {
2584             nb_oargs = 0;
2585             col += ne_fprintf(f, "\n ----");
2586 
2587             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2588                 col += ne_fprintf(f, " %016" PRIx64,
2589                                   tcg_get_insn_start_param(op, i));
2590             }
2591         } else if (c == INDEX_op_call) {
2592             const TCGHelperInfo *info = tcg_call_info(op);
2593             void *func = tcg_call_func(op);
2594 
2595             /* variable number of arguments */
2596             nb_oargs = TCGOP_CALLO(op);
2597             nb_iargs = TCGOP_CALLI(op);
2598             nb_cargs = def->nb_cargs;
2599 
2600             col += ne_fprintf(f, " %s ", def->name);
2601 
2602             /*
2603              * Print the function name from TCGHelperInfo, if available.
2604              * Note that plugins have a template function for the info,
2605              * but the actual function pointer comes from the plugin.
2606              */
2607             if (func == info->func) {
2608                 col += ne_fprintf(f, "%s", info->name);
2609             } else {
2610                 col += ne_fprintf(f, "plugin(%p)", func);
2611             }
2612 
2613             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2614             for (i = 0; i < nb_oargs; i++) {
2615                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2616                                                             op->args[i]));
2617             }
2618             for (i = 0; i < nb_iargs; i++) {
2619                 TCGArg arg = op->args[nb_oargs + i];
2620                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2621                 col += ne_fprintf(f, ",%s", t);
2622             }
2623         } else {
2624             col += ne_fprintf(f, " %s ", def->name);
2625 
2626             nb_oargs = def->nb_oargs;
2627             nb_iargs = def->nb_iargs;
2628             nb_cargs = def->nb_cargs;
2629 
2630             if (def->flags & TCG_OPF_VECTOR) {
2631                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2632                                   8 << TCGOP_VECE(op));
2633             }
2634 
2635             k = 0;
2636             for (i = 0; i < nb_oargs; i++) {
2637                 const char *sep =  k ? "," : "";
2638                 col += ne_fprintf(f, "%s%s", sep,
2639                                   tcg_get_arg_str(s, buf, sizeof(buf),
2640                                                   op->args[k++]));
2641             }
2642             for (i = 0; i < nb_iargs; i++) {
2643                 const char *sep =  k ? "," : "";
2644                 col += ne_fprintf(f, "%s%s", sep,
2645                                   tcg_get_arg_str(s, buf, sizeof(buf),
2646                                                   op->args[k++]));
2647             }
2648             switch (c) {
2649             case INDEX_op_brcond_i32:
2650             case INDEX_op_setcond_i32:
2651             case INDEX_op_negsetcond_i32:
2652             case INDEX_op_movcond_i32:
2653             case INDEX_op_brcond2_i32:
2654             case INDEX_op_setcond2_i32:
2655             case INDEX_op_brcond_i64:
2656             case INDEX_op_setcond_i64:
2657             case INDEX_op_negsetcond_i64:
2658             case INDEX_op_movcond_i64:
2659             case INDEX_op_cmp_vec:
2660             case INDEX_op_cmpsel_vec:
2661                 if (op->args[k] < ARRAY_SIZE(cond_name)
2662                     && cond_name[op->args[k]]) {
2663                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2664                 } else {
2665                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2666                 }
2667                 i = 1;
2668                 break;
2669             case INDEX_op_qemu_ld_a32_i32:
2670             case INDEX_op_qemu_ld_a64_i32:
2671             case INDEX_op_qemu_st_a32_i32:
2672             case INDEX_op_qemu_st_a64_i32:
2673             case INDEX_op_qemu_st8_a32_i32:
2674             case INDEX_op_qemu_st8_a64_i32:
2675             case INDEX_op_qemu_ld_a32_i64:
2676             case INDEX_op_qemu_ld_a64_i64:
2677             case INDEX_op_qemu_st_a32_i64:
2678             case INDEX_op_qemu_st_a64_i64:
2679             case INDEX_op_qemu_ld_a32_i128:
2680             case INDEX_op_qemu_ld_a64_i128:
2681             case INDEX_op_qemu_st_a32_i128:
2682             case INDEX_op_qemu_st_a64_i128:
2683                 {
2684                     const char *s_al, *s_op, *s_at;
2685                     MemOpIdx oi = op->args[k++];
2686                     MemOp mop = get_memop(oi);
2687                     unsigned ix = get_mmuidx(oi);
2688 
2689                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2690                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2691                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2692                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2693 
2694                     /* If all fields are accounted for, print symbolically. */
2695                     if (!mop && s_al && s_op && s_at) {
2696                         col += ne_fprintf(f, ",%s%s%s,%u",
2697                                           s_at, s_al, s_op, ix);
2698                     } else {
2699                         mop = get_memop(oi);
2700                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2701                     }
2702                     i = 1;
2703                 }
2704                 break;
2705             case INDEX_op_bswap16_i32:
2706             case INDEX_op_bswap16_i64:
2707             case INDEX_op_bswap32_i32:
2708             case INDEX_op_bswap32_i64:
2709             case INDEX_op_bswap64_i64:
2710                 {
2711                     TCGArg flags = op->args[k];
2712                     const char *name = NULL;
2713 
2714                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2715                         name = bswap_flag_name[flags];
2716                     }
2717                     if (name) {
2718                         col += ne_fprintf(f, ",%s", name);
2719                     } else {
2720                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2721                     }
2722                     i = k = 1;
2723                 }
2724                 break;
2725 #ifdef CONFIG_PLUGIN
2726             case INDEX_op_plugin_cb:
2727                 {
2728                     TCGArg from = op->args[k++];
2729                     const char *name = NULL;
2730 
2731                     if (from < ARRAY_SIZE(plugin_from_name)) {
2732                         name = plugin_from_name[from];
2733                     }
2734                     if (name) {
2735                         col += ne_fprintf(f, "%s", name);
2736                     } else {
2737                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2738                     }
2739                     i = 1;
2740                 }
2741                 break;
2742 #endif
2743             default:
2744                 i = 0;
2745                 break;
2746             }
2747             switch (c) {
2748             case INDEX_op_set_label:
2749             case INDEX_op_br:
2750             case INDEX_op_brcond_i32:
2751             case INDEX_op_brcond_i64:
2752             case INDEX_op_brcond2_i32:
2753                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2754                                   arg_label(op->args[k])->id);
2755                 i++, k++;
2756                 break;
2757             case INDEX_op_mb:
2758                 {
2759                     TCGBar membar = op->args[k];
2760                     const char *b_op, *m_op;
2761 
2762                     switch (membar & TCG_BAR_SC) {
2763                     case 0:
2764                         b_op = "none";
2765                         break;
2766                     case TCG_BAR_LDAQ:
2767                         b_op = "acq";
2768                         break;
2769                     case TCG_BAR_STRL:
2770                         b_op = "rel";
2771                         break;
2772                     case TCG_BAR_SC:
2773                         b_op = "seq";
2774                         break;
2775                     default:
2776                         g_assert_not_reached();
2777                     }
2778 
2779                     switch (membar & TCG_MO_ALL) {
2780                     case 0:
2781                         m_op = "none";
2782                         break;
2783                     case TCG_MO_LD_LD:
2784                         m_op = "rr";
2785                         break;
2786                     case TCG_MO_LD_ST:
2787                         m_op = "rw";
2788                         break;
2789                     case TCG_MO_ST_LD:
2790                         m_op = "wr";
2791                         break;
2792                     case TCG_MO_ST_ST:
2793                         m_op = "ww";
2794                         break;
2795                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2796                         m_op = "rr+rw";
2797                         break;
2798                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2799                         m_op = "rr+wr";
2800                         break;
2801                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2802                         m_op = "rr+ww";
2803                         break;
2804                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2805                         m_op = "rw+wr";
2806                         break;
2807                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2808                         m_op = "rw+ww";
2809                         break;
2810                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2811                         m_op = "wr+ww";
2812                         break;
2813                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2814                         m_op = "rr+rw+wr";
2815                         break;
2816                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2817                         m_op = "rr+rw+ww";
2818                         break;
2819                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2820                         m_op = "rr+wr+ww";
2821                         break;
2822                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2823                         m_op = "rw+wr+ww";
2824                         break;
2825                     case TCG_MO_ALL:
2826                         m_op = "all";
2827                         break;
2828                     default:
2829                         g_assert_not_reached();
2830                     }
2831 
2832                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2833                     i++, k++;
2834                 }
2835                 break;
2836             default:
2837                 break;
2838             }
2839             for (; i < nb_cargs; i++, k++) {
2840                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2841                                   op->args[k]);
2842             }
2843         }
2844 
2845         if (have_prefs || op->life) {
2846             for (; col < 40; ++col) {
2847                 putc(' ', f);
2848             }
2849         }
2850 
2851         if (op->life) {
2852             unsigned life = op->life;
2853 
2854             if (life & (SYNC_ARG * 3)) {
2855                 ne_fprintf(f, "  sync:");
2856                 for (i = 0; i < 2; ++i) {
2857                     if (life & (SYNC_ARG << i)) {
2858                         ne_fprintf(f, " %d", i);
2859                     }
2860                 }
2861             }
2862             life /= DEAD_ARG;
2863             if (life) {
2864                 ne_fprintf(f, "  dead:");
2865                 for (i = 0; life; ++i, life >>= 1) {
2866                     if (life & 1) {
2867                         ne_fprintf(f, " %d", i);
2868                     }
2869                 }
2870             }
2871         }
2872 
2873         if (have_prefs) {
2874             for (i = 0; i < nb_oargs; ++i) {
2875                 TCGRegSet set = output_pref(op, i);
2876 
2877                 if (i == 0) {
2878                     ne_fprintf(f, "  pref=");
2879                 } else {
2880                     ne_fprintf(f, ",");
2881                 }
2882                 if (set == 0) {
2883                     ne_fprintf(f, "none");
2884                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2885                     ne_fprintf(f, "all");
2886 #ifdef CONFIG_DEBUG_TCG
2887                 } else if (tcg_regset_single(set)) {
2888                     TCGReg reg = tcg_regset_first(set);
2889                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2890 #endif
2891                 } else if (TCG_TARGET_NB_REGS <= 32) {
2892                     ne_fprintf(f, "0x%x", (uint32_t)set);
2893                 } else {
2894                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2895                 }
2896             }
2897         }
2898 
2899         putc('\n', f);
2900     }
2901 }
2902 
2903 /* we give more priority to constraints with less registers */
2904 static int get_constraint_priority(const TCGOpDef *def, int k)
2905 {
2906     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2907     int n = ctpop64(arg_ct->regs);
2908 
2909     /*
2910      * Sort constraints of a single register first, which includes output
2911      * aliases (which must exactly match the input already allocated).
2912      */
2913     if (n == 1 || arg_ct->oalias) {
2914         return INT_MAX;
2915     }
2916 
2917     /*
2918      * Sort register pairs next, first then second immediately after.
2919      * Arbitrarily sort multiple pairs by the index of the first reg;
2920      * there shouldn't be many pairs.
2921      */
2922     switch (arg_ct->pair) {
2923     case 1:
2924     case 3:
2925         return (k + 1) * 2;
2926     case 2:
2927         return (arg_ct->pair_index + 1) * 2 - 1;
2928     }
2929 
2930     /* Finally, sort by decreasing register count. */
2931     assert(n > 1);
2932     return -n;
2933 }
2934 
2935 /* sort from highest priority to lowest */
2936 static void sort_constraints(TCGOpDef *def, int start, int n)
2937 {
2938     int i, j;
2939     TCGArgConstraint *a = def->args_ct;
2940 
2941     for (i = 0; i < n; i++) {
2942         a[start + i].sort_index = start + i;
2943     }
2944     if (n <= 1) {
2945         return;
2946     }
2947     for (i = 0; i < n - 1; i++) {
2948         for (j = i + 1; j < n; j++) {
2949             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2950             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2951             if (p1 < p2) {
2952                 int tmp = a[start + i].sort_index;
2953                 a[start + i].sort_index = a[start + j].sort_index;
2954                 a[start + j].sort_index = tmp;
2955             }
2956         }
2957     }
2958 }
2959 
/*
 * Decode the backend's constraint strings for every opcode into
 * tcg_op_defs[].args_ct: register sets, constant permission, output
 * aliases ("0".."9"), new-register outputs ("&"), and register pairs
 * ("p"/"m").  Finally sort the constraints for the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        /* Skip opcodes the backend does not implement. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* Arguments are ordered outputs first, then inputs. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Input constrained to alias output number 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must be a register distinct from all inputs. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                    .newreg = def->args_ct[o].newreg,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                    .newreg = def->args_ct[o].newreg,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate the remaining single-letter constraints. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
3166 
3167 static void remove_label_use(TCGOp *op, int idx)
3168 {
3169     TCGLabel *label = arg_label(op->args[idx]);
3170     TCGLabelUse *use;
3171 
3172     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3173         if (use->op == op) {
3174             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3175             return;
3176         }
3177     }
3178     g_assert_not_reached();
3179 }
3180 
3181 void tcg_op_remove(TCGContext *s, TCGOp *op)
3182 {
3183     switch (op->opc) {
3184     case INDEX_op_br:
3185         remove_label_use(op, 0);
3186         break;
3187     case INDEX_op_brcond_i32:
3188     case INDEX_op_brcond_i64:
3189         remove_label_use(op, 3);
3190         break;
3191     case INDEX_op_brcond2_i32:
3192         remove_label_use(op, 5);
3193         break;
3194     default:
3195         break;
3196     }
3197 
3198     QTAILQ_REMOVE(&s->ops, op, link);
3199     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3200     s->nb_ops--;
3201 }
3202 
3203 void tcg_remove_ops_after(TCGOp *op)
3204 {
3205     TCGContext *s = tcg_ctx;
3206 
3207     while (true) {
3208         TCGOp *last = tcg_last_op();
3209         if (last == op) {
3210             return;
3211         }
3212         tcg_op_remove(s, last);
3213     }
3214 }
3215 
3216 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3217 {
3218     TCGContext *s = tcg_ctx;
3219     TCGOp *op = NULL;
3220 
3221     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3222         QTAILQ_FOREACH(op, &s->free_ops, link) {
3223             if (nargs <= op->nargs) {
3224                 QTAILQ_REMOVE(&s->free_ops, op, link);
3225                 nargs = op->nargs;
3226                 goto found;
3227             }
3228         }
3229     }
3230 
3231     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3232     nargs = MAX(4, nargs);
3233     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3234 
3235  found:
3236     memset(op, 0, offsetof(TCGOp, link));
3237     op->opc = opc;
3238     op->nargs = nargs;
3239 
3240     /* Check for bitfield overflow. */
3241     tcg_debug_assert(op->nargs == nargs);
3242 
3243     s->nb_ops++;
3244     return op;
3245 }
3246 
3247 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3248 {
3249     TCGOp *op = tcg_op_alloc(opc, nargs);
3250 
3251     if (tcg_ctx->emit_before_op) {
3252         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3253     } else {
3254         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3255     }
3256     return op;
3257 }
3258 
3259 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3260                             TCGOpcode opc, unsigned nargs)
3261 {
3262     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3263     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3264     return new_op;
3265 }
3266 
3267 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3268                            TCGOpcode opc, unsigned nargs)
3269 {
3270     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3271     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3272     return new_op;
3273 }
3274 
3275 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3276 {
3277     TCGLabelUse *u;
3278 
3279     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3280         TCGOp *op = u->op;
3281         switch (op->opc) {
3282         case INDEX_op_br:
3283             op->args[0] = label_arg(to);
3284             break;
3285         case INDEX_op_brcond_i32:
3286         case INDEX_op_brcond_i64:
3287             op->args[3] = label_arg(to);
3288             break;
3289         case INDEX_op_brcond2_i32:
3290             op->args[5] = label_arg(to);
3291             break;
3292         default:
3293             g_assert_not_reached();
3294         }
3295     }
3296 
3297     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3298 }
3299 
/*
 * Reachability analysis: remove unreachable code.
 *
 * Walk the op list forward, tracking whether the current position is
 * reachable ("dead" false).  Unconditional control transfers make the
 * following ops dead until the next label that still has users; along
 * the way, adjacent labels are merged and branches-to-next deleted.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    /* SAFE variant: the current op may be removed during iteration. */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3391 
/* Liveness state bits for a temp: value is dead / resident in memory. */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test whether liveness marked argument N as dead / needing a sync;
 * both expand against a local 'arg_life' in the enclosing function.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3397 
/*
 * For liveness_pass_1, the register preferences for a given temp.
 * The TCGRegSet is stored behind ts->state_ptr for the duration
 * of the pass.
 */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}
3403 
3404 /* For liveness_pass_1, reset the preferences for a given temp to the
3405  * maximal regset for its type.
3406  */
3407 static inline void la_reset_pref(TCGTemp *ts)
3408 {
3409     *la_temp_pref(ts)
3410         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3411 }
3412 
3413 /* liveness analysis: end of function: all temps are dead, and globals
3414    should be in memory. */
3415 static void la_func_end(TCGContext *s, int ng, int nt)
3416 {
3417     int i;
3418 
3419     for (i = 0; i < ng; ++i) {
3420         s->temps[i].state = TS_DEAD | TS_MEM;
3421         la_reset_pref(&s->temps[i]);
3422     }
3423     for (i = ng; i < nt; ++i) {
3424         s->temps[i].state = TS_DEAD;
3425         la_reset_pref(&s->temps[i]);
3426     }
3427 }
3428 
3429 /* liveness analysis: end of basic block: all temps are dead, globals
3430    and local temps should be in memory. */
3431 static void la_bb_end(TCGContext *s, int ng, int nt)
3432 {
3433     int i;
3434 
3435     for (i = 0; i < nt; ++i) {
3436         TCGTemp *ts = &s->temps[i];
3437         int state;
3438 
3439         switch (ts->kind) {
3440         case TEMP_FIXED:
3441         case TEMP_GLOBAL:
3442         case TEMP_TB:
3443             state = TS_DEAD | TS_MEM;
3444             break;
3445         case TEMP_EBB:
3446         case TEMP_CONST:
3447             state = TS_DEAD;
3448             break;
3449         default:
3450             g_assert_not_reached();
3451         }
3452         ts->state = state;
3453         la_reset_pref(ts);
3454     }
3455 }
3456 
3457 /* liveness analysis: sync globals back to memory.  */
3458 static void la_global_sync(TCGContext *s, int ng)
3459 {
3460     int i;
3461 
3462     for (i = 0; i < ng; ++i) {
3463         int state = s->temps[i].state;
3464         s->temps[i].state = state | TS_MEM;
3465         if (state == TS_DEAD) {
3466             /* If the global was previously dead, reset prefs.  */
3467             la_reset_pref(&s->temps[i]);
3468         }
3469     }
3470 }
3471 
3472 /*
3473  * liveness analysis: conditional branch: all temps are dead unless
3474  * explicitly live-across-conditional-branch, globals and local temps
3475  * should be synced.
3476  */
3477 static void la_bb_sync(TCGContext *s, int ng, int nt)
3478 {
3479     la_global_sync(s, ng);
3480 
3481     for (int i = ng; i < nt; ++i) {
3482         TCGTemp *ts = &s->temps[i];
3483         int state;
3484 
3485         switch (ts->kind) {
3486         case TEMP_TB:
3487             state = ts->state;
3488             ts->state = state | TS_MEM;
3489             if (state != TS_DEAD) {
3490                 continue;
3491             }
3492             break;
3493         case TEMP_EBB:
3494         case TEMP_CONST:
3495             continue;
3496         default:
3497             g_assert_not_reached();
3498         }
3499         la_reset_pref(&s->temps[i]);
3500     }
3501 }
3502 
3503 /* liveness analysis: sync globals back to memory and kill.  */
3504 static void la_global_kill(TCGContext *s, int ng)
3505 {
3506     int i;
3507 
3508     for (i = 0; i < ng; i++) {
3509         s->temps[i].state = TS_DEAD | TS_MEM;
3510         la_reset_pref(&s->temps[i]);
3511     }
3512 }
3513 
3514 /* liveness analysis: note live globals crossing calls.  */
3515 static void la_cross_call(TCGContext *s, int nt)
3516 {
3517     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3518     int i;
3519 
3520     for (i = 0; i < nt; i++) {
3521         TCGTemp *ts = &s->temps[i];
3522         if (!(ts->state & TS_DEAD)) {
3523             TCGRegSet *pset = la_temp_pref(ts);
3524             TCGRegSet set = *pset;
3525 
3526             set &= mask;
3527             /* If the combination is not possible, restart.  */
3528             if (set == 0) {
3529                 set = tcg_target_available_regs[ts->type] & mask;
3530             }
3531             *pset = set;
3532         }
3533     }
3534 }
3535 
3536 /*
3537  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3538  * to TEMP_EBB, if possible.
3539  */
3540 static void __attribute__((noinline))
3541 liveness_pass_0(TCGContext *s)
3542 {
3543     void * const multiple_ebb = (void *)(uintptr_t)-1;
3544     int nb_temps = s->nb_temps;
3545     TCGOp *op, *ebb;
3546 
3547     for (int i = s->nb_globals; i < nb_temps; ++i) {
3548         s->temps[i].state_ptr = NULL;
3549     }
3550 
3551     /*
3552      * Represent each EBB by the op at which it begins.  In the case of
3553      * the first EBB, this is the first op, otherwise it is a label.
3554      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3555      * within a single EBB, else MULTIPLE_EBB.
3556      */
3557     ebb = QTAILQ_FIRST(&s->ops);
3558     QTAILQ_FOREACH(op, &s->ops, link) {
3559         const TCGOpDef *def;
3560         int nb_oargs, nb_iargs;
3561 
3562         switch (op->opc) {
3563         case INDEX_op_set_label:
3564             ebb = op;
3565             continue;
3566         case INDEX_op_discard:
3567             continue;
3568         case INDEX_op_call:
3569             nb_oargs = TCGOP_CALLO(op);
3570             nb_iargs = TCGOP_CALLI(op);
3571             break;
3572         default:
3573             def = &tcg_op_defs[op->opc];
3574             nb_oargs = def->nb_oargs;
3575             nb_iargs = def->nb_iargs;
3576             break;
3577         }
3578 
3579         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3580             TCGTemp *ts = arg_temp(op->args[i]);
3581 
3582             if (ts->kind != TEMP_TB) {
3583                 continue;
3584             }
3585             if (ts->state_ptr == NULL) {
3586                 ts->state_ptr = ebb;
3587             } else if (ts->state_ptr != ebb) {
3588                 ts->state_ptr = multiple_ebb;
3589             }
3590         }
3591     }
3592 
3593     /*
3594      * For TEMP_TB that turned out not to be used beyond one EBB,
3595      * reduce the liveness to TEMP_EBB.
3596      */
3597     for (int i = s->nb_globals; i < nb_temps; ++i) {
3598         TCGTemp *ts = &s->temps[i];
3599         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3600             ts->kind = TEMP_EBB;
3601         }
3602     }
3603 }
3604 
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, reachable via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk the ops backward, so each use is seen before its definition. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        /* Record the dead/sync bits computed above for the allocator.  */
        op->life = arg_life;
    }
}
3947 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * Each global marked indirect_reg receives a shadow TEMP_EBB temp; uses
 * are rewritten to reference the shadow, with an explicit load inserted
 * before a use of a dead value and a store inserted where liveness
 * requires a sync.  Returns true if any op argument was changed.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Insert a load from the indirect global's memory slot. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    /* If the mov destination dies here, store directly
                       from the mov source and drop the mov itself.  */
                    if (IS_DEAD_ARG(0)) {
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4136 
/*
 * Allocate a slot in the TB's stack frame for @ts, recording it in
 * mem_offset/mem_base and setting mem_allocated.  If the frame is
 * exhausted, raise tb overflow to restart with a smaller TB.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC addresses the frame with a bias; fold it into the offset. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4205 
4206 /* Assign @reg to @ts, and update reg_to_temp[]. */
4207 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4208 {
4209     if (ts->val_type == TEMP_VAL_REG) {
4210         TCGReg old = ts->reg;
4211         tcg_debug_assert(s->reg_to_temp[old] == ts);
4212         if (old == reg) {
4213             return;
4214         }
4215         s->reg_to_temp[old] = NULL;
4216     }
4217     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4218     s->reg_to_temp[reg] = ts;
4219     ts->val_type = TEMP_VAL_REG;
4220     ts->reg = reg;
4221 }
4222 
4223 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4224 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4225 {
4226     tcg_debug_assert(type != TEMP_VAL_REG);
4227     if (ts->val_type == TEMP_VAL_REG) {
4228         TCGReg reg = ts->reg;
4229         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4230         s->reg_to_temp[reg] = NULL;
4231     }
4232     ts->val_type = type;
4233 }
4234 
4235 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4236 
4237 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4238    mark it free; otherwise mark it dead.  */
4239 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4240 {
4241     TCGTempVal new_type;
4242 
4243     switch (ts->kind) {
4244     case TEMP_FIXED:
4245         return;
4246     case TEMP_GLOBAL:
4247     case TEMP_TB:
4248         new_type = TEMP_VAL_MEM;
4249         break;
4250     case TEMP_EBB:
4251         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4252         break;
4253     case TEMP_CONST:
4254         new_type = TEMP_VAL_CONST;
4255         break;
4256     default:
4257         g_assert_not_reached();
4258     }
4259     set_temp_val_nonreg(s, ts, new_type);
4260 }
4261 
4262 /* Mark a temporary as dead.  */
4263 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4264 {
4265     temp_free_or_dead(s, ts, 1);
4266 }
4267 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; already-coherent ones
       only need the optional release at the bottom.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register and
               store via the TEMP_VAL_REG path below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4311 
4312 /* free register 'reg' by spilling the corresponding temporary if necessary */
4313 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4314 {
4315     TCGTemp *ts = s->reg_to_temp[reg];
4316     if (ts != NULL) {
4317         temp_sync(s, ts, allocated_regs, 0, -1);
4318     }
4319 }
4320 
/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1] = all acceptable regs; reg_ct[0] = the preferred subset. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference.  */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    /* reg_ct[1] was asserted non-empty, so the spill loop must return. */
    g_assert_not_reached();
}
4392 
/*
 * Allocate an aligned pair of adjacent host registers (reg, reg + 1)
 * from @required_regs, avoiding @allocated_regs and biasing toward
 * @preferred_regs.  @rev selects the "indirect" allocation order.
 * Returns the lower register of the pair; both registers are freed
 * (spilled if occupied) before returning.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* Count how many of the two registers are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4438 
/*
 * Make sure the temporary is in a register.  If needed, allocate the
 * register from DESIRED_REGS while avoiding ALLOCATED_REGS, biasing
 * toward PREFERRED_REGS.  On return, ts->val_type == TEMP_VAL_REG.
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do.  */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Integer constant: materialize with movi.  */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            /* Vector constant: materialize with dupi.  */
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register now differs from whatever is in memory.  */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded: register and memory agree.  */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4487 
/* Save a temporary to memory.  'allocated_regs' would be used in case a
   temporary register needs to be allocated to store a constant; it is
   currently unused but kept for interface symmetry with callers.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4496 
4497 /* save globals to their canonical location and assume they can be
4498    modified be the following code. 'allocated_regs' is used in case a
4499    temporary registers needs to be allocated to store a constant. */
4500 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4501 {
4502     int i, n;
4503 
4504     for (i = 0, n = s->nb_globals; i < n; i++) {
4505         temp_save(s, &s->temps[i], allocated_regs);
4506     }
4507 }
4508 
4509 /* sync globals to their canonical location and assume they can be
4510    read by the following code. 'allocated_regs' is used in case a
4511    temporary registers needs to be allocated to store a constant. */
4512 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4513 {
4514     int i, n;
4515 
4516     for (i = 0, n = s->nb_globals; i < n; i++) {
4517         TCGTemp *ts = &s->temps[i];
4518         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4519                          || ts->kind == TEMP_FIXED
4520                          || ts->mem_coherent);
4521     }
4522 }
4523 
4524 /* at the end of a basic block, we assume all temporaries are dead and
4525    all globals are stored at their canonical location. */
4526 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4527 {
4528     int i;
4529 
4530     for (i = s->nb_globals; i < s->nb_temps; i++) {
4531         TCGTemp *ts = &s->temps[i];
4532 
4533         switch (ts->kind) {
4534         case TEMP_TB:
4535             temp_save(s, ts, allocated_regs);
4536             break;
4537         case TEMP_EBB:
4538             /* The liveness analysis already ensures that temps are dead.
4539                Keep an tcg_debug_assert for safety. */
4540             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4541             break;
4542         case TEMP_CONST:
4543             /* Similarly, we should have freed any allocated register. */
4544             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4545             break;
4546         default:
4547             g_assert_not_reached();
4548         }
4549     }
4550 
4551     save_globals(s, allocated_regs);
4552 }
4553 
4554 /*
4555  * At a conditional branch, we assume all temporaries are dead unless
4556  * explicitly live-across-conditional-branch; all globals and local
4557  * temps are synced to their location.
4558  */
4559 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4560 {
4561     sync_globals(s, allocated_regs);
4562 
4563     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4564         TCGTemp *ts = &s->temps[i];
4565         /*
4566          * The liveness analysis already ensures that temps are dead.
4567          * Keep tcg_debug_asserts for safety.
4568          */
4569         switch (ts->kind) {
4570         case TEMP_TB:
4571             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4572             break;
4573         case TEMP_EBB:
4574         case TEMP_CONST:
4575             break;
4576         default:
4577             g_assert_not_reached();
4578         }
4579     }
4580 }
4581 
4582 /*
4583  * Specialized code generation for INDEX_op_mov_* with a constant.
4584  */
4585 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4586                                   tcg_target_ulong val, TCGLifeData arg_life,
4587                                   TCGRegSet preferred_regs)
4588 {
4589     /* ENV should not be modified.  */
4590     tcg_debug_assert(!temp_readonly(ots));
4591 
4592     /* The movi is not explicitly generated here.  */
4593     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4594     ots->val = val;
4595     ots->mem_coherent = 0;
4596     if (NEED_SYNC_ARG(0)) {
4597         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4598     } else if (IS_DEAD_ARG(0)) {
4599         temp_dead(s, ots);
4600     }
4601 }
4602 
4603 /*
4604  * Specialized code generation for INDEX_op_mov_*.
4605  */
4606 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4607 {
4608     const TCGLifeData arg_life = op->life;
4609     TCGRegSet allocated_regs, preferred_regs;
4610     TCGTemp *ts, *ots;
4611     TCGType otype, itype;
4612     TCGReg oreg, ireg;
4613 
4614     allocated_regs = s->reserved_regs;
4615     preferred_regs = output_pref(op, 0);
4616     ots = arg_temp(op->args[0]);
4617     ts = arg_temp(op->args[1]);
4618 
4619     /* ENV should not be modified.  */
4620     tcg_debug_assert(!temp_readonly(ots));
4621 
4622     /* Note that otype != itype for no-op truncation.  */
4623     otype = ots->type;
4624     itype = ts->type;
4625 
4626     if (ts->val_type == TEMP_VAL_CONST) {
4627         /* propagate constant or generate sti */
4628         tcg_target_ulong val = ts->val;
4629         if (IS_DEAD_ARG(1)) {
4630             temp_dead(s, ts);
4631         }
4632         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4633         return;
4634     }
4635 
4636     /* If the source value is in memory we're going to be forced
4637        to have it in a register in order to perform the copy.  Copy
4638        the SOURCE value into its own register first, that way we
4639        don't have to reload SOURCE the next time it is used. */
4640     if (ts->val_type == TEMP_VAL_MEM) {
4641         temp_load(s, ts, tcg_target_available_regs[itype],
4642                   allocated_regs, preferred_regs);
4643     }
4644     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4645     ireg = ts->reg;
4646 
4647     if (IS_DEAD_ARG(0)) {
4648         /* mov to a non-saved dead register makes no sense (even with
4649            liveness analysis disabled). */
4650         tcg_debug_assert(NEED_SYNC_ARG(0));
4651         if (!ots->mem_allocated) {
4652             temp_allocate_frame(s, ots);
4653         }
4654         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4655         if (IS_DEAD_ARG(1)) {
4656             temp_dead(s, ts);
4657         }
4658         temp_dead(s, ots);
4659         return;
4660     }
4661 
4662     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4663         /*
4664          * The mov can be suppressed.  Kill input first, so that it
4665          * is unlinked from reg_to_temp, then set the output to the
4666          * reg that we saved from the input.
4667          */
4668         temp_dead(s, ts);
4669         oreg = ireg;
4670     } else {
4671         if (ots->val_type == TEMP_VAL_REG) {
4672             oreg = ots->reg;
4673         } else {
4674             /* Make sure to not spill the input register during allocation. */
4675             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4676                                  allocated_regs | ((TCGRegSet)1 << ireg),
4677                                  preferred_regs, ots->indirect_base);
4678         }
4679         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4680             /*
4681              * Cross register class move not supported.
4682              * Store the source register into the destination slot
4683              * and leave the destination temp as TEMP_VAL_MEM.
4684              */
4685             assert(!temp_readonly(ots));
4686             if (!ts->mem_allocated) {
4687                 temp_allocate_frame(s, ots);
4688             }
4689             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4690             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4691             ots->mem_coherent = 1;
4692             return;
4693         }
4694     }
4695     set_temp_val_reg(s, ots, oreg);
4696     ots->mem_coherent = 0;
4697 
4698     if (NEED_SYNC_ARG(0)) {
4699         temp_sync(s, ots, allocated_regs, 0, 0);
4700     }
4701 }
4702 
4703 /*
4704  * Specialized code generation for INDEX_op_dup_vec.
4705  */
4706 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4707 {
4708     const TCGLifeData arg_life = op->life;
4709     TCGRegSet dup_out_regs, dup_in_regs;
4710     TCGTemp *its, *ots;
4711     TCGType itype, vtype;
4712     unsigned vece;
4713     int lowpart_ofs;
4714     bool ok;
4715 
4716     ots = arg_temp(op->args[0]);
4717     its = arg_temp(op->args[1]);
4718 
4719     /* ENV should not be modified.  */
4720     tcg_debug_assert(!temp_readonly(ots));
4721 
4722     itype = its->type;
4723     vece = TCGOP_VECE(op);
4724     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4725 
4726     if (its->val_type == TEMP_VAL_CONST) {
4727         /* Propagate constant via movi -> dupi.  */
4728         tcg_target_ulong val = its->val;
4729         if (IS_DEAD_ARG(1)) {
4730             temp_dead(s, its);
4731         }
4732         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4733         return;
4734     }
4735 
4736     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4737     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4738 
4739     /* Allocate the output register now.  */
4740     if (ots->val_type != TEMP_VAL_REG) {
4741         TCGRegSet allocated_regs = s->reserved_regs;
4742         TCGReg oreg;
4743 
4744         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4745             /* Make sure to not spill the input register. */
4746             tcg_regset_set_reg(allocated_regs, its->reg);
4747         }
4748         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4749                              output_pref(op, 0), ots->indirect_base);
4750         set_temp_val_reg(s, ots, oreg);
4751     }
4752 
4753     switch (its->val_type) {
4754     case TEMP_VAL_REG:
4755         /*
4756          * The dup constriaints must be broad, covering all possible VECE.
4757          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4758          * to fail, indicating that extra moves are required for that case.
4759          */
4760         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4761             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4762                 goto done;
4763             }
4764             /* Try again from memory or a vector input register.  */
4765         }
4766         if (!its->mem_coherent) {
4767             /*
4768              * The input register is not synced, and so an extra store
4769              * would be required to use memory.  Attempt an integer-vector
4770              * register move first.  We do not have a TCGRegSet for this.
4771              */
4772             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4773                 break;
4774             }
4775             /* Sync the temp back to its slot and load from there.  */
4776             temp_sync(s, its, s->reserved_regs, 0, 0);
4777         }
4778         /* fall through */
4779 
4780     case TEMP_VAL_MEM:
4781         lowpart_ofs = 0;
4782         if (HOST_BIG_ENDIAN) {
4783             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4784         }
4785         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4786                              its->mem_offset + lowpart_ofs)) {
4787             goto done;
4788         }
4789         /* Load the input into the destination vector register. */
4790         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4791         break;
4792 
4793     default:
4794         g_assert_not_reached();
4795     }
4796 
4797     /* We now have a vector input register, so dup must succeed. */
4798     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4799     tcg_debug_assert(ok);
4800 
4801  done:
4802     ots->mem_coherent = 0;
4803     if (IS_DEAD_ARG(1)) {
4804         temp_dead(s, its);
4805     }
4806     if (NEED_SYNC_ARG(0)) {
4807         temp_sync(s, ots, s->reserved_regs, 0, 0);
4808     }
4809     if (IS_DEAD_ARG(0)) {
4810         temp_dead(s, ots);
4811     }
4812 }
4813 
/*
 * Register allocation and code emission for a generic TCG opcode:
 * satisfy input constraints, allocate outputs, emit the target
 * instruction, then sync/free outputs per the liveness data in op->life.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];
    TCGCond op_cond;

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /*
     * Extract the comparison condition, if any, so that
     * tcg_target_const_match can evaluate constants against it.
     */
    switch (op->opc) {
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        op_cond = op->args[2];
        break;
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
    case INDEX_op_cmp_vec:
        op_cond = op->args[3];
        break;
    case INDEX_op_brcond2_i32:
        op_cond = op->args[4];
        break;
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
    case INDEX_op_setcond2_i32:
    case INDEX_op_cmpsel_vec:
        op_cond = op->args[5];
        break;
    default:
        /* No condition within opcode. */
        op_cond = TCG_COND_ALWAYS;
        break;
    }

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in constraint-sorted order.  */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
                                      op_cond, TCGOP_VECE(op))) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
                    || def->args_ct[arg_ct->alias_index].newreg) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* The temp may already sit in a non-conforming register. */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    /* Existing registers already form a usable pair. */
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half already fixed the pair's base register. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                /* Claim the register below the input for the first output. */
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* Output must not overlap any input register. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs,
                                             i_allocated_regs | o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                }
                break;

            case 2: /* second of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5213 
/*
 * Register allocation for INDEX_op_dup2_vec: build a vector whose 64-bit
 * elements are formed from two 32-bit inputs (low, high).  Valid only on
 * 32-bit hosts.  Returns true if the operation was fully emitted here
 * (constant promotion to dupi_vec, or a memory dupm_vec); returns false
 * to let the caller fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;   /* used by IS_DEAD_ARG/NEED_SYNC_ARG */
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* output vector */
    itsl = arg_temp(op->args[1]);   /* low 32 bits of the element */
    itsh = arg_temp(op->args[2]);   /* high 32 bits of the element */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        /* Use the narrowest element size that replicates to the value. */
        MemOp vece = MO_64;

        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    /* I.e. the two parts are adjacent subindices of the same base temp,
       ordered so that their memory image is a host-endian 64-bit value. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Force both halves to their (adjacent) memory slots. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5301 
5302 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5303                          TCGRegSet allocated_regs)
5304 {
5305     if (ts->val_type == TEMP_VAL_REG) {
5306         if (ts->reg != reg) {
5307             tcg_reg_free(s, reg, allocated_regs);
5308             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5309                 /*
5310                  * Cross register class move not supported.  Sync the
5311                  * temp back to its slot and load from there.
5312                  */
5313                 temp_sync(s, ts, allocated_regs, 0, 0);
5314                 tcg_out_ld(s, ts->type, reg,
5315                            ts->mem_base->reg, ts->mem_offset);
5316             }
5317         }
5318     } else {
5319         TCGRegSet arg_set = 0;
5320 
5321         tcg_reg_free(s, reg, allocated_regs);
5322         tcg_regset_set_reg(arg_set, reg);
5323         temp_load(s, ts, arg_set, allocated_regs, 0);
5324     }
5325 }
5326 
5327 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5328                          TCGRegSet allocated_regs)
5329 {
5330     /*
5331      * When the destination is on the stack, load up the temp and store.
5332      * If there are many call-saved registers, the temp might live to
5333      * see another use; otherwise it'll be discarded.
5334      */
5335     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5336     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5337                arg_slot_stk_ofs(arg_slot));
5338 }
5339 
5340 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5341                             TCGTemp *ts, TCGRegSet *allocated_regs)
5342 {
5343     if (arg_slot_reg_p(l->arg_slot)) {
5344         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5345         load_arg_reg(s, reg, ts, *allocated_regs);
5346         tcg_regset_set_reg(*allocated_regs, reg);
5347     } else {
5348         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5349     }
5350 }
5351 
5352 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5353                          intptr_t ref_off, TCGRegSet *allocated_regs)
5354 {
5355     TCGReg reg;
5356 
5357     if (arg_slot_reg_p(arg_slot)) {
5358         reg = tcg_target_call_iarg_regs[arg_slot];
5359         tcg_reg_free(s, reg, *allocated_regs);
5360         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5361         tcg_regset_set_reg(*allocated_regs, reg);
5362     } else {
5363         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5364                             *allocated_regs, 0, false);
5365         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5366         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5367                    arg_slot_stk_ofs(arg_slot));
5368     }
5369 }
5370 
/*
 * Register allocation and code emission for a helper call op.
 * Marshals inputs per the TCGHelperInfo argument plan, frees the
 * call-clobbered registers, syncs or saves globals as the helper's
 * flags require, emits the call, then binds the return value(s) to
 * the output temporaries.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;  /* used by IS_DEAD_ARG/NEED_SYNC_ARG */
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack home, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference value: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output word arrives in an ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* An I128 returned in a vector register: spill it to memory. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5501 
5502 /**
5503  * atom_and_align_for_opc:
5504  * @s: tcg context
5505  * @opc: memory operation code
5506  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5507  * @allow_two_ops: true if we are prepared to issue two operations
5508  *
5509  * Return the alignment and atomicity to use for the inline fast path
5510  * for the given memory operation.  The alignment may be larger than
5511  * that specified in @opc, and the correct alignment will be diagnosed
5512  * by the slow path helper.
5513  *
5514  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5515  * and issue two loads or stores for subalignment.
5516  */
5517 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5518                                            MemOp host_atom, bool allow_two_ops)
5519 {
5520     MemOp align = get_alignment_bits(opc);
5521     MemOp size = opc & MO_SIZE;
5522     MemOp half = size ? size - 1 : 0;
5523     MemOp atom = opc & MO_ATOM_MASK;
5524     MemOp atmax;
5525 
5526     switch (atom) {
5527     case MO_ATOM_NONE:
5528         /* The operation requires no specific atomicity. */
5529         atmax = MO_8;
5530         break;
5531 
5532     case MO_ATOM_IFALIGN:
5533         atmax = size;
5534         break;
5535 
5536     case MO_ATOM_IFALIGN_PAIR:
5537         atmax = half;
5538         break;
5539 
5540     case MO_ATOM_WITHIN16:
5541         atmax = size;
5542         if (size == MO_128) {
5543             /* Misalignment implies !within16, and therefore no atomicity. */
5544         } else if (host_atom != MO_ATOM_WITHIN16) {
5545             /* The host does not implement within16, so require alignment. */
5546             align = MAX(align, size);
5547         }
5548         break;
5549 
5550     case MO_ATOM_WITHIN16_PAIR:
5551         atmax = size;
5552         /*
5553          * Misalignment implies !within16, and therefore half atomicity.
5554          * Any host prepared for two operations can implement this with
5555          * half alignment.
5556          */
5557         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5558             align = MAX(align, half);
5559         }
5560         break;
5561 
5562     case MO_ATOM_SUBALIGN:
5563         atmax = size;
5564         if (host_atom != MO_ATOM_SUBALIGN) {
5565             /* If unaligned but not odd, there are subobjects up to half. */
5566             if (allow_two_ops) {
5567                 align = MAX(align, half);
5568             } else {
5569                 align = MAX(align, size);
5570             }
5571         }
5572         break;
5573 
5574     default:
5575         g_assert_not_reached();
5576     }
5577 
5578     return (TCGAtomAlign){ .atom = atmax, .align = align };
5579 }
5580 
5581 /*
5582  * Similarly for qemu_ld/st slow path helpers.
5583  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5584  * using only the provided backend tcg_out_* functions.
5585  */
5586 
5587 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5588 {
5589     int ofs = arg_slot_stk_ofs(slot);
5590 
5591     /*
5592      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5593      * require extension to uint64_t, adjust the address for uint32_t.
5594      */
5595     if (HOST_BIG_ENDIAN &&
5596         TCG_TARGET_REG_BITS == 64 &&
5597         type == TCG_TYPE_I32) {
5598         ofs += 4;
5599     }
5600     return ofs;
5601 }
5602 
5603 static void tcg_out_helper_load_slots(TCGContext *s,
5604                                       unsigned nmov, TCGMovExtend *mov,
5605                                       const TCGLdstHelperParam *parm)
5606 {
5607     unsigned i;
5608     TCGReg dst3;
5609 
5610     /*
5611      * Start from the end, storing to the stack first.
5612      * This frees those registers, so we need not consider overlap.
5613      */
5614     for (i = nmov; i-- > 0; ) {
5615         unsigned slot = mov[i].dst;
5616 
5617         if (arg_slot_reg_p(slot)) {
5618             goto found_reg;
5619         }
5620 
5621         TCGReg src = mov[i].src;
5622         TCGType dst_type = mov[i].dst_type;
5623         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5624 
5625         /* The argument is going onto the stack; extend into scratch. */
5626         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5627             tcg_debug_assert(parm->ntmp != 0);
5628             mov[i].dst = src = parm->tmp[0];
5629             tcg_out_movext1(s, &mov[i]);
5630         }
5631 
5632         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5633                    tcg_out_helper_stk_ofs(dst_type, slot));
5634     }
5635     return;
5636 
5637  found_reg:
5638     /*
5639      * The remaining arguments are in registers.
5640      * Convert slot numbers to argument registers.
5641      */
5642     nmov = i + 1;
5643     for (i = 0; i < nmov; ++i) {
5644         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5645     }
5646 
5647     switch (nmov) {
5648     case 4:
5649         /* The backend must have provided enough temps for the worst case. */
5650         tcg_debug_assert(parm->ntmp >= 2);
5651 
5652         dst3 = mov[3].dst;
5653         for (unsigned j = 0; j < 3; ++j) {
5654             if (dst3 == mov[j].src) {
5655                 /*
5656                  * Conflict. Copy the source to a temporary, perform the
5657                  * remaining moves, then the extension from our scratch
5658                  * on the way out.
5659                  */
5660                 TCGReg scratch = parm->tmp[1];
5661 
5662                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5663                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5664                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5665                 break;
5666             }
5667         }
5668 
5669         /* No conflicts: perform this move and continue. */
5670         tcg_out_movext1(s, &mov[3]);
5671         /* fall through */
5672 
5673     case 3:
5674         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5675                         parm->ntmp ? parm->tmp[0] : -1);
5676         break;
5677     case 2:
5678         tcg_out_movext2(s, mov, mov + 1,
5679                         parm->ntmp ? parm->tmp[0] : -1);
5680         break;
5681     case 1:
5682         tcg_out_movext1(s, mov);
5683         break;
5684     default:
5685         g_assert_not_reached();
5686     }
5687 }
5688 
5689 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5690                                     TCGType type, tcg_target_long imm,
5691                                     const TCGLdstHelperParam *parm)
5692 {
5693     if (arg_slot_reg_p(slot)) {
5694         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5695     } else {
5696         int ofs = tcg_out_helper_stk_ofs(type, slot);
5697         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5698             tcg_debug_assert(parm->ntmp != 0);
5699             tcg_out_movi(s, type, parm->tmp[0], imm);
5700             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5701         }
5702     }
5703 }
5704 
/*
 * Load the arguments common to all qemu_ld/st slow-path helpers:
 * env (always first), the MemOpIdx oi, and the return address ra.
 * @next_arg indexes info->in[] at the oi argument; the address and
 * (for stores) data arguments have already been handled by the caller.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /*
         * The backend computes the return address itself; tell it which
         * argument register to target (or -1 when ra goes to the stack).
         */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Otherwise pass the recorded return address as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
5771 
5772 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5773                                        const TCGCallArgumentLoc *loc,
5774                                        TCGType dst_type, TCGType src_type,
5775                                        TCGReg lo, TCGReg hi)
5776 {
5777     MemOp reg_mo;
5778 
5779     if (dst_type <= TCG_TYPE_REG) {
5780         MemOp src_ext;
5781 
5782         switch (loc->kind) {
5783         case TCG_CALL_ARG_NORMAL:
5784             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5785             break;
5786         case TCG_CALL_ARG_EXTEND_U:
5787             dst_type = TCG_TYPE_REG;
5788             src_ext = MO_UL;
5789             break;
5790         case TCG_CALL_ARG_EXTEND_S:
5791             dst_type = TCG_TYPE_REG;
5792             src_ext = MO_SL;
5793             break;
5794         default:
5795             g_assert_not_reached();
5796         }
5797 
5798         mov[0].dst = loc->arg_slot;
5799         mov[0].dst_type = dst_type;
5800         mov[0].src = lo;
5801         mov[0].src_type = src_type;
5802         mov[0].src_ext = src_ext;
5803         return 1;
5804     }
5805 
5806     if (TCG_TARGET_REG_BITS == 32) {
5807         assert(dst_type == TCG_TYPE_I64);
5808         reg_mo = MO_32;
5809     } else {
5810         assert(dst_type == TCG_TYPE_I128);
5811         reg_mo = MO_64;
5812     }
5813 
5814     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5815     mov[0].src = lo;
5816     mov[0].dst_type = TCG_TYPE_REG;
5817     mov[0].src_type = TCG_TYPE_REG;
5818     mov[0].src_ext = reg_mo;
5819 
5820     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5821     mov[1].src = hi;
5822     mov[1].dst_type = TCG_TYPE_REG;
5823     mov[1].src_type = TCG_TYPE_REG;
5824     mov[1].src_ext = reg_mo;
5825 
5826     return 2;
5827 }
5828 
/*
 * Emit the argument setup for a slow-path load helper call: select the
 * helper signature by access size, pass the guest address (zero-extended
 * to 64 bits when a 32-bit guest runs on a 32-bit host), reserve the
 * by-reference return slot when the ABI demands one, and finish with
 * the common env/oi/ra arguments.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        /* Address fits the native calling convention directly. */
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Slot 0 itself is on the stack: build the address
                   in a scratch register and store it there. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5909 
/*
 * Move a slow-path load helper's return value into the data register(s)
 * recorded in @ldst, applying any sign/zero extension that the helper
 * did not perform.  @load_sign indicates that a sign-extending helper
 * variant was called, so a plain move suffices for signed loads.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On a 32-bit host, I64 arrives as a register pair: handled
           by the two-move code after the switch. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in a register pair: handled after the switch. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Returned in a vector register: spill it, then reload
               both halves as integers below. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* The value is in memory at the top of stack; load both
               halves in host-endian order. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register return: move both halves, resolving any overlap
       between the ABI return registers and the destinations. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
5995 
/*
 * Emit code to load the arguments of a store helper (addr + data) into
 * their call slots, as described by @ldst and the backend constraints
 * in @parm.  The env argument and the trailing oi/retaddr arguments are
 * handled at the end by tcg_out_helper_load_common_args.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature from the memory access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed by value: queue its moves, then emit all of them. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /*
         * I128 data passed by reference: store both halves to the
         * reserved stack slots (host-endian order), then pass the
         * address of that storage as the argument itself.
         */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* Materialize the pointer to the stored data in its arg slot. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            /* Pointer argument goes on the stack: build it in a temp. */
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6101 
/*
 * Generate host code for the translation block @tb from the op stream
 * in @s->ops.  Returns the number of code bytes generated on success;
 * -1 if the code buffer high-water mark was exceeded (the caller must
 * flush and restart); -2 if the generated code grew beyond what the
 * insn-end-offset bookkeeping or relocations can represent.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Dump the unoptimized op stream when -d op is enabled. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    tcg_optimize(s);

    /* Dead-code elimination and liveness analysis over the op stream. */
    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Allocate start_words uint64_t of insn data per guest insn. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* Main emission loop: one pass over the ops, allocating registers
       and emitting host code as we go. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close out the previous guest insn, then record the
               start parameters of the new one. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* End of basic block: sync temps, then bind the label. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6297 
6298 #ifdef ELF_HOST_MACHINE
6299 /* In order to use this feature, the backend needs to do three things:
6300 
6301    (1) Define ELF_HOST_MACHINE to indicate both what value to
6302        put into the ELF image and to indicate support for the feature.
6303 
6304    (2) Define tcg_register_jit.  This should create a buffer containing
6305        the contents of a .debug_frame section that describes the post-
6306        prologue unwind info for the tcg machine.
6307 
6308    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6309 */
6310 
6311 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values for jit_descriptor.action_flag, as defined by the GDB JIT
   interface; do not modify. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
6317 
/* One registered symbol file, linked into the list rooted at the
   jit_descriptor; layout fixed by the GDB JIT interface. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
6324 
/* Global descriptor through which GDB discovers JIT-ed code; layout
   fixed by the GDB JIT interface. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* one of jit_actions_t */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
6331 
/*
 * GDB places a breakpoint on this function; calling it after updating
 * __jit_debug_descriptor notifies the debugger.  The noinline attribute
 * and the empty asm keep the call site from being optimized away.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
6337 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  (Version 1 is the only version
   defined by the GDB JIT interface.)  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6341 
6342 /* End GDB interface.  */
6343 
/*
 * Return the offset of @str within the section string table @strtab.
 * The table begins with a NUL byte at offset 0, so the scan starts at
 * offset 1.  @str must be present in the table: there is no not-found
 * result, and the scan does not terminate otherwise.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *cur = strtab + 1; ; cur += strlen(cur) + 1) {
        if (strcmp(cur, str) == 0) {
            return cur - strtab;
        }
    }
}
6355 
6356 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6357                                  const void *debug_frame,
6358                                  size_t debug_frame_size)
6359 {
6360     struct __attribute__((packed)) DebugInfo {
6361         uint32_t  len;
6362         uint16_t  version;
6363         uint32_t  abbrev;
6364         uint8_t   ptr_size;
6365         uint8_t   cu_die;
6366         uint16_t  cu_lang;
6367         uintptr_t cu_low_pc;
6368         uintptr_t cu_high_pc;
6369         uint8_t   fn_die;
6370         char      fn_name[16];
6371         uintptr_t fn_low_pc;
6372         uintptr_t fn_high_pc;
6373         uint8_t   cu_eoc;
6374     };
6375 
6376     struct ElfImage {
6377         ElfW(Ehdr) ehdr;
6378         ElfW(Phdr) phdr;
6379         ElfW(Shdr) shdr[7];
6380         ElfW(Sym)  sym[2];
6381         struct DebugInfo di;
6382         uint8_t    da[24];
6383         char       str[80];
6384     };
6385 
6386     struct ElfImage *img;
6387 
6388     static const struct ElfImage img_template = {
6389         .ehdr = {
6390             .e_ident[EI_MAG0] = ELFMAG0,
6391             .e_ident[EI_MAG1] = ELFMAG1,
6392             .e_ident[EI_MAG2] = ELFMAG2,
6393             .e_ident[EI_MAG3] = ELFMAG3,
6394             .e_ident[EI_CLASS] = ELF_CLASS,
6395             .e_ident[EI_DATA] = ELF_DATA,
6396             .e_ident[EI_VERSION] = EV_CURRENT,
6397             .e_type = ET_EXEC,
6398             .e_machine = ELF_HOST_MACHINE,
6399             .e_version = EV_CURRENT,
6400             .e_phoff = offsetof(struct ElfImage, phdr),
6401             .e_shoff = offsetof(struct ElfImage, shdr),
6402             .e_ehsize = sizeof(ElfW(Shdr)),
6403             .e_phentsize = sizeof(ElfW(Phdr)),
6404             .e_phnum = 1,
6405             .e_shentsize = sizeof(ElfW(Shdr)),
6406             .e_shnum = ARRAY_SIZE(img->shdr),
6407             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6408 #ifdef ELF_HOST_FLAGS
6409             .e_flags = ELF_HOST_FLAGS,
6410 #endif
6411 #ifdef ELF_OSABI
6412             .e_ident[EI_OSABI] = ELF_OSABI,
6413 #endif
6414         },
6415         .phdr = {
6416             .p_type = PT_LOAD,
6417             .p_flags = PF_X,
6418         },
6419         .shdr = {
6420             [0] = { .sh_type = SHT_NULL },
6421             /* Trick: The contents of code_gen_buffer are not present in
6422                this fake ELF file; that got allocated elsewhere.  Therefore
6423                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6424                will not look for contents.  We can record any address.  */
6425             [1] = { /* .text */
6426                 .sh_type = SHT_NOBITS,
6427                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6428             },
6429             [2] = { /* .debug_info */
6430                 .sh_type = SHT_PROGBITS,
6431                 .sh_offset = offsetof(struct ElfImage, di),
6432                 .sh_size = sizeof(struct DebugInfo),
6433             },
6434             [3] = { /* .debug_abbrev */
6435                 .sh_type = SHT_PROGBITS,
6436                 .sh_offset = offsetof(struct ElfImage, da),
6437                 .sh_size = sizeof(img->da),
6438             },
6439             [4] = { /* .debug_frame */
6440                 .sh_type = SHT_PROGBITS,
6441                 .sh_offset = sizeof(struct ElfImage),
6442             },
6443             [5] = { /* .symtab */
6444                 .sh_type = SHT_SYMTAB,
6445                 .sh_offset = offsetof(struct ElfImage, sym),
6446                 .sh_size = sizeof(img->sym),
6447                 .sh_info = 1,
6448                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6449                 .sh_entsize = sizeof(ElfW(Sym)),
6450             },
6451             [6] = { /* .strtab */
6452                 .sh_type = SHT_STRTAB,
6453                 .sh_offset = offsetof(struct ElfImage, str),
6454                 .sh_size = sizeof(img->str),
6455             }
6456         },
6457         .sym = {
6458             [1] = { /* code_gen_buffer */
6459                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6460                 .st_shndx = 1,
6461             }
6462         },
6463         .di = {
6464             .len = sizeof(struct DebugInfo) - 4,
6465             .version = 2,
6466             .ptr_size = sizeof(void *),
6467             .cu_die = 1,
6468             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6469             .fn_die = 2,
6470             .fn_name = "code_gen_buffer"
6471         },
6472         .da = {
6473             1,          /* abbrev number (the cu) */
6474             0x11, 1,    /* DW_TAG_compile_unit, has children */
6475             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6476             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6477             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6478             0, 0,       /* end of abbrev */
6479             2,          /* abbrev number (the fn) */
6480             0x2e, 0,    /* DW_TAG_subprogram, no children */
6481             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6482             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6483             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6484             0, 0,       /* end of abbrev */
6485             0           /* no more abbrev */
6486         },
6487         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6488                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6489     };
6490 
6491     /* We only need a single jit entry; statically allocate it.  */
6492     static struct jit_code_entry one_entry;
6493 
6494     uintptr_t buf = (uintptr_t)buf_ptr;
6495     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6496     DebugFrameHeader *dfh;
6497 
6498     img = g_malloc(img_size);
6499     *img = img_template;
6500 
6501     img->phdr.p_vaddr = buf;
6502     img->phdr.p_paddr = buf;
6503     img->phdr.p_memsz = buf_size;
6504 
6505     img->shdr[1].sh_name = find_string(img->str, ".text");
6506     img->shdr[1].sh_addr = buf;
6507     img->shdr[1].sh_size = buf_size;
6508 
6509     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6510     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6511 
6512     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6513     img->shdr[4].sh_size = debug_frame_size;
6514 
6515     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6516     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6517 
6518     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6519     img->sym[1].st_value = buf;
6520     img->sym[1].st_size = buf_size;
6521 
6522     img->di.cu_low_pc = buf;
6523     img->di.cu_high_pc = buf + buf_size;
6524     img->di.fn_low_pc = buf;
6525     img->di.fn_high_pc = buf + buf_size;
6526 
6527     dfh = (DebugFrameHeader *)(img + 1);
6528     memcpy(dfh, debug_frame, debug_frame_size);
6529     dfh->fde.func_start = buf;
6530     dfh->fde.func_len = buf_size;
6531 
6532 #ifdef DEBUG_JIT
6533     /* Enable this block to be able to debug the ELF image file creation.
6534        One can use readelf, objdump, or other inspection utilities.  */
6535     {
6536         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6537         FILE *f = fopen(jit, "w+b");
6538         if (f) {
6539             if (fwrite(img, img_size, 1, f) != img_size) {
6540                 /* Avoid stupid unused return value warning for fwrite.  */
6541             }
6542             fclose(f);
6543         }
6544     }
6545 #endif
6546 
6547     one_entry.symfile_addr = img;
6548     one_entry.symfile_size = img_size;
6549 
6550     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6551     __jit_debug_descriptor.relevant_entry = &one_entry;
6552     __jit_debug_descriptor.first_entry = &one_entry;
6553     __jit_debug_register_code();
6554 }
6555 #else
6556 /* No support for the feature.  Provide the entry point expected by exec.c,
6557    and implement the internal function we declared earlier.  */
6558 
/* Without ELF_HOST_MACHINE there is no GDB registration support;
   this stub satisfies the internal declaration made earlier.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6564 
/* Entry point expected by exec.c; a no-op without ELF_HOST_MACHINE.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6568 #endif /* ELF_HOST_MACHINE */
6569 
6570 #if !TCG_TARGET_MAYBE_vec
/* This backend does not support vector ops, so no vector op should
   ever require expansion; reaching here is a front-end bug.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6575 #endif
6576