xref: /openbmc/qemu/tcg/tcg.c (revision ecd6f6a8)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #ifdef CONFIG_USER_ONLY
60 #include "exec/user/guest-base.h"
61 #endif
62 
63 /* Forward declarations for functions declared in tcg-target.c.inc and
64    used here. */
65 static void tcg_target_init(TCGContext *s);
66 static void tcg_target_qemu_prologue(TCGContext *s);
67 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
68                         intptr_t value, intptr_t addend);
69 
/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF .debug_frame Common Information Entry header. */
typedef struct {
    /* NOTE(review): pointer-size alignment appears intended to match the
       host emitters in tcg_register_jit_int — confirm before changing. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* DWARF .debug_frame Frame Description Entry header. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by one FDE, used when registering
   generated code for debugging (see tcg_register_jit_int). */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
92 
/*
 * Bookkeeping for the out-of-line slow path of one qemu_ld/qemu_st.
 * Instances are linked into a singly-linked queue via @next and
 * emitted/patched after the fast-path code has been generated.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;            /* memory operation descriptor for the access */
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
105 
106 static void tcg_register_jit_int(const void *buf, size_t size,
107                                  const void *debug_frame,
108                                  size_t debug_frame_size)
109     __attribute__((unused));
110 
111 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
112 static void tcg_out_tb_start(TCGContext *s);
113 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
114                        intptr_t arg2);
115 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
116 static void tcg_out_movi(TCGContext *s, TCGType type,
117                          TCGReg ret, tcg_target_long arg);
118 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
128 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
129 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
130 static void tcg_out_goto_tb(TCGContext *s, int which);
131 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
132                        const TCGArg args[TCG_MAX_OP_ARGS],
133                        const int const_args[TCG_MAX_OP_ARGS]);
134 #if TCG_TARGET_MAYBE_vec
135 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
136                             TCGReg dst, TCGReg src);
137 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
138                              TCGReg dst, TCGReg base, intptr_t offset);
139 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
140                              TCGReg dst, int64_t arg);
141 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
142                            unsigned vecl, unsigned vece,
143                            const TCGArg args[TCG_MAX_OP_ARGS],
144                            const int const_args[TCG_MAX_OP_ARGS]);
145 #else
146 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
147                                    TCGReg dst, TCGReg src)
148 {
149     g_assert_not_reached();
150 }
151 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
152                                     TCGReg dst, TCGReg base, intptr_t offset)
153 {
154     g_assert_not_reached();
155 }
156 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, int64_t arg)
158 {
159     g_assert_not_reached();
160 }
161 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
162                                   unsigned vecl, unsigned vece,
163                                   const TCGArg args[TCG_MAX_OP_ARGS],
164                                   const int const_args[TCG_MAX_OP_ARGS])
165 {
166     g_assert_not_reached();
167 }
168 #endif
169 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
170                        intptr_t arg2);
171 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
172                         TCGReg base, intptr_t ofs);
173 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
174                          const TCGHelperInfo *info);
175 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
176 static bool tcg_target_const_match(int64_t val, int ct,
177                                    TCGType type, TCGCond cond, int vece);
178 #ifdef TCG_TARGET_NEED_LDST_LABELS
179 static int tcg_out_ldst_finalize(TCGContext *s);
180 #endif
181 
182 #ifndef CONFIG_USER_ONLY
183 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
184 #endif
185 
/*
 * Backend-supplied parameters for building qemu_ld/st slow-path helper
 * arguments (see tcg_out_ld_helper_args / tcg_out_st_helper_args).
 */
typedef struct TCGLdstHelperParam {
    /* Optional generator for the return-address argument; may be NULL. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* scratch registers usable while marshalling */
} TCGLdstHelperParam;
191 
192 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
193                                    const TCGLdstHelperParam *p)
194     __attribute__((unused));
195 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
196                                   bool load_sign, const TCGLdstHelperParam *p)
197     __attribute__((unused));
198 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 
/*
 * Slow-path load helpers, indexed by MemOp size-and-sign.
 * The MO_SL and MO_128 entries are only present on 64-bit hosts.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
214 
/*
 * Slow-path store helpers, indexed by MemOp size.
 * The MO_128 entry is only present on 64-bit hosts.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
224 
225 typedef struct {
226     MemOp atom;   /* lg2 bits of atomicity required */
227     MemOp align;  /* lg2 bits of alignment to use */
228 } TCGAtomAlign;
229 
230 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
231                                            MemOp host_atom, bool allow_two_ops)
232     __attribute__((unused));
233 
234 #ifdef CONFIG_USER_ONLY
235 bool tcg_use_softmmu;
236 #endif
237 
238 TCGContext tcg_init_ctx;
239 __thread TCGContext *tcg_ctx;
240 
241 TCGContext **tcg_ctxs;
242 unsigned int tcg_cur_ctxs;
243 unsigned int tcg_max_ctxs;
244 TCGv_env tcg_env;
245 const void *tcg_code_gen_epilogue;
246 uintptr_t tcg_splitwx_diff;
247 
248 #ifndef CONFIG_TCG_INTERPRETER
249 tcg_prologue_fn *tcg_qemu_tb_exec;
250 #endif
251 
252 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
253 static TCGRegSet tcg_target_call_clobber_regs;
254 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte to the code stream. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    s->code_ptr[0] = v;
    s->code_ptr += 1;
}

/* Overwrite one previously emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
267 
268 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
269 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
270 {
271     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
272         *s->code_ptr++ = v;
273     } else {
274         tcg_insn_unit *p = s->code_ptr;
275         memcpy(p, &v, sizeof(v));
276         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
277     }
278 }
279 
280 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
281                                                        uint16_t v)
282 {
283     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
284         *p = v;
285     } else {
286         memcpy(p, &v, sizeof(v));
287     }
288 }
289 #endif
290 
291 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
292 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
293 {
294     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
295         *s->code_ptr++ = v;
296     } else {
297         tcg_insn_unit *p = s->code_ptr;
298         memcpy(p, &v, sizeof(v));
299         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
300     }
301 }
302 
303 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
304                                                        uint32_t v)
305 {
306     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
307         *p = v;
308     } else {
309         memcpy(p, &v, sizeof(v));
310     }
311 }
312 #endif
313 
314 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
315 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
316 {
317     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
318         *s->code_ptr++ = v;
319     } else {
320         tcg_insn_unit *p = s->code_ptr;
321         memcpy(p, &v, sizeof(v));
322         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
323     }
324 }
325 
326 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
327                                                        uint64_t v)
328 {
329     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
330         *p = v;
331     } else {
332         memcpy(p, &v, sizeof(v));
333     }
334 }
335 #endif
336 
337 /* label relocation processing */
338 
339 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
340                           TCGLabel *l, intptr_t addend)
341 {
342     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
343 
344     r->type = type;
345     r->ptr = code_ptr;
346     r->addend = addend;
347     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
348 }
349 
/* Bind label @l to the current code output position. */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    /* A label may be bound only once. */
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    /* Store the read-execute alias of the write pointer. */
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
356 
357 TCGLabel *gen_new_label(void)
358 {
359     TCGContext *s = tcg_ctx;
360     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
361 
362     memset(l, 0, sizeof(TCGLabel));
363     l->id = s->nb_labels++;
364     QSIMPLEQ_INIT(&l->branches);
365     QSIMPLEQ_INIT(&l->relocs);
366 
367     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
368 
369     return l;
370 }
371 
372 static bool tcg_resolve_relocs(TCGContext *s)
373 {
374     TCGLabel *l;
375 
376     QSIMPLEQ_FOREACH(l, &s->labels, next) {
377         TCGRelocation *r;
378         uintptr_t value = l->u.value;
379 
380         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
381             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
382                 return false;
383             }
384         }
385     }
386     return true;
387 }
388 
/* Record the current code size as the jump-reset offset for exit @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
397 
/* Record the current code size as the goto_tb insn offset for exit @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
406 
/* Address of the indirect jump target slot for exit @which of the TB. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
415 
/*
 * Offset of the TLB fast descriptor for mmu index @which, relative to
 * the end of CPUNegativeOffsetState (hence a negative value; see the
 * build-time check against MIN_TLB_MASK_TABLE_OFS below).
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
422 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit; -2 distinguishes overflow from other longjmp causes. */
    siglongjmp(s->jmp_trans, -2);
}
429 
430 /*
431  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
432  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
433  *
434  * However, tcg_out_helper_load_slots reuses this field to hold an
435  * argument slot number (which may designate a argument register or an
436  * argument stack slot), converting to TCGReg once all arguments that
437  * are destined for the stack are processed.
438  */
typedef struct TCGMovExtend {
    unsigned dst;           /* TCGReg, or an argument slot (see above) */
    TCGReg src;             /* source register */
    TCGType dst_type;       /* integral type of the destination */
    TCGType src_type;       /* integral type of the source */
    MemOp src_ext;          /* extension to apply to the source */
} TCGMovExtend;
446 
447 /**
448  * tcg_out_movext -- move and extend
449  * @s: tcg context
450  * @dst_type: integral type for destination
451  * @dst: destination register
452  * @src_type: integral type for source
453  * @src_ext: extension to apply to source
454  * @src: source register
455  *
456  * Move or extend @src into @dst, depending on @src_ext and the types.
457  */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            /* 32-bit destination: plain move, or truncate a 64-bit source. */
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen a 32-bit source to 64 bits, honoring the sign. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* 64-bit to 64-bit: extend from the low 32 bits of the source. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* Full 64-bit data is only representable on a 64-bit host. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
508 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, taking the description from @i but the source from @src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
515 
/* As tcg_out_movext, taking all parameters from the description @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
520 
521 /**
522  * tcg_out_movext2 -- move and extend two pair
523  * @s: tcg context
524  * @i1: first move description
525  * @i2: second move description
526  * @scratch: temporary register, or -1 for none
527  *
528  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
529  * between the sources and destinations.
530  */
531 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* If the first move does not clobber the second's source, any order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /* i1->dst == src2 here, so i2 must be emitted first; check for a cycle. */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No xchg available: break the cycle via the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
560 
561 /**
562  * tcg_out_movext3 -- move and extend three pair
563  * @s: tcg context
564  * @i1: first move description
565  * @i2: second move description
566  * @i3: third move description
567  * @scratch: temporary register, or -1 for none
568  *
569  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
570  * between the sources and destinations.
571  */
572 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If some move's destination collides with no other source, emit it
       first and reduce to the two-move case. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: park src1 in scratch, emit the other two, finish i1. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg: park src1 in scratch, emit the other two, finish i1. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
636 
637 #define C_PFX1(P, A)                    P##A
638 #define C_PFX2(P, A, B)                 P##A##_##B
639 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
640 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
641 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
642 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
643 
644 /* Define an enumeration for the various combinations. */
645 
646 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
647 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
648 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
649 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
650 
651 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
652 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
653 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
654 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
655 
656 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
657 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
658 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
659 
660 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
661 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
662 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
663 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
664 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
665 
666 typedef enum {
667 #include "tcg-target-con-set.h"
668 } TCGConstraintSetIndex;
669 
670 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
671 
672 #undef C_O0_I1
673 #undef C_O0_I2
674 #undef C_O0_I3
675 #undef C_O0_I4
676 #undef C_O1_I1
677 #undef C_O1_I2
678 #undef C_O1_I3
679 #undef C_O1_I4
680 #undef C_N1_I2
681 #undef C_N1O1_I1
682 #undef C_N2_I1
683 #undef C_O2_I1
684 #undef C_O2_I2
685 #undef C_O2_I3
686 #undef C_O2_I4
687 #undef C_N1_O1_I4
688 
689 /* Put all of the constraint sets into an array, indexed by the enum. */
690 
691 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
692 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
693 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
694 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
695 
696 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
697 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
698 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
699 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
700 
701 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
702 #define C_N1O1_I1(O1, O2, I1)           { .args_ct_str = { "&" #O1, #O2, #I1 } },
703 #define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },
704 
705 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
706 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
707 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
708 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
709 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
710 
711 static const TCGTargetOpDef constraint_sets[] = {
712 #include "tcg-target-con-set.h"
713 };
714 
715 
716 #undef C_O0_I1
717 #undef C_O0_I2
718 #undef C_O0_I3
719 #undef C_O0_I4
720 #undef C_O1_I1
721 #undef C_O1_I2
722 #undef C_O1_I3
723 #undef C_O1_I4
724 #undef C_N1_I2
725 #undef C_N1O1_I1
726 #undef C_N2_I1
727 #undef C_O2_I1
728 #undef C_O2_I2
729 #undef C_O2_I3
730 #undef C_O2_I4
731 #undef C_N1_O1_I4
732 
733 /* Expand the enumerator to be returned from tcg_target_op_def(). */
734 
735 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
736 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
737 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
738 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
739 
740 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
741 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
742 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
743 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
744 
745 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
746 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
747 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
748 
749 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
750 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
751 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
752 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
753 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
754 
755 #include "tcg-target.c.inc"
756 
757 #ifndef CONFIG_TCG_INTERPRETER
758 /* Validate CPUTLBDescFast placement. */
759 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
760                         sizeof(CPUNegativeOffsetState))
761                   < MIN_TLB_MASK_TABLE_OFS);
762 #endif
763 
/* Allocate per-context plugin TB state; a no-op without CONFIG_PLUGIN. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
772 
773 /*
774  * All TCG threads except the parent (i.e. the one that called tcg_context_init
775  * and registered the target's TCG globals) must register with this function
776  * before initiating translation.
777  *
778  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
779  * of tcg_region_init() for the reasoning behind this.
780  *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
788  */
789 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
794 #else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a copy of the initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* Rebase the pointer so it refers into this context's temps. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* Contexts after the first need their own plugin state and region. */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
823 #endif /* !CONFIG_USER_ONLY */
824 
825 /* pool based memory allocation */
/*
 * Allocate @size bytes from the context's memory pool.  Oversized
 * requests get a dedicated chunk on the pool_first_large list; small
 * requests are carved from reusable TCG_POOL_CHUNK_SIZE chunks.
 * Everything is released at once by tcg_pool_reset().
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                /* Link the fresh chunk at the tail of the chunk list. */
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                /* Reuse the next already-allocated chunk. */
                p = p->next;
            }
        }
    }
    /* Make @p current and hand out its first @size bytes. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
866 
867 void tcg_pool_reset(TCGContext *s)
868 {
869     TCGPool *p, *t;
870     for (p = s->pool_first_large; p; p = t) {
871         t = p->next;
872         g_free(p);
873     }
874     s->pool_first_large = NULL;
875     s->pool_cur = s->pool_end = NULL;
876     s->pool_current = NULL;
877 }
878 
879 /*
880  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
881  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
882  * We only use these for layout in tcg_out_ld_helper_ret and
883  * tcg_out_st_helper_args, and share them between several of
884  * the helpers, with the end result that it's easier to build manually.
885  */
886 
/* "ttl" == tcg_target_ulong: 32 or 64 bits depending on the host. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

/* tcg_target_ulong load helper: (env, uint64_t addr, oi, ra) */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* uint64_t load helper: (env, uint64_t addr, oi, ra) */
static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Int128 load helper: (env, uint64_t addr, oi, ra) */
static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* 32-bit store helper: (env, uint64_t addr, uint32_t data, oi, ra) */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* 64-bit store helper: (env, uint64_t addr, uint64_t data, oi, ra) */
static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* Int128 store helper: (env, uint64_t addr, Int128 data, oi, ra) */
static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
949 
950 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map one 3-bit dh_typecode_* value to the libffi type descriptor used
 * when the TCG interpreter marshals helper calls.  Aborts on any
 * typecode with no ffi representation.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL                    /* NULL-terminated element list */
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
987 
/*
 * Build the ffi_cif call descriptor for one helper from its packed
 * typemask (3 bits per entry: field 0 is the return type, fields 1..n
 * the arguments).  The cif and its argument array are allocated in one
 * block and never freed; helper descriptors live for the life of the
 * process.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];       /* flexible array: one slot per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    /* Each argument occupies 3 bits of the mask. */
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1021 
1022 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1023 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1024 #else
1025 #define HELPER_INFO_INIT(I)      (&(I)->init)
1026 #define HELPER_INFO_INIT_VAL(I)  1
1027 #endif /* CONFIG_TCG_INTERPRETER */
1028 
/*
 * Return true if call argument slot @arg_slot is passed in a host
 * register, false if it lives on the stack.
 */
static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}
1038 
1039 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1040 {
1041     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1042     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1043 
1044     tcg_debug_assert(stk_slot < max);
1045     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1046 }
1047 
/* Running totals while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1054 
1055 static void layout_arg_even(TCGCumulativeArgs *cum)
1056 {
1057     cum->arg_slot += cum->arg_slot & 1;
1058 }
1059 
1060 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1061                          TCGCallArgumentKind kind)
1062 {
1063     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1064 
1065     *loc = (TCGCallArgumentLoc){
1066         .kind = kind,
1067         .arg_idx = cum->arg_idx,
1068         .arg_slot = cum->arg_slot,
1069     };
1070     cum->info_in_idx++;
1071     cum->arg_slot++;
1072 }
1073 
1074 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1075                                 TCGHelperInfo *info, int n)
1076 {
1077     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1078 
1079     for (int i = 0; i < n; ++i) {
1080         /* Layout all using the same arg_idx, adjusting the subindex. */
1081         loc[i] = (TCGCallArgumentLoc){
1082             .kind = TCG_CALL_ARG_NORMAL,
1083             .arg_idx = cum->arg_idx,
1084             .tmp_subindex = i,
1085             .arg_slot = cum->arg_slot + i,
1086         };
1087     }
1088     cum->info_in_idx += n;
1089     cum->arg_slot += n;
1090 }
1091 
/*
 * Pass a 128-bit value by reference: one normal argument slot carries
 * the pointer, while @n host words of "ref_slot" stack space (relocated
 * later by init_call_layout) hold the callee-clobberable copy.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1123 
/*
 * Parse info->typemask and compute, once per helper, how the return
 * value and each argument map onto host registers and stack slots,
 * filling in info->nr_out, info->out_kind, info->nr_in and info->in[].
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One slot on 64-bit hosts, two on 32-bit hosts. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        /* TCG_TARGET_CALL_RET_I128 is a compile-time constant per backend. */
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.  Each argument occupies
     * 3 bits of typemask, following the 3-bit return type.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Signedness comes from the low bit of the typecode. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Place the copies after the stack parameters, aligned. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1304 
1305 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1306 static void process_op_defs(TCGContext *s);
1307 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1308                                             TCGReg reg, const char *name);
1309 
/*
 * One-time initialization of tcg_init_ctx: allocate per-opcode argument
 * constraint storage, lay out the ldst helper call descriptors,
 * initialize the backend, and create the "env" global.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* One shared allocation, carved into per-op slices below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of call-saved registers at the front. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* Create the "env" global, fixed in TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1386 
/*
 * Initialize the TCG subsystem: set up the one-time context/opcode
 * state, then carve the code generation buffer into regions.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1392 
1393 /*
1394  * Allocate TBs right before their corresponding translated code, making
1395  * sure that TBs and code are on different cache lines.
1396  */
1397 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1398 {
1399     uintptr_t align = qemu_icache_linesize;
1400     TranslationBlock *tb;
1401     void *next;
1402 
1403  retry:
1404     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1405     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1406 
1407     if (unlikely(next > s->code_gen_highwater)) {
1408         if (tcg_region_alloc(s)) {
1409             return NULL;
1410         }
1411         goto retry;
1412     }
1413     qatomic_set(&s->code_gen_ptr, next);
1414     s->data_gen_ptr = NULL;
1415     return tb;
1416 }
1417 
/*
 * Generate the host prologue/epilogue at the start of the code buffer
 * via the backend, flush the instruction cache over it, optionally log
 * its disassembly, and register it with the region allocator.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point for generated code is the start of the prologue. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    /* With -d out_asm, dump the generated code (and any pool data). */
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by pool data: disassemble only the code
                   and hex-dump the data words. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1498 
/*
 * Reset per-translation state in @s before generating a new TB.
 * Globals are retained; all other temps, constants, labels and ops
 * from the previous translation are discarded.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Drop all non-global temps. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);

    /* The translator must have configured the guest address width. */
    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

    tcg_debug_assert(s->insn_start_words > 0);
}
1531 
1532 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1533 {
1534     int n = s->nb_temps++;
1535 
1536     if (n >= TCG_MAX_TEMPS) {
1537         tcg_raise_tb_overflow(s);
1538     }
1539     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1540 }
1541 
1542 static TCGTemp *tcg_global_alloc(TCGContext *s)
1543 {
1544     TCGTemp *ts;
1545 
1546     tcg_debug_assert(s->nb_globals == s->nb_temps);
1547     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1548     s->nb_globals++;
1549     ts = tcg_temp_alloc(s);
1550     ts->kind = TEMP_GLOBAL;
1551 
1552     return ts;
1553 }
1554 
1555 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1556                                             TCGReg reg, const char *name)
1557 {
1558     TCGTemp *ts;
1559 
1560     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1561 
1562     ts = tcg_global_alloc(s);
1563     ts->base_type = type;
1564     ts->type = type;
1565     ts->kind = TEMP_FIXED;
1566     ts->reg = reg;
1567     ts->name = name;
1568     tcg_regset_set_reg(s->reserved_regs, reg);
1569 
1570     return ts;
1571 }
1572 
1573 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1574 {
1575     s->frame_start = start;
1576     s->frame_end = start + size;
1577     s->frame_temp
1578         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1579 }
1580 
/*
 * Create a global temp of @type living in memory at @base + @offset.
 * @base may itself be a TEMP_GLOBAL, in which case this becomes an
 * indirect register.  On 32-bit hosts a TCG_TYPE_I64 global is split
 * into two adjacent 32-bit halves named "<name>_0" and "<name>_1".
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* An I64 split on a 32-bit host counts as two indirects. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        /* strdup'ed names are never freed; globals persist for the
           life of the process. */
        ts->name = strdup(buf);

        /* Globals are allocated consecutively, so ts2 follows ts. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1640 
1641 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1642 {
1643     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1644     return temp_tcgv_i32(ts);
1645 }
1646 
1647 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1648 {
1649     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1650     return temp_tcgv_i64(ts);
1651 }
1652 
1653 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1654 {
1655     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1656     return temp_tcgv_ptr(ts);
1657 }
1658 
/*
 * Allocate a temporary of @type and @kind (TEMP_EBB or TEMP_TB).
 * Freed TEMP_EBB temps are recycled via the free_temps bitmap; types
 * wider than a host register are represented by several consecutive
 * TCGTemp entries distinguished by temp_subindex.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for @type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        /* Multi-word value: each piece is one host register wide. */
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* Pieces must be consecutive in the temps array. */
            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1722 
1723 TCGv_i32 tcg_temp_new_i32(void)
1724 {
1725     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1726 }
1727 
1728 TCGv_i32 tcg_temp_ebb_new_i32(void)
1729 {
1730     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1731 }
1732 
1733 TCGv_i64 tcg_temp_new_i64(void)
1734 {
1735     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1736 }
1737 
1738 TCGv_i64 tcg_temp_ebb_new_i64(void)
1739 {
1740     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1741 }
1742 
1743 TCGv_ptr tcg_temp_new_ptr(void)
1744 {
1745     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1746 }
1747 
1748 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1749 {
1750     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1751 }
1752 
1753 TCGv_i128 tcg_temp_new_i128(void)
1754 {
1755     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1756 }
1757 
1758 TCGv_i128 tcg_temp_ebb_new_i128(void)
1759 {
1760     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1761 }
1762 
/*
 * Allocate an EBB-lifetime vector temporary of @type.  Debug builds
 * assert that the backend supports the requested vector width.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
1786 
1787 /* Create a new temp of the same type as an existing temp.  */
1788 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1789 {
1790     TCGTemp *t = tcgv_vec_temp(match);
1791 
1792     tcg_debug_assert(t->temp_allocated != 0);
1793 
1794     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1795     return temp_tcgv_vec(t);
1796 }
1797 
1798 void tcg_temp_free_internal(TCGTemp *ts)
1799 {
1800     TCGContext *s = tcg_ctx;
1801 
1802     switch (ts->kind) {
1803     case TEMP_CONST:
1804     case TEMP_TB:
1805         /* Silently ignore free. */
1806         break;
1807     case TEMP_EBB:
1808         tcg_debug_assert(ts->temp_allocated != 0);
1809         ts->temp_allocated = 0;
1810         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1811         break;
1812     default:
1813         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1814         g_assert_not_reached();
1815     }
1816 }
1817 
1818 void tcg_temp_free_i32(TCGv_i32 arg)
1819 {
1820     tcg_temp_free_internal(tcgv_i32_temp(arg));
1821 }
1822 
1823 void tcg_temp_free_i64(TCGv_i64 arg)
1824 {
1825     tcg_temp_free_internal(tcgv_i64_temp(arg));
1826 }
1827 
1828 void tcg_temp_free_i128(TCGv_i128 arg)
1829 {
1830     tcg_temp_free_internal(tcgv_i128_temp(arg));
1831 }
1832 
1833 void tcg_temp_free_ptr(TCGv_ptr arg)
1834 {
1835     tcg_temp_free_internal(tcgv_ptr_temp(arg));
1836 }
1837 
1838 void tcg_temp_free_vec(TCGv_vec arg)
1839 {
1840     tcg_temp_free_internal(tcgv_vec_temp(arg));
1841 }
1842 
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    /*
     * Return the interned TEMP_CONST temporary of TYPE holding VAL,
     * creating it on first use.  Constants are deduplicated through a
     * per-type hash table keyed by the 64-bit value.
     */
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first constant of TYPE. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /*
             * On a 32-bit host a 64-bit constant occupies two adjacent
             * TCGTemps, one per 32-bit half.
             */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* The key points into the temp itself, so it stays valid. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1897 
1898 TCGv_i32 tcg_constant_i32(int32_t val)
1899 {
1900     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1901 }
1902 
1903 TCGv_i64 tcg_constant_i64(int64_t val)
1904 {
1905     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1906 }
1907 
1908 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1909 {
1910     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1911 }
1912 
1913 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1914 {
1915     val = dup_const(vece, val);
1916     return temp_tcgv_vec(tcg_constant_internal(type, val));
1917 }
1918 
1919 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1920 {
1921     TCGTemp *t = tcgv_vec_temp(match);
1922 
1923     tcg_debug_assert(t->temp_allocated != 0);
1924     return tcg_constant_vec(t->base_type, vece, val);
1925 }
1926 
1927 #ifdef CONFIG_DEBUG_TCG
size_t temp_idx(TCGTemp *ts)
{
    /* Debug build: index of TS within tcg_ctx->temps[], bounds-checked. */
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}
1934 
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    /*
     * Debug build: a TCGv_i32 is not a real pointer but the byte offset
     * of the TCGTemp from the start of TCGContext, as the return
     * expression shows.  Check that the offset lands on a TCGTemp
     * boundary inside the allocated temps[] array before converting.
     */
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
1944 #endif /* CONFIG_DEBUG_TCG */
1945 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* Any supported vector width enables the generic vector ops. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow, calls, and i32/i64 guest memory ops are mandatory. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* The base 32-bit integer operation set is always available. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-target TCG_TARGET_HAS_* flags. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* The base 64-bit operation set requires a 64-bit host. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops; the TCG_TARGET_HAS_*_i64 flags are defined
       to 0 on 32-bit hosts, so no additional REG_BITS test is needed. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: require vector support, plus per-op flags below. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes (beyond the generic set) are valid. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2250 
2251 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2252 
static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
{
    /*
     * Emit an INDEX_op_call op invoking helper INFO, with optional
     * result temp RET and the input temps ARGS[], laid out according
     * to the call layout recorded in INFO.
     */
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout once per helper (thread-safe init). */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* Outputs + inputs + two trailing slots (func pointer and info). */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-word results use n consecutive sub-temporaries. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Widen a 32-bit argument into a scratch 64-bit temp. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                /* Remember the scratch temp; freed after the op is queued. */
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* The final two argument slots carry the target and its metadata. */
    op->args[pi++] = (uintptr_t)info->func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2342 
2343 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2344 {
2345     tcg_gen_callN(info, ret, NULL);
2346 }
2347 
2348 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2349 {
2350     tcg_gen_callN(info, ret, &t1);
2351 }
2352 
2353 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2354 {
2355     TCGTemp *args[2] = { t1, t2 };
2356     tcg_gen_callN(info, ret, args);
2357 }
2358 
2359 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2360                    TCGTemp *t2, TCGTemp *t3)
2361 {
2362     TCGTemp *args[3] = { t1, t2, t3 };
2363     tcg_gen_callN(info, ret, args);
2364 }
2365 
2366 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2367                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2368 {
2369     TCGTemp *args[4] = { t1, t2, t3, t4 };
2370     tcg_gen_callN(info, ret, args);
2371 }
2372 
2373 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2374                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2375 {
2376     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2377     tcg_gen_callN(info, ret, args);
2378 }
2379 
2380 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2381                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2382 {
2383     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2384     tcg_gen_callN(info, ret, args);
2385 }
2386 
2387 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2388                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2389                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2390 {
2391     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2392     tcg_gen_callN(info, ret, args);
2393 }
2394 
static void tcg_reg_alloc_start(TCGContext *s)
{
    /* Reset the value-location state of every temporary before register
       allocation begins for a new translation. */
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Default: the temp's value lives in memory. */
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            /* Fixed temps permanently occupy their register. */
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            /* Globals start out in memory (the default). */
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* Both kinds get their memory slot re-assigned per TB. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    /* No register holds any temp yet. */
    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2426 
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    /*
     * Render a human-readable name for TS into BUF: fixed/global temps
     * print their name, TB temps "locN", EBB temps "tmpN", constants
     * their value.  Returns BUF.
     */
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        /* Numbering is relative to the first non-global temp. */
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants are prefixed with their width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2466 
2467 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2468                              int buf_size, TCGArg arg)
2469 {
2470     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2471 }
2472 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2490 
/* Printable names for memory-op size/sign/byte-order combinations,
   indexed by the MO_BSWAP | MO_SSIZE bits of a MemOp. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2508 
/* Printable names for MemOp alignment requirements, indexed by the
   MO_AMASK field shifted down. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2519 
/* Printable names for MemOp atomicity modes, indexed by the
   MO_ATOM_MASK field shifted down. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2528 
/* Printable names for bswap input/output zero/sign-extension flag
   combinations; unnamed combinations are printed numerically. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2536 
2537 static inline bool tcg_regset_single(TCGRegSet d)
2538 {
2539     return (d & (d - 1)) == 0;
2540 }
2541 
2542 static inline TCGReg tcg_regset_first(TCGRegSet d)
2543 {
2544     if (TCG_TARGET_NB_REGS <= 32) {
2545         return ctz32(d);
2546     } else {
2547         return ctz64(d);
2548     }
2549 }
2550 
/*
 * Return only the number of characters output -- no error return.
 * A negative fprintf result is folded to 0 so that the column counters
 * accumulated by the callers never decrease.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2554 
2555 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2556 {
2557     char buf[128];
2558     TCGOp *op;
2559 
2560     QTAILQ_FOREACH(op, &s->ops, link) {
2561         int i, k, nb_oargs, nb_iargs, nb_cargs;
2562         const TCGOpDef *def;
2563         TCGOpcode c;
2564         int col = 0;
2565 
2566         c = op->opc;
2567         def = &tcg_op_defs[c];
2568 
2569         if (c == INDEX_op_insn_start) {
2570             nb_oargs = 0;
2571             col += ne_fprintf(f, "\n ----");
2572 
2573             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2574                 col += ne_fprintf(f, " %016" PRIx64,
2575                                   tcg_get_insn_start_param(op, i));
2576             }
2577         } else if (c == INDEX_op_call) {
2578             const TCGHelperInfo *info = tcg_call_info(op);
2579             void *func = tcg_call_func(op);
2580 
2581             /* variable number of arguments */
2582             nb_oargs = TCGOP_CALLO(op);
2583             nb_iargs = TCGOP_CALLI(op);
2584             nb_cargs = def->nb_cargs;
2585 
2586             col += ne_fprintf(f, " %s ", def->name);
2587 
2588             /*
2589              * Print the function name from TCGHelperInfo, if available.
2590              * Note that plugins have a template function for the info,
2591              * but the actual function pointer comes from the plugin.
2592              */
2593             if (func == info->func) {
2594                 col += ne_fprintf(f, "%s", info->name);
2595             } else {
2596                 col += ne_fprintf(f, "plugin(%p)", func);
2597             }
2598 
2599             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2600             for (i = 0; i < nb_oargs; i++) {
2601                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2602                                                             op->args[i]));
2603             }
2604             for (i = 0; i < nb_iargs; i++) {
2605                 TCGArg arg = op->args[nb_oargs + i];
2606                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2607                 col += ne_fprintf(f, ",%s", t);
2608             }
2609         } else {
2610             col += ne_fprintf(f, " %s ", def->name);
2611 
2612             nb_oargs = def->nb_oargs;
2613             nb_iargs = def->nb_iargs;
2614             nb_cargs = def->nb_cargs;
2615 
2616             if (def->flags & TCG_OPF_VECTOR) {
2617                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2618                                   8 << TCGOP_VECE(op));
2619             }
2620 
2621             k = 0;
2622             for (i = 0; i < nb_oargs; i++) {
2623                 const char *sep =  k ? "," : "";
2624                 col += ne_fprintf(f, "%s%s", sep,
2625                                   tcg_get_arg_str(s, buf, sizeof(buf),
2626                                                   op->args[k++]));
2627             }
2628             for (i = 0; i < nb_iargs; i++) {
2629                 const char *sep =  k ? "," : "";
2630                 col += ne_fprintf(f, "%s%s", sep,
2631                                   tcg_get_arg_str(s, buf, sizeof(buf),
2632                                                   op->args[k++]));
2633             }
2634             switch (c) {
2635             case INDEX_op_brcond_i32:
2636             case INDEX_op_setcond_i32:
2637             case INDEX_op_negsetcond_i32:
2638             case INDEX_op_movcond_i32:
2639             case INDEX_op_brcond2_i32:
2640             case INDEX_op_setcond2_i32:
2641             case INDEX_op_brcond_i64:
2642             case INDEX_op_setcond_i64:
2643             case INDEX_op_negsetcond_i64:
2644             case INDEX_op_movcond_i64:
2645             case INDEX_op_cmp_vec:
2646             case INDEX_op_cmpsel_vec:
2647                 if (op->args[k] < ARRAY_SIZE(cond_name)
2648                     && cond_name[op->args[k]]) {
2649                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2650                 } else {
2651                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2652                 }
2653                 i = 1;
2654                 break;
2655             case INDEX_op_qemu_ld_a32_i32:
2656             case INDEX_op_qemu_ld_a64_i32:
2657             case INDEX_op_qemu_st_a32_i32:
2658             case INDEX_op_qemu_st_a64_i32:
2659             case INDEX_op_qemu_st8_a32_i32:
2660             case INDEX_op_qemu_st8_a64_i32:
2661             case INDEX_op_qemu_ld_a32_i64:
2662             case INDEX_op_qemu_ld_a64_i64:
2663             case INDEX_op_qemu_st_a32_i64:
2664             case INDEX_op_qemu_st_a64_i64:
2665             case INDEX_op_qemu_ld_a32_i128:
2666             case INDEX_op_qemu_ld_a64_i128:
2667             case INDEX_op_qemu_st_a32_i128:
2668             case INDEX_op_qemu_st_a64_i128:
2669                 {
2670                     const char *s_al, *s_op, *s_at;
2671                     MemOpIdx oi = op->args[k++];
2672                     MemOp mop = get_memop(oi);
2673                     unsigned ix = get_mmuidx(oi);
2674 
2675                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2676                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2677                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2678                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2679 
2680                     /* If all fields are accounted for, print symbolically. */
2681                     if (!mop && s_al && s_op && s_at) {
2682                         col += ne_fprintf(f, ",%s%s%s,%u",
2683                                           s_at, s_al, s_op, ix);
2684                     } else {
2685                         mop = get_memop(oi);
2686                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2687                     }
2688                     i = 1;
2689                 }
2690                 break;
2691             case INDEX_op_bswap16_i32:
2692             case INDEX_op_bswap16_i64:
2693             case INDEX_op_bswap32_i32:
2694             case INDEX_op_bswap32_i64:
2695             case INDEX_op_bswap64_i64:
2696                 {
2697                     TCGArg flags = op->args[k];
2698                     const char *name = NULL;
2699 
2700                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2701                         name = bswap_flag_name[flags];
2702                     }
2703                     if (name) {
2704                         col += ne_fprintf(f, ",%s", name);
2705                     } else {
2706                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2707                     }
2708                     i = k = 1;
2709                 }
2710                 break;
2711             default:
2712                 i = 0;
2713                 break;
2714             }
2715             switch (c) {
2716             case INDEX_op_set_label:
2717             case INDEX_op_br:
2718             case INDEX_op_brcond_i32:
2719             case INDEX_op_brcond_i64:
2720             case INDEX_op_brcond2_i32:
2721                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2722                                   arg_label(op->args[k])->id);
2723                 i++, k++;
2724                 break;
2725             case INDEX_op_mb:
2726                 {
2727                     TCGBar membar = op->args[k];
2728                     const char *b_op, *m_op;
2729 
2730                     switch (membar & TCG_BAR_SC) {
2731                     case 0:
2732                         b_op = "none";
2733                         break;
2734                     case TCG_BAR_LDAQ:
2735                         b_op = "acq";
2736                         break;
2737                     case TCG_BAR_STRL:
2738                         b_op = "rel";
2739                         break;
2740                     case TCG_BAR_SC:
2741                         b_op = "seq";
2742                         break;
2743                     default:
2744                         g_assert_not_reached();
2745                     }
2746 
2747                     switch (membar & TCG_MO_ALL) {
2748                     case 0:
2749                         m_op = "none";
2750                         break;
2751                     case TCG_MO_LD_LD:
2752                         m_op = "rr";
2753                         break;
2754                     case TCG_MO_LD_ST:
2755                         m_op = "rw";
2756                         break;
2757                     case TCG_MO_ST_LD:
2758                         m_op = "wr";
2759                         break;
2760                     case TCG_MO_ST_ST:
2761                         m_op = "ww";
2762                         break;
2763                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2764                         m_op = "rr+rw";
2765                         break;
2766                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2767                         m_op = "rr+wr";
2768                         break;
2769                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2770                         m_op = "rr+ww";
2771                         break;
2772                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2773                         m_op = "rw+wr";
2774                         break;
2775                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2776                         m_op = "rw+ww";
2777                         break;
2778                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2779                         m_op = "wr+ww";
2780                         break;
2781                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2782                         m_op = "rr+rw+wr";
2783                         break;
2784                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2785                         m_op = "rr+rw+ww";
2786                         break;
2787                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2788                         m_op = "rr+wr+ww";
2789                         break;
2790                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2791                         m_op = "rw+wr+ww";
2792                         break;
2793                     case TCG_MO_ALL:
2794                         m_op = "all";
2795                         break;
2796                     default:
2797                         g_assert_not_reached();
2798                     }
2799 
2800                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2801                     i++, k++;
2802                 }
2803                 break;
2804             default:
2805                 break;
2806             }
2807             for (; i < nb_cargs; i++, k++) {
2808                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2809                                   op->args[k]);
2810             }
2811         }
2812 
2813         if (have_prefs || op->life) {
2814             for (; col < 40; ++col) {
2815                 putc(' ', f);
2816             }
2817         }
2818 
2819         if (op->life) {
2820             unsigned life = op->life;
2821 
2822             if (life & (SYNC_ARG * 3)) {
2823                 ne_fprintf(f, "  sync:");
2824                 for (i = 0; i < 2; ++i) {
2825                     if (life & (SYNC_ARG << i)) {
2826                         ne_fprintf(f, " %d", i);
2827                     }
2828                 }
2829             }
2830             life /= DEAD_ARG;
2831             if (life) {
2832                 ne_fprintf(f, "  dead:");
2833                 for (i = 0; life; ++i, life >>= 1) {
2834                     if (life & 1) {
2835                         ne_fprintf(f, " %d", i);
2836                     }
2837                 }
2838             }
2839         }
2840 
2841         if (have_prefs) {
2842             for (i = 0; i < nb_oargs; ++i) {
2843                 TCGRegSet set = output_pref(op, i);
2844 
2845                 if (i == 0) {
2846                     ne_fprintf(f, "  pref=");
2847                 } else {
2848                     ne_fprintf(f, ",");
2849                 }
2850                 if (set == 0) {
2851                     ne_fprintf(f, "none");
2852                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2853                     ne_fprintf(f, "all");
2854 #ifdef CONFIG_DEBUG_TCG
2855                 } else if (tcg_regset_single(set)) {
2856                     TCGReg reg = tcg_regset_first(set);
2857                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2858 #endif
2859                 } else if (TCG_TARGET_NB_REGS <= 32) {
2860                     ne_fprintf(f, "0x%x", (uint32_t)set);
2861                 } else {
2862                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2863                 }
2864             }
2865         }
2866 
2867         putc('\n', f);
2868     }
2869 }
2870 
2871 /* we give more priority to constraints with less registers */
2872 static int get_constraint_priority(const TCGOpDef *def, int k)
2873 {
2874     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2875     int n = ctpop64(arg_ct->regs);
2876 
2877     /*
2878      * Sort constraints of a single register first, which includes output
2879      * aliases (which must exactly match the input already allocated).
2880      */
2881     if (n == 1 || arg_ct->oalias) {
2882         return INT_MAX;
2883     }
2884 
2885     /*
2886      * Sort register pairs next, first then second immediately after.
2887      * Arbitrarily sort multiple pairs by the index of the first reg;
2888      * there shouldn't be many pairs.
2889      */
2890     switch (arg_ct->pair) {
2891     case 1:
2892     case 3:
2893         return (k + 1) * 2;
2894     case 2:
2895         return (arg_ct->pair_index + 1) * 2 - 1;
2896     }
2897 
2898     /* Finally, sort by decreasing register count. */
2899     assert(n > 1);
2900     return -n;
2901 }
2902 
2903 /* sort from highest priority to lowest */
2904 static void sort_constraints(TCGOpDef *def, int start, int n)
2905 {
2906     int i, j;
2907     TCGArgConstraint *a = def->args_ct;
2908 
2909     for (i = 0; i < n; i++) {
2910         a[start + i].sort_index = start + i;
2911     }
2912     if (n <= 1) {
2913         return;
2914     }
2915     for (i = 0; i < n - 1; i++) {
2916         for (j = i + 1; j < n; j++) {
2917             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2918             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2919             if (p1 < p2) {
2920                 int tmp = a[start + i].sort_index;
2921                 a[start + i].sort_index = a[start + j].sort_index;
2922                 a[start + j].sort_index = tmp;
2923             }
2924         }
2925     }
2926 }
2927 
/*
 * Parse the per-target constraint strings for every opcode into the
 * args_ct[] array of each TCGOpDef: register sets, constant flags,
 * input/output aliases, and register-pair relationships.  Run once
 * at context initialization.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        /* Opcode not implemented by this target: nothing to parse. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in args_ct; everything after is an input. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /*
             * The alias/pair letters must stand alone in the string;
             * each of their cases ends with `continue`.  Any other
             * letter falls through to the do/while loop below, which
             * accumulates constraints character by character.
             */
            switch (*ct_str) {
            case '0' ... '9':
                /* A digit aliases this input to output argument O. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Early-clobber output: must not overlap any input. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                    .newreg = def->args_ct[o].newreg,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                    .newreg = def->args_ct[o].newreg,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
3134 
3135 static void remove_label_use(TCGOp *op, int idx)
3136 {
3137     TCGLabel *label = arg_label(op->args[idx]);
3138     TCGLabelUse *use;
3139 
3140     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3141         if (use->op == op) {
3142             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3143             return;
3144         }
3145     }
3146     g_assert_not_reached();
3147 }
3148 
3149 void tcg_op_remove(TCGContext *s, TCGOp *op)
3150 {
3151     switch (op->opc) {
3152     case INDEX_op_br:
3153         remove_label_use(op, 0);
3154         break;
3155     case INDEX_op_brcond_i32:
3156     case INDEX_op_brcond_i64:
3157         remove_label_use(op, 3);
3158         break;
3159     case INDEX_op_brcond2_i32:
3160         remove_label_use(op, 5);
3161         break;
3162     default:
3163         break;
3164     }
3165 
3166     QTAILQ_REMOVE(&s->ops, op, link);
3167     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3168     s->nb_ops--;
3169 }
3170 
3171 void tcg_remove_ops_after(TCGOp *op)
3172 {
3173     TCGContext *s = tcg_ctx;
3174 
3175     while (true) {
3176         TCGOp *last = tcg_last_op();
3177         if (last == op) {
3178             return;
3179         }
3180         tcg_op_remove(s, last);
3181     }
3182 }
3183 
3184 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3185 {
3186     TCGContext *s = tcg_ctx;
3187     TCGOp *op = NULL;
3188 
3189     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3190         QTAILQ_FOREACH(op, &s->free_ops, link) {
3191             if (nargs <= op->nargs) {
3192                 QTAILQ_REMOVE(&s->free_ops, op, link);
3193                 nargs = op->nargs;
3194                 goto found;
3195             }
3196         }
3197     }
3198 
3199     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3200     nargs = MAX(4, nargs);
3201     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3202 
3203  found:
3204     memset(op, 0, offsetof(TCGOp, link));
3205     op->opc = opc;
3206     op->nargs = nargs;
3207 
3208     /* Check for bitfield overflow. */
3209     tcg_debug_assert(op->nargs == nargs);
3210 
3211     s->nb_ops++;
3212     return op;
3213 }
3214 
3215 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3216 {
3217     TCGOp *op = tcg_op_alloc(opc, nargs);
3218     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3219     return op;
3220 }
3221 
3222 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3223                             TCGOpcode opc, unsigned nargs)
3224 {
3225     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3226     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3227     return new_op;
3228 }
3229 
3230 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3231                            TCGOpcode opc, unsigned nargs)
3232 {
3233     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3234     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3235     return new_op;
3236 }
3237 
3238 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3239 {
3240     TCGLabelUse *u;
3241 
3242     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3243         TCGOp *op = u->op;
3244         switch (op->opc) {
3245         case INDEX_op_br:
3246             op->args[0] = label_arg(to);
3247             break;
3248         case INDEX_op_brcond_i32:
3249         case INDEX_op_brcond_i64:
3250             op->args[3] = label_arg(to);
3251             break;
3252         case INDEX_op_brcond2_i32:
3253             op->args[5] = label_arg(to);
3254             break;
3255         default:
3256             g_assert_not_reached();
3257         }
3258     }
3259 
3260     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3261 }
3262 
/*
 * Reachable analysis : remove unreachable code.
 *
 * Walk the op list forward, tracking whether the current position is
 * reachable ("dead" false).  Unconditional control transfers make the
 * following ops dead until the next referenced label.  Also merges
 * adjacent labels and deletes branch-to-next instructions.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                /* Re-read the predecessor after the removal. */
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3354 
3355 #define TS_DEAD  1
3356 #define TS_MEM   2
3357 
3358 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3359 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3360 
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /*
     * During liveness analysis, state_ptr is repurposed to point at
     * the temp's TCGRegSet preference slot (set up by liveness_pass_1).
     */
    return ts->state_ptr;
}
3366 
3367 /* For liveness_pass_1, reset the preferences for a given temp to the
3368  * maximal regset for its type.
3369  */
3370 static inline void la_reset_pref(TCGTemp *ts)
3371 {
3372     *la_temp_pref(ts)
3373         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3374 }
3375 
3376 /* liveness analysis: end of function: all temps are dead, and globals
3377    should be in memory. */
3378 static void la_func_end(TCGContext *s, int ng, int nt)
3379 {
3380     int i;
3381 
3382     for (i = 0; i < ng; ++i) {
3383         s->temps[i].state = TS_DEAD | TS_MEM;
3384         la_reset_pref(&s->temps[i]);
3385     }
3386     for (i = ng; i < nt; ++i) {
3387         s->temps[i].state = TS_DEAD;
3388         la_reset_pref(&s->temps[i]);
3389     }
3390 }
3391 
3392 /* liveness analysis: end of basic block: all temps are dead, globals
3393    and local temps should be in memory. */
3394 static void la_bb_end(TCGContext *s, int ng, int nt)
3395 {
3396     int i;
3397 
3398     for (i = 0; i < nt; ++i) {
3399         TCGTemp *ts = &s->temps[i];
3400         int state;
3401 
3402         switch (ts->kind) {
3403         case TEMP_FIXED:
3404         case TEMP_GLOBAL:
3405         case TEMP_TB:
3406             state = TS_DEAD | TS_MEM;
3407             break;
3408         case TEMP_EBB:
3409         case TEMP_CONST:
3410             state = TS_DEAD;
3411             break;
3412         default:
3413             g_assert_not_reached();
3414         }
3415         ts->state = state;
3416         la_reset_pref(ts);
3417     }
3418 }
3419 
3420 /* liveness analysis: sync globals back to memory.  */
3421 static void la_global_sync(TCGContext *s, int ng)
3422 {
3423     int i;
3424 
3425     for (i = 0; i < ng; ++i) {
3426         int state = s->temps[i].state;
3427         s->temps[i].state = state | TS_MEM;
3428         if (state == TS_DEAD) {
3429             /* If the global was previously dead, reset prefs.  */
3430             la_reset_pref(&s->temps[i]);
3431         }
3432     }
3433 }
3434 
3435 /*
3436  * liveness analysis: conditional branch: all temps are dead unless
3437  * explicitly live-across-conditional-branch, globals and local temps
3438  * should be synced.
3439  */
3440 static void la_bb_sync(TCGContext *s, int ng, int nt)
3441 {
3442     la_global_sync(s, ng);
3443 
3444     for (int i = ng; i < nt; ++i) {
3445         TCGTemp *ts = &s->temps[i];
3446         int state;
3447 
3448         switch (ts->kind) {
3449         case TEMP_TB:
3450             state = ts->state;
3451             ts->state = state | TS_MEM;
3452             if (state != TS_DEAD) {
3453                 continue;
3454             }
3455             break;
3456         case TEMP_EBB:
3457         case TEMP_CONST:
3458             continue;
3459         default:
3460             g_assert_not_reached();
3461         }
3462         la_reset_pref(&s->temps[i]);
3463     }
3464 }
3465 
3466 /* liveness analysis: sync globals back to memory and kill.  */
3467 static void la_global_kill(TCGContext *s, int ng)
3468 {
3469     int i;
3470 
3471     for (i = 0; i < ng; i++) {
3472         s->temps[i].state = TS_DEAD | TS_MEM;
3473         la_reset_pref(&s->temps[i]);
3474     }
3475 }
3476 
3477 /* liveness analysis: note live globals crossing calls.  */
3478 static void la_cross_call(TCGContext *s, int nt)
3479 {
3480     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3481     int i;
3482 
3483     for (i = 0; i < nt; i++) {
3484         TCGTemp *ts = &s->temps[i];
3485         if (!(ts->state & TS_DEAD)) {
3486             TCGRegSet *pset = la_temp_pref(ts);
3487             TCGRegSet set = *pset;
3488 
3489             set &= mask;
3490             /* If the combination is not possible, restart.  */
3491             if (set == 0) {
3492                 set = tcg_target_available_regs[ts->type] & mask;
3493             }
3494             *pset = set;
3495         }
3496     }
3497 }
3498 
/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    /* Sentinel meaning "used in more than one EBB". */
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    /* state_ptr is used here as the per-temp "which EBB" marker. */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            /* A label starts a new EBB. */
            ebb = op;
            continue;
        case INDEX_op_discard:
            /* A discard is not a real use. */
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                /* First use: remember the owning EBB. */
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                /* Used in a second, different EBB. */
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}
3567 
/*
 * Liveness analysis, pass 1: walk the op list backwards, filling in each
 * op's life bitmask (DEAD_ARG / SYNC_ARG per argument index) to record
 * which arguments are dead after the op and which must be synced back to
 * memory.  Ops whose outputs are all dead and which have no side effects
 * are removed; double-word add/sub/mul ops with a dead half are narrowed
 * to the corresponding single-word op.  Per-temp register preferences
 * (reached via ts->state_ptr) are accumulated for the register allocator.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference set per temp; state_ptr indexes into this array. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Reverse walk: ts->state describes liveness *after* the current op. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                /* A helper that may write globals kills all cached globals;
                   one that only reads them requires a sync instead. */
                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                /* Before this op (in execution order) the output's old
                   value is irrelevant. */
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    /* Narrow the preference to what this op's constraint
                       allows, and to the consumer's preference when the
                       input aliases an output. */
                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
3910 
/*
 * Liveness analysis, pass 2: convert indirect regs to direct temporaries.
 *
 * For each global marked indirect_reg, allocate a direct TEMP_EBB shadow
 * temp and rewrite ops to use it, inserting explicit ld ops before uses
 * that find the shadow dead and st ops after writes that need syncing,
 * as directed by the life bits computed in pass 1.
 *
 * Returns true if any op was changed, inserted or removed.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            /* state_ptr links the indirect global to its direct shadow. */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead. */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow is dead: reload it from the global's slot. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Output dead after sync: store straight from the
                           mov's source and drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4099 
/*
 * Allocate stack-frame storage for @ts, setting mem_offset, mem_base
 * and mem_allocated.  If the temp's base type was subdivided into
 * several parts, storage is assigned to all parts at once.  If the
 * frame is exhausted, tcg_raise_tb_overflow() is called to restart
 * translation with a smaller TB.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* Apply the stack bias after recording the unbiased frame offset. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4168 
4169 /* Assign @reg to @ts, and update reg_to_temp[]. */
4170 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4171 {
4172     if (ts->val_type == TEMP_VAL_REG) {
4173         TCGReg old = ts->reg;
4174         tcg_debug_assert(s->reg_to_temp[old] == ts);
4175         if (old == reg) {
4176             return;
4177         }
4178         s->reg_to_temp[old] = NULL;
4179     }
4180     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4181     s->reg_to_temp[reg] = ts;
4182     ts->val_type = TEMP_VAL_REG;
4183     ts->reg = reg;
4184 }
4185 
4186 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4187 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4188 {
4189     tcg_debug_assert(type != TEMP_VAL_REG);
4190     if (ts->val_type == TEMP_VAL_REG) {
4191         TCGReg reg = ts->reg;
4192         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4193         s->reg_to_temp[reg] = NULL;
4194     }
4195     ts->val_type = type;
4196 }
4197 
4198 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4199 
4200 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4201    mark it free; otherwise mark it dead.  */
4202 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4203 {
4204     TCGTempVal new_type;
4205 
4206     switch (ts->kind) {
4207     case TEMP_FIXED:
4208         return;
4209     case TEMP_GLOBAL:
4210     case TEMP_TB:
4211         new_type = TEMP_VAL_MEM;
4212         break;
4213     case TEMP_EBB:
4214         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4215         break;
4216     case TEMP_CONST:
4217         new_type = TEMP_VAL_CONST;
4218         break;
4219     default:
4220         g_assert_not_reached();
4221     }
4222     set_temp_val_nonreg(s, ts, new_type);
4223 }
4224 
/*
 * Mark a temporary as dead, releasing any host register it occupies
 * (thin wrapper: positive argument selects temp_free_or_dead's
 * "dead" transition).
 */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
4230 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; coherent ones already match
       their memory slot. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Direct store not possible: materialize the constant in a
               register and fall through to the register store. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4274 
4275 /* free register 'reg' by spilling the corresponding temporary if necessary */
4276 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4277 {
4278     TCGTemp *ts = s->reg_to_temp[reg];
4279     if (ts != NULL) {
4280         temp_sync(s, ts, allocated_regs, 0, -1);
4281     }
4282 }
4283 
4284 /**
4285  * tcg_reg_alloc:
4286  * @required_regs: Set of registers in which we must allocate.
4287  * @allocated_regs: Set of registers which must be avoided.
4288  * @preferred_regs: Set of registers we should prefer.
4289  * @rev: True if we search the registers in "indirect" order.
4290  *
4291  * The allocated register must be in @required_regs & ~@allocated_regs,
4292  * but if we can put it in @preferred_regs we may save a move later.
4293  */
4294 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4295                             TCGRegSet allocated_regs,
4296                             TCGRegSet preferred_regs, bool rev)
4297 {
4298     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4299     TCGRegSet reg_ct[2];
4300     const int *order;
4301 
4302     reg_ct[1] = required_regs & ~allocated_regs;
4303     tcg_debug_assert(reg_ct[1] != 0);
4304     reg_ct[0] = reg_ct[1] & preferred_regs;
4305 
4306     /* Skip the preferred_regs option if it cannot be satisfied,
4307        or if the preference made no difference.  */
4308     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4309 
4310     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4311 
4312     /* Try free registers, preferences first.  */
4313     for (j = f; j < 2; j++) {
4314         TCGRegSet set = reg_ct[j];
4315 
4316         if (tcg_regset_single(set)) {
4317             /* One register in the set.  */
4318             TCGReg reg = tcg_regset_first(set);
4319             if (s->reg_to_temp[reg] == NULL) {
4320                 return reg;
4321             }
4322         } else {
4323             for (i = 0; i < n; i++) {
4324                 TCGReg reg = order[i];
4325                 if (s->reg_to_temp[reg] == NULL &&
4326                     tcg_regset_test_reg(set, reg)) {
4327                     return reg;
4328                 }
4329             }
4330         }
4331     }
4332 
4333     /* We must spill something.  */
4334     for (j = f; j < 2; j++) {
4335         TCGRegSet set = reg_ct[j];
4336 
4337         if (tcg_regset_single(set)) {
4338             /* One register in the set.  */
4339             TCGReg reg = tcg_regset_first(set);
4340             tcg_reg_free(s, reg, allocated_regs);
4341             return reg;
4342         } else {
4343             for (i = 0; i < n; i++) {
4344                 TCGReg reg = order[i];
4345                 if (tcg_regset_test_reg(set, reg)) {
4346                     tcg_reg_free(s, reg, allocated_regs);
4347                     return reg;
4348                 }
4349             }
4350         }
4351     }
4352 
4353     g_assert_not_reached();
4354 }
4355 
/*
 * Like tcg_reg_alloc(), but allocate a consecutive register pair
 * (reg, reg + 1), returning the lower register.  Both registers are
 * freed (spilling their occupants if needed) before returning.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4401 
4402 /* Make sure the temporary is in a register.  If needed, allocate the register
4403    from DESIRED while avoiding ALLOCATED.  */
4404 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4405                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4406 {
4407     TCGReg reg;
4408 
4409     switch (ts->val_type) {
4410     case TEMP_VAL_REG:
4411         return;
4412     case TEMP_VAL_CONST:
4413         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4414                             preferred_regs, ts->indirect_base);
4415         if (ts->type <= TCG_TYPE_I64) {
4416             tcg_out_movi(s, ts->type, reg, ts->val);
4417         } else {
4418             uint64_t val = ts->val;
4419             MemOp vece = MO_64;
4420 
4421             /*
4422              * Find the minimal vector element that matches the constant.
4423              * The targets will, in general, have to do this search anyway,
4424              * do this generically.
4425              */
4426             if (val == dup_const(MO_8, val)) {
4427                 vece = MO_8;
4428             } else if (val == dup_const(MO_16, val)) {
4429                 vece = MO_16;
4430             } else if (val == dup_const(MO_32, val)) {
4431                 vece = MO_32;
4432             }
4433 
4434             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4435         }
4436         ts->mem_coherent = 0;
4437         break;
4438     case TEMP_VAL_MEM:
4439         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4440                             preferred_regs, ts->indirect_base);
4441         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4442         ts->mem_coherent = 1;
4443         break;
4444     case TEMP_VAL_DEAD:
4445     default:
4446         g_assert_not_reached();
4447     }
4448     set_temp_val_reg(s, ts, reg);
4449 }
4450 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.
   NOTE(review): 'allocated_regs' is currently unused here; the body is
   only an assertion, so callers rely on liveness having done the work. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4459 
4460 /* save globals to their canonical location and assume they can be
4461    modified be the following code. 'allocated_regs' is used in case a
4462    temporary registers needs to be allocated to store a constant. */
4463 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4464 {
4465     int i, n;
4466 
4467     for (i = 0, n = s->nb_globals; i < n; i++) {
4468         temp_save(s, &s->temps[i], allocated_regs);
4469     }
4470 }
4471 
4472 /* sync globals to their canonical location and assume they can be
4473    read by the following code. 'allocated_regs' is used in case a
4474    temporary registers needs to be allocated to store a constant. */
4475 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4476 {
4477     int i, n;
4478 
4479     for (i = 0, n = s->nb_globals; i < n; i++) {
4480         TCGTemp *ts = &s->temps[i];
4481         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4482                          || ts->kind == TEMP_FIXED
4483                          || ts->mem_coherent);
4484     }
4485 }
4486 
4487 /* at the end of a basic block, we assume all temporaries are dead and
4488    all globals are stored at their canonical location. */
4489 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4490 {
4491     int i;
4492 
4493     for (i = s->nb_globals; i < s->nb_temps; i++) {
4494         TCGTemp *ts = &s->temps[i];
4495 
4496         switch (ts->kind) {
4497         case TEMP_TB:
4498             temp_save(s, ts, allocated_regs);
4499             break;
4500         case TEMP_EBB:
4501             /* The liveness analysis already ensures that temps are dead.
4502                Keep an tcg_debug_assert for safety. */
4503             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4504             break;
4505         case TEMP_CONST:
4506             /* Similarly, we should have freed any allocated register. */
4507             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4508             break;
4509         default:
4510             g_assert_not_reached();
4511         }
4512     }
4513 
4514     save_globals(s, allocated_regs);
4515 }
4516 
4517 /*
4518  * At a conditional branch, we assume all temporaries are dead unless
4519  * explicitly live-across-conditional-branch; all globals and local
4520  * temps are synced to their location.
4521  */
4522 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4523 {
4524     sync_globals(s, allocated_regs);
4525 
4526     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4527         TCGTemp *ts = &s->temps[i];
4528         /*
4529          * The liveness analysis already ensures that temps are dead.
4530          * Keep tcg_debug_asserts for safety.
4531          */
4532         switch (ts->kind) {
4533         case TEMP_TB:
4534             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4535             break;
4536         case TEMP_EBB:
4537         case TEMP_CONST:
4538             break;
4539         default:
4540             g_assert_not_reached();
4541         }
4542     }
4543 }
4544 
4545 /*
4546  * Specialized code generation for INDEX_op_mov_* with a constant.
4547  */
4548 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4549                                   tcg_target_ulong val, TCGLifeData arg_life,
4550                                   TCGRegSet preferred_regs)
4551 {
4552     /* ENV should not be modified.  */
4553     tcg_debug_assert(!temp_readonly(ots));
4554 
4555     /* The movi is not explicitly generated here.  */
4556     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4557     ots->val = val;
4558     ots->mem_coherent = 0;
4559     if (NEED_SYNC_ARG(0)) {
4560         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4561     } else if (IS_DEAD_ARG(0)) {
4562         temp_dead(s, ots);
4563     }
4564 }
4565 
4566 /*
4567  * Specialized code generation for INDEX_op_mov_*.
4568  */
4569 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4570 {
4571     const TCGLifeData arg_life = op->life;
4572     TCGRegSet allocated_regs, preferred_regs;
4573     TCGTemp *ts, *ots;
4574     TCGType otype, itype;
4575     TCGReg oreg, ireg;
4576 
4577     allocated_regs = s->reserved_regs;
4578     preferred_regs = output_pref(op, 0);
4579     ots = arg_temp(op->args[0]);
4580     ts = arg_temp(op->args[1]);
4581 
4582     /* ENV should not be modified.  */
4583     tcg_debug_assert(!temp_readonly(ots));
4584 
4585     /* Note that otype != itype for no-op truncation.  */
4586     otype = ots->type;
4587     itype = ts->type;
4588 
4589     if (ts->val_type == TEMP_VAL_CONST) {
4590         /* propagate constant or generate sti */
4591         tcg_target_ulong val = ts->val;
4592         if (IS_DEAD_ARG(1)) {
4593             temp_dead(s, ts);
4594         }
4595         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4596         return;
4597     }
4598 
4599     /* If the source value is in memory we're going to be forced
4600        to have it in a register in order to perform the copy.  Copy
4601        the SOURCE value into its own register first, that way we
4602        don't have to reload SOURCE the next time it is used. */
4603     if (ts->val_type == TEMP_VAL_MEM) {
4604         temp_load(s, ts, tcg_target_available_regs[itype],
4605                   allocated_regs, preferred_regs);
4606     }
4607     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4608     ireg = ts->reg;
4609 
4610     if (IS_DEAD_ARG(0)) {
4611         /* mov to a non-saved dead register makes no sense (even with
4612            liveness analysis disabled). */
4613         tcg_debug_assert(NEED_SYNC_ARG(0));
4614         if (!ots->mem_allocated) {
4615             temp_allocate_frame(s, ots);
4616         }
4617         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4618         if (IS_DEAD_ARG(1)) {
4619             temp_dead(s, ts);
4620         }
4621         temp_dead(s, ots);
4622         return;
4623     }
4624 
4625     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4626         /*
4627          * The mov can be suppressed.  Kill input first, so that it
4628          * is unlinked from reg_to_temp, then set the output to the
4629          * reg that we saved from the input.
4630          */
4631         temp_dead(s, ts);
4632         oreg = ireg;
4633     } else {
4634         if (ots->val_type == TEMP_VAL_REG) {
4635             oreg = ots->reg;
4636         } else {
4637             /* Make sure to not spill the input register during allocation. */
4638             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4639                                  allocated_regs | ((TCGRegSet)1 << ireg),
4640                                  preferred_regs, ots->indirect_base);
4641         }
4642         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4643             /*
4644              * Cross register class move not supported.
4645              * Store the source register into the destination slot
4646              * and leave the destination temp as TEMP_VAL_MEM.
4647              */
4648             assert(!temp_readonly(ots));
4649             if (!ts->mem_allocated) {
4650                 temp_allocate_frame(s, ots);
4651             }
4652             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4653             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4654             ots->mem_coherent = 1;
4655             return;
4656         }
4657     }
4658     set_temp_val_reg(s, ots, oreg);
4659     ots->mem_coherent = 0;
4660 
4661     if (NEED_SYNC_ARG(0)) {
4662         temp_sync(s, ots, allocated_regs, 0, 0);
4663     }
4664 }
4665 
4666 /*
4667  * Specialized code generation for INDEX_op_dup_vec.
4668  */
4669 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4670 {
4671     const TCGLifeData arg_life = op->life;
4672     TCGRegSet dup_out_regs, dup_in_regs;
4673     TCGTemp *its, *ots;
4674     TCGType itype, vtype;
4675     unsigned vece;
4676     int lowpart_ofs;
4677     bool ok;
4678 
4679     ots = arg_temp(op->args[0]);
4680     its = arg_temp(op->args[1]);
4681 
4682     /* ENV should not be modified.  */
4683     tcg_debug_assert(!temp_readonly(ots));
4684 
4685     itype = its->type;
4686     vece = TCGOP_VECE(op);
4687     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4688 
4689     if (its->val_type == TEMP_VAL_CONST) {
4690         /* Propagate constant via movi -> dupi.  */
4691         tcg_target_ulong val = its->val;
4692         if (IS_DEAD_ARG(1)) {
4693             temp_dead(s, its);
4694         }
4695         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4696         return;
4697     }
4698 
4699     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4700     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4701 
4702     /* Allocate the output register now.  */
4703     if (ots->val_type != TEMP_VAL_REG) {
4704         TCGRegSet allocated_regs = s->reserved_regs;
4705         TCGReg oreg;
4706 
4707         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4708             /* Make sure to not spill the input register. */
4709             tcg_regset_set_reg(allocated_regs, its->reg);
4710         }
4711         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4712                              output_pref(op, 0), ots->indirect_base);
4713         set_temp_val_reg(s, ots, oreg);
4714     }
4715 
4716     switch (its->val_type) {
4717     case TEMP_VAL_REG:
4718         /*
4719          * The dup constriaints must be broad, covering all possible VECE.
4720          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4721          * to fail, indicating that extra moves are required for that case.
4722          */
4723         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4724             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4725                 goto done;
4726             }
4727             /* Try again from memory or a vector input register.  */
4728         }
4729         if (!its->mem_coherent) {
4730             /*
4731              * The input register is not synced, and so an extra store
4732              * would be required to use memory.  Attempt an integer-vector
4733              * register move first.  We do not have a TCGRegSet for this.
4734              */
4735             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4736                 break;
4737             }
4738             /* Sync the temp back to its slot and load from there.  */
4739             temp_sync(s, its, s->reserved_regs, 0, 0);
4740         }
4741         /* fall through */
4742 
4743     case TEMP_VAL_MEM:
4744         lowpart_ofs = 0;
4745         if (HOST_BIG_ENDIAN) {
4746             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4747         }
4748         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4749                              its->mem_offset + lowpart_ofs)) {
4750             goto done;
4751         }
4752         /* Load the input into the destination vector register. */
4753         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4754         break;
4755 
4756     default:
4757         g_assert_not_reached();
4758     }
4759 
4760     /* We now have a vector input register, so dup must succeed. */
4761     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4762     tcg_debug_assert(ok);
4763 
4764  done:
4765     ots->mem_coherent = 0;
4766     if (IS_DEAD_ARG(1)) {
4767         temp_dead(s, its);
4768     }
4769     if (NEED_SYNC_ARG(0)) {
4770         temp_sync(s, ots, s->reserved_regs, 0, 0);
4771     }
4772     if (IS_DEAD_ARG(0)) {
4773         temp_dead(s, ots);
4774     }
4775 }
4776 
/*
 * Allocate registers for one TCGOp and emit its host code, satisfying
 * the input/output constraints declared in tcg_op_defs.  Handles
 * constant operands, aliased inputs/outputs, register pairs, call
 * clobbers, and end-of-block / conditional-branch register state.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];
    TCGCond op_cond;

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /*
     * Extract the comparison condition, if any; its argument position
     * depends on the opcode.  Used by tcg_target_const_match below.
     */
    switch (op->opc) {
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        op_cond = op->args[2];
        break;
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
    case INDEX_op_cmp_vec:
        op_cond = op->args[3];
        break;
    case INDEX_op_brcond2_i32:
        op_cond = op->args[4];
        break;
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
    case INDEX_op_setcond2_i32:
    case INDEX_op_cmpsel_vec:
        op_cond = op->args[5];
        break;
    default:
        /* No condition within opcode. */
        op_cond = TCG_COND_ALWAYS;
        break;
    }

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in constraint-sorted order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
                                      op_cond, TCGOP_VECE(op))) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
                    || def->args_ct[arg_ct->alias_index].newreg) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The register is dictated by the first half of the pair. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /* Allocate a pair such that reg is the HIGH half. */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* A new register distinct from all inputs. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs,
                                             i_allocated_regs | o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                }
                break;

            case 2: /* second of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5176 
/*
 * Register allocation for INDEX_op_dup2_vec: build a vector whose
 * 64-bit elements are assembled from the 32-bit pair (itsl, itsh).
 * Returns true if code was emitted here; false to request generic
 * expansion by the caller.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* output vector */
    itsl = arg_temp(op->args[1]);   /* low half input */
    itsh = arg_temp(op->args[2]);   /* high half input */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        /* Use the register constraints of dup_vec's output. */
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Prefer the narrowest element size that replicates to val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Flush both halves to their adjacent memory slots. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* The register now holds the value; any memory copy is stale. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5264 
5265 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5266                          TCGRegSet allocated_regs)
5267 {
5268     if (ts->val_type == TEMP_VAL_REG) {
5269         if (ts->reg != reg) {
5270             tcg_reg_free(s, reg, allocated_regs);
5271             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5272                 /*
5273                  * Cross register class move not supported.  Sync the
5274                  * temp back to its slot and load from there.
5275                  */
5276                 temp_sync(s, ts, allocated_regs, 0, 0);
5277                 tcg_out_ld(s, ts->type, reg,
5278                            ts->mem_base->reg, ts->mem_offset);
5279             }
5280         }
5281     } else {
5282         TCGRegSet arg_set = 0;
5283 
5284         tcg_reg_free(s, reg, allocated_regs);
5285         tcg_regset_set_reg(arg_set, reg);
5286         temp_load(s, ts, arg_set, allocated_regs, 0);
5287     }
5288 }
5289 
5290 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5291                          TCGRegSet allocated_regs)
5292 {
5293     /*
5294      * When the destination is on the stack, load up the temp and store.
5295      * If there are many call-saved registers, the temp might live to
5296      * see another use; otherwise it'll be discarded.
5297      */
5298     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5299     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5300                arg_slot_stk_ofs(arg_slot));
5301 }
5302 
5303 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5304                             TCGTemp *ts, TCGRegSet *allocated_regs)
5305 {
5306     if (arg_slot_reg_p(l->arg_slot)) {
5307         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5308         load_arg_reg(s, reg, ts, *allocated_regs);
5309         tcg_regset_set_reg(*allocated_regs, reg);
5310     } else {
5311         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5312     }
5313 }
5314 
5315 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5316                          intptr_t ref_off, TCGRegSet *allocated_regs)
5317 {
5318     TCGReg reg;
5319 
5320     if (arg_slot_reg_p(arg_slot)) {
5321         reg = tcg_target_call_iarg_regs[arg_slot];
5322         tcg_reg_free(s, reg, *allocated_regs);
5323         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5324         tcg_regset_set_reg(*allocated_regs, reg);
5325     } else {
5326         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5327                             *allocated_regs, 0, false);
5328         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5329         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5330                    arg_slot_stk_ofs(arg_slot));
5331     }
5332 }
5333 
/*
 * Register allocation and code emission for a call op: load the input
 * arguments per the helper's ABI description, clobber call-clobbered
 * registers, sync/save globals, emit the call, then assign outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack home, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference value: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part is in its ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Store the vector return value to the temp's memory home. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5464 
5465 /**
5466  * atom_and_align_for_opc:
5467  * @s: tcg context
5468  * @opc: memory operation code
5469  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5470  * @allow_two_ops: true if we are prepared to issue two operations
5471  *
5472  * Return the alignment and atomicity to use for the inline fast path
5473  * for the given memory operation.  The alignment may be larger than
5474  * that specified in @opc, and the correct alignment will be diagnosed
5475  * by the slow path helper.
5476  *
5477  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5478  * and issue two loads or stores for subalignment.
5479  */
5480 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5481                                            MemOp host_atom, bool allow_two_ops)
5482 {
5483     MemOp align = get_alignment_bits(opc);
5484     MemOp size = opc & MO_SIZE;
5485     MemOp half = size ? size - 1 : 0;
5486     MemOp atom = opc & MO_ATOM_MASK;
5487     MemOp atmax;
5488 
5489     switch (atom) {
5490     case MO_ATOM_NONE:
5491         /* The operation requires no specific atomicity. */
5492         atmax = MO_8;
5493         break;
5494 
5495     case MO_ATOM_IFALIGN:
5496         atmax = size;
5497         break;
5498 
5499     case MO_ATOM_IFALIGN_PAIR:
5500         atmax = half;
5501         break;
5502 
5503     case MO_ATOM_WITHIN16:
5504         atmax = size;
5505         if (size == MO_128) {
5506             /* Misalignment implies !within16, and therefore no atomicity. */
5507         } else if (host_atom != MO_ATOM_WITHIN16) {
5508             /* The host does not implement within16, so require alignment. */
5509             align = MAX(align, size);
5510         }
5511         break;
5512 
5513     case MO_ATOM_WITHIN16_PAIR:
5514         atmax = size;
5515         /*
5516          * Misalignment implies !within16, and therefore half atomicity.
5517          * Any host prepared for two operations can implement this with
5518          * half alignment.
5519          */
5520         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5521             align = MAX(align, half);
5522         }
5523         break;
5524 
5525     case MO_ATOM_SUBALIGN:
5526         atmax = size;
5527         if (host_atom != MO_ATOM_SUBALIGN) {
5528             /* If unaligned but not odd, there are subobjects up to half. */
5529             if (allow_two_ops) {
5530                 align = MAX(align, half);
5531             } else {
5532                 align = MAX(align, size);
5533             }
5534         }
5535         break;
5536 
5537     default:
5538         g_assert_not_reached();
5539     }
5540 
5541     return (TCGAtomAlign){ .atom = atmax, .align = align };
5542 }
5543 
5544 /*
5545  * Similarly for qemu_ld/st slow path helpers.
5546  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5547  * using only the provided backend tcg_out_* functions.
5548  */
5549 
5550 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5551 {
5552     int ofs = arg_slot_stk_ofs(slot);
5553 
5554     /*
5555      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5556      * require extension to uint64_t, adjust the address for uint32_t.
5557      */
5558     if (HOST_BIG_ENDIAN &&
5559         TCG_TARGET_REG_BITS == 64 &&
5560         type == TCG_TYPE_I32) {
5561         ofs += 4;
5562     }
5563     return ofs;
5564 }
5565 
5566 static void tcg_out_helper_load_slots(TCGContext *s,
5567                                       unsigned nmov, TCGMovExtend *mov,
5568                                       const TCGLdstHelperParam *parm)
5569 {
5570     unsigned i;
5571     TCGReg dst3;
5572 
5573     /*
5574      * Start from the end, storing to the stack first.
5575      * This frees those registers, so we need not consider overlap.
5576      */
5577     for (i = nmov; i-- > 0; ) {
5578         unsigned slot = mov[i].dst;
5579 
5580         if (arg_slot_reg_p(slot)) {
5581             goto found_reg;
5582         }
5583 
5584         TCGReg src = mov[i].src;
5585         TCGType dst_type = mov[i].dst_type;
5586         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5587 
5588         /* The argument is going onto the stack; extend into scratch. */
5589         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5590             tcg_debug_assert(parm->ntmp != 0);
5591             mov[i].dst = src = parm->tmp[0];
5592             tcg_out_movext1(s, &mov[i]);
5593         }
5594 
5595         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5596                    tcg_out_helper_stk_ofs(dst_type, slot));
5597     }
5598     return;
5599 
5600  found_reg:
5601     /*
5602      * The remaining arguments are in registers.
5603      * Convert slot numbers to argument registers.
5604      */
5605     nmov = i + 1;
5606     for (i = 0; i < nmov; ++i) {
5607         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5608     }
5609 
5610     switch (nmov) {
5611     case 4:
5612         /* The backend must have provided enough temps for the worst case. */
5613         tcg_debug_assert(parm->ntmp >= 2);
5614 
5615         dst3 = mov[3].dst;
5616         for (unsigned j = 0; j < 3; ++j) {
5617             if (dst3 == mov[j].src) {
5618                 /*
5619                  * Conflict. Copy the source to a temporary, perform the
5620                  * remaining moves, then the extension from our scratch
5621                  * on the way out.
5622                  */
5623                 TCGReg scratch = parm->tmp[1];
5624 
5625                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5626                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5627                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5628                 break;
5629             }
5630         }
5631 
5632         /* No conflicts: perform this move and continue. */
5633         tcg_out_movext1(s, &mov[3]);
5634         /* fall through */
5635 
5636     case 3:
5637         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5638                         parm->ntmp ? parm->tmp[0] : -1);
5639         break;
5640     case 2:
5641         tcg_out_movext2(s, mov, mov + 1,
5642                         parm->ntmp ? parm->tmp[0] : -1);
5643         break;
5644     case 1:
5645         tcg_out_movext1(s, mov);
5646         break;
5647     default:
5648         g_assert_not_reached();
5649     }
5650 }
5651 
5652 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5653                                     TCGType type, tcg_target_long imm,
5654                                     const TCGLdstHelperParam *parm)
5655 {
5656     if (arg_slot_reg_p(slot)) {
5657         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5658     } else {
5659         int ofs = tcg_out_helper_stk_ofs(type, slot);
5660         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5661             tcg_debug_assert(parm->ntmp != 0);
5662             tcg_out_movi(s, type, parm->tmp[0], imm);
5663             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5664         }
5665     }
5666 }
5667 
/*
 * Load the call arguments common to all qemu_ld/st slow-path helpers:
 * env (always the first argument), the MemOpIdx 'oi', and the return
 * address 'ra'.  @next_arg indexes the first of these (oi) after the
 * address/data arguments already handled by the caller.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* Let the backend generate the return address dynamically. */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Otherwise pass the recorded return address as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
5734 
/*
 * Fill mov[] with the move/extend descriptions needed to place the
 * value held in (lo, hi) into the argument location(s) at @loc.
 * Returns the number of mov[] entries written: 1 when the value fits
 * a single host register, 2 when it occupies a register pair
 * (I64 on 32-bit hosts, I128 on 64-bit hosts).
 */
static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
                                       const TCGCallArgumentLoc *loc,
                                       TCGType dst_type, TCGType src_type,
                                       TCGReg lo, TCGReg hi)
{
    MemOp reg_mo;

    if (dst_type <= TCG_TYPE_REG) {
        MemOp src_ext;

        /* Single register: select the extension required by the ABI. */
        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
            break;
        case TCG_CALL_ARG_EXTEND_U:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_UL;
            break;
        case TCG_CALL_ARG_EXTEND_S:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_SL;
            break;
        default:
            g_assert_not_reached();
        }

        mov[0].dst = loc->arg_slot;
        mov[0].dst_type = dst_type;
        mov[0].src = lo;
        mov[0].src_type = src_type;
        mov[0].src_ext = src_ext;
        return 1;
    }

    /* Register pair: each half moves at host register width. */
    if (TCG_TARGET_REG_BITS == 32) {
        assert(dst_type == TCG_TYPE_I64);
        reg_mo = MO_32;
    } else {
        assert(dst_type == TCG_TYPE_I128);
        reg_mo = MO_64;
    }

    /* Slot assignment within the pair depends on host endianness. */
    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
    mov[0].src = lo;
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = reg_mo;

    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
    mov[1].src = hi;
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = reg_mo;

    return 2;
}
5791 
/*
 * Emit code to load the arguments of a qemu_ld slow-path helper:
 * select the helper descriptor by access size, marshal the guest
 * address, set up the by-reference return slot if needed, then load
 * the common env/oi/ra arguments.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper descriptor by the size of the access. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        /* Otherwise the address fits the normal one-or-two-slot path. */
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Pass the address through a scratch register. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5872 
/*
 * Move the return value of a qemu_ld slow-path helper into the
 * destination data register(s), applying any required extension.
 * If @load_sign, the helper has already performed any needed sign
 * extension to tcg_target_ulong.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* I64 on a 32-bit host is a register pair; handled below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Value returned in a register pair; handled below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload halves. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* The value was written to the stack slot by the callee. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Move a register-pair return into the two data registers. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
5958 
/*
 * Emit code to load the arguments of a qemu_st slow-path helper:
 * select the helper descriptor by access size, marshal the guest
 * address and the data value (by registers or by reference for I128),
 * then load the common env/oi/ra arguments.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper descriptor by the size of the access. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passes in registers/stack slots like the address. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 data: store both halves to the ref slot, pass its address. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            /* Pass the reference address through a scratch register. */
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6064 
/*
 * Translate the opcode stream in @s->ops into host machine code for @tb.
 *
 * @pc_start is used only to filter the optional debug logging via
 * qemu_log_in_addr_range().
 *
 * Returns the size in bytes of the generated code on success.  On failure
 * returns a negative value: -1 when generation would overrun the
 * code_gen_buffer high-water mark (caller restarts with a fresh buffer),
 * -2 when the per-insn end offsets no longer fit in 16 bits or the
 * relocations cannot be resolved, or a negative backend error from
 * ldst/pool finalization.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Optional dump of the raw (pre-optimization) opcode stream.  */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            /* A label with pending branches but no set_label op is a
               front-end bug; report every such label before asserting.  */
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Optimization and liveness passes, in required order.  */
    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        /* Optional dump before the indirect-temp lowering pass.  */
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    /* Optional dump of the final opcode stream fed to the backend.  */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* One group of start_words uint64_t values per guest insn.  */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* Index of the current guest insn; -1 until the first insn_start op.  */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record the code-end offset of the previous guest insn.  */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            /* Stash the insn_start parameters (e.g. guest PC) for unwind.  */
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* All temps must be synced/dead across a basic-block boundary.  */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* Every guest insn must have produced exactly one insn_start op.  */
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6260 
6261 #ifdef ELF_HOST_MACHINE
6262 /* In order to use this feature, the backend needs to do three things:
6263 
6264    (1) Define ELF_HOST_MACHINE to indicate both what value to
6265        put into the ELF image and to indicate support for the feature.
6266 
6267    (2) Define tcg_register_jit.  This should create a buffer containing
6268        the contents of a .debug_frame section that describes the post-
6269        prologue unwind info for the tcg machine.
6270 
6271    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6272 */
6273 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* These declarations form GDB's JIT registration ABI; the names, layout
   and values are fixed by GDB and must not be altered.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One node in the doubly-linked list of in-memory symbol files.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* The well-known descriptor GDB inspects when the registration hook
   below is hit.  */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint on this function; calling it notifies the
   debugger of a descriptor update.  noinline plus the empty asm keep
   the compiler from eliding the call or the function body.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
6306 
/*
 * Return the byte offset of @str within the NUL-separated string table
 * @strtab.  The table begins with an empty string at offset 0, so the
 * scan starts at offset 1.  The caller guarantees @str is present; if it
 * were not, the scan would run past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *cand = strtab + 1; ; cand += strlen(cand) + 1) {
        if (strcmp(cand, str) == 0) {
            return cand - strtab;
        }
    }
}
6318 
/*
 * Register the JIT code buffer with GDB.  Builds a minimal in-memory ELF
 * image describing @buf_ptr/@buf_size as a single function named
 * "code_gen_buffer" (headers, symtab, a tiny DWARF .debug_info and
 * .debug_abbrev, plus the backend-supplied .debug_frame appended after
 * the fixed-size image), then publishes it through the GDB JIT interface
 * declared above.  @debug_frame/@debug_frame_size come from the backend's
 * tcg_register_jit and are copied, with the FDE patched to cover the
 * buffer's actual address range.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Hand-rolled DWARF .debug_info section: one compile-unit DIE and one
       subprogram DIE spanning the whole buffer.  The field layout must
       match the abbreviation table 'da' in the template below.  */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    /* Complete layout of the fake ELF file; section offsets in the
       template below are offsets into this struct.  */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    /* Constant template; per-buffer fields are patched in below.  */
    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): e_ehsize is nominally sizeof(ElfW(Ehdr));
               sizeof(ElfW(Shdr)) appears here.  GDB does not appear to
               consult this field for JIT symfiles -- confirm before
               relying on the image with other ELF consumers.  */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                /* Lives immediately after the fixed-size image; its size
                   is filled in below from debug_frame_size.  */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        /* DWARF abbreviation table matching the two DIEs in 'di'.  */
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        /* String table; offsets are found via find_string() below.  */
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    /* Image plus trailing .debug_frame; intentionally never freed, as it
       must stay valid for as long as GDB may reference it.  */
    img = g_malloc(img_size);
    *img = img_template;

    /* Patch in the actual buffer address/size everywhere it appears.  */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Copy the backend's .debug_frame after the image and point its FDE
       at the buffer.  */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns an item count (0 or 1 here),
               not bytes, so comparing against img_size is not meaningful;
               harmless since the body is empty and the test exists only
               to consume the warn_unused_result value.  */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the symfile and notify GDB via the registration hook.  */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

/* No-op stub: without ELF_HOST_MACHINE there is no GDB JIT image.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

/* No-op stub for the public registration entry point.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */
6532 
#if !TCG_TARGET_MAYBE_vec
/* Backends without vector support must never be asked to expand a vector
   op; reaching this stub is a front-end/backend mismatch.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
6539