xref: /openbmc/qemu/tcg/tcg.c (revision 6e510855)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/tcg-op-common.h"
40 
/*
 * Choose the ELF identification constants before including "elf.h":
 * the class follows the pointer width of the build (32-bit when
 * UINTPTR_MAX equals UINT32_MAX, 64-bit otherwise) and the data
 * encoding follows HOST_BIG_ENDIAN.
 */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
51 
52 #include "elf.h"
53 #include "exec/log.h"
54 #include "tcg/tcg-ldst.h"
55 #include "tcg/tcg-temp-internal.h"
56 #include "tcg-internal.h"
57 #include "accel/tcg/perf.h"
58 #ifdef CONFIG_USER_ONLY
59 #include "exec/user/guest-base.h"
60 #endif
61 
62 /* Forward declarations for functions declared in tcg-target.c.inc and
63    used here. */
64 static void tcg_target_init(TCGContext *s);
65 static void tcg_target_qemu_prologue(TCGContext *s);
66 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
67                         intptr_t value, intptr_t addend);
68 
69 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Leading fields of the CIE record shared by all hosts.
 * NOTE(review): the field names suggest the DWARF call-frame CIE layout;
 * confirm against the per-host users before relying on that.
 */
typedef struct {
    /* The aligned attribute raises this member (and so the struct) to
       pointer alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
79 
/*
 * Leading fields of the FDE record shared by all hosts.  The struct is
 * packed, so the members follow each other without padding.
 * NOTE(review): func_start/func_len presumably describe the generated
 * code region covered by this FDE -- confirm against the code that
 * fills them in.
 */
typedef struct QEMU_PACKED {
    /* Pointer-aligned start, matching DebugFrameCIE. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
86 
/* A CIE immediately followed by an FDE header, packed into one record. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
91 
/*
 * Description of one qemu_ld/qemu_st operation, kept on a queue (see the
 * QSIMPLEQ_ENTRY member) and consumed by the tcg_out_ld_helper_* and
 * tcg_out_st_helper_args routines declared below.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
104 
105 static void tcg_register_jit_int(const void *buf, size_t size,
106                                  const void *debug_frame,
107                                  size_t debug_frame_size)
108     __attribute__((unused));
109 
110 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
111 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
112                        intptr_t arg2);
113 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
114 static void tcg_out_movi(TCGContext *s, TCGType type,
115                          TCGReg ret, tcg_target_long arg);
116 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
117 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
118 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
126 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
127 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
128 static void tcg_out_goto_tb(TCGContext *s, int which);
129 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
130                        const TCGArg args[TCG_MAX_OP_ARGS],
131                        const int const_args[TCG_MAX_OP_ARGS]);
132 #if TCG_TARGET_MAYBE_vec
133 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
134                             TCGReg dst, TCGReg src);
135 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
136                              TCGReg dst, TCGReg base, intptr_t offset);
137 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
138                              TCGReg dst, int64_t arg);
139 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
140                            unsigned vecl, unsigned vece,
141                            const TCGArg args[TCG_MAX_OP_ARGS],
142                            const int const_args[TCG_MAX_OP_ARGS]);
143 #else
/*
 * Placeholders used when TCG_TARGET_MAYBE_vec is zero.  They exist only
 * so that references to the vector emitters compile; reaching any of
 * them at run time trips g_assert_not_reached().
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
166 #endif
167 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
168                        intptr_t arg2);
169 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
170                         TCGReg base, intptr_t ofs);
171 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
172                          const TCGHelperInfo *info);
173 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
174 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
175 #ifdef TCG_TARGET_NEED_LDST_LABELS
176 static int tcg_out_ldst_finalize(TCGContext *s);
177 #endif
178 
/*
 * Parameter block handed to tcg_out_ld_helper_args,
 * tcg_out_ld_helper_ret and tcg_out_st_helper_args.
 */
typedef struct TCGLdstHelperParam {
    /* Callback returning a TCGReg for label @l and register @arg_reg.
       NOTE(review): presumably materializes the return address -- confirm. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    /* NOTE(review): presumably the number of valid entries in tmp[]. */
    unsigned ntmp;
    /* NOTE(review): presumably scratch register numbers -- confirm. */
    int tmp[3];
} TCGLdstHelperParam;
184 
185 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
186                                    const TCGLdstHelperParam *p)
187     __attribute__((unused));
188 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
189                                   bool load_sign, const TCGLdstHelperParam *p)
190     __attribute__((unused));
191 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
192                                    const TCGLdstHelperParam *p)
193     __attribute__((unused));
194 
/*
 * Load helper entry points, indexed by the size+sign MemOp value
 * (MO_UB .. MO_SSIZE).  The sign-extending 32-bit load and the 128-bit
 * load are only present when the host registers are 64 bits wide;
 * unlisted slots are NULL.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
207 
/*
 * Store helper entry points, indexed by the size MemOp value
 * (MO_8 .. MO_SIZE).  The 128-bit store is only present when the host
 * registers are 64 bits wide; unlisted slots are NULL.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
217 
/* Result of atom_and_align_for_opc: a pair of log2 requirements. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
222 
223 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
224                                            MemOp host_atom, bool allow_two_ops)
225     __attribute__((unused));
226 
/* The initial context: used directly in user-mode, and copied into each
   newly registered context in softmmu (see tcg_register_thread). */
TCGContext tcg_init_ctx;
/* The calling thread's current context; set by tcg_register_thread. */
__thread TCGContext *tcg_ctx;

/* Array of registered contexts; softmmu tcg_register_thread claims slots. */
TCGContext **tcg_ctxs;
/* Number of slots claimed so far; incremented atomically on registration. */
unsigned int tcg_cur_ctxs;
/* Upper bound asserted when a slot in tcg_ctxs[] is claimed. */
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
/* NOTE(review): presumably the distance between the writable and the
   executable view of the code buffer used by tcg_splitwx_to_rx -- confirm. */
uintptr_t tcg_splitwx_diff;
236 
237 #ifndef CONFIG_TCG_INTERPRETER
238 tcg_prologue_fn *tcg_qemu_tb_exec;
239 #endif
240 
241 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
242 static TCGRegSet tcg_target_call_clobber_regs;
243 
244 #if TCG_TARGET_INSN_UNIT_SIZE == 1
245 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
246 {
247     *s->code_ptr++ = v;
248 }
249 
250 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
251                                                       uint8_t v)
252 {
253     *p = v;
254 }
255 #endif
256 
257 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
258 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
259 {
260     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
261         *s->code_ptr++ = v;
262     } else {
263         tcg_insn_unit *p = s->code_ptr;
264         memcpy(p, &v, sizeof(v));
265         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
266     }
267 }
268 
269 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
270                                                        uint16_t v)
271 {
272     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
273         *p = v;
274     } else {
275         memcpy(p, &v, sizeof(v));
276     }
277 }
278 #endif
279 
280 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
281 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
282 {
283     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
284         *s->code_ptr++ = v;
285     } else {
286         tcg_insn_unit *p = s->code_ptr;
287         memcpy(p, &v, sizeof(v));
288         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
289     }
290 }
291 
292 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
293                                                        uint32_t v)
294 {
295     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
296         *p = v;
297     } else {
298         memcpy(p, &v, sizeof(v));
299     }
300 }
301 #endif
302 
303 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
304 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
305 {
306     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
307         *s->code_ptr++ = v;
308     } else {
309         tcg_insn_unit *p = s->code_ptr;
310         memcpy(p, &v, sizeof(v));
311         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
312     }
313 }
314 
315 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
316                                                        uint64_t v)
317 {
318     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
319         *p = v;
320     } else {
321         memcpy(p, &v, sizeof(v));
322     }
323 }
324 #endif
325 
326 /* label relocation processing */
327 
328 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
329                           TCGLabel *l, intptr_t addend)
330 {
331     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
332 
333     r->type = type;
334     r->ptr = code_ptr;
335     r->addend = addend;
336     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
337 }
338 
339 static void tcg_out_label(TCGContext *s, TCGLabel *l)
340 {
341     tcg_debug_assert(!l->has_value);
342     l->has_value = 1;
343     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
344 }
345 
346 TCGLabel *gen_new_label(void)
347 {
348     TCGContext *s = tcg_ctx;
349     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
350 
351     memset(l, 0, sizeof(TCGLabel));
352     l->id = s->nb_labels++;
353     QSIMPLEQ_INIT(&l->branches);
354     QSIMPLEQ_INIT(&l->relocs);
355 
356     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
357 
358     return l;
359 }
360 
361 static bool tcg_resolve_relocs(TCGContext *s)
362 {
363     TCGLabel *l;
364 
365     QSIMPLEQ_FOREACH(l, &s->labels, next) {
366         TCGRelocation *r;
367         uintptr_t value = l->u.value;
368 
369         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
370             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
371                 return false;
372             }
373         }
374     }
375     return true;
376 }
377 
/*
 * Record the current generated-code size as jmp_reset_offset[@which]
 * of the translation block being generated (s->gen_tb).
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
386 
/*
 * Record the current generated-code size as jmp_insn_offset[@which]
 * of the translation block being generated (s->gen_tb).  Marked unused
 * because not every backend calls it.
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
395 
396 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
397 {
398     /*
399      * Return the read-execute version of the pointer, for the benefit
400      * of any pc-relative addressing mode.
401      */
402     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
403 }
404 
405 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
/*
 * Byte offset of entry @which in a sequence of CPUTLBDescFast records
 * that starts at s->tlb_fast_offset.
 */
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
}
410 #endif
411 
412 /* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp on s->jmp_trans, which observes -2. */
    siglongjmp(s->jmp_trans, -2);
}
418 
419 /*
420  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
421  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
422  *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
427  */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination; passed as the TCGReg @dst of
                           tcg_out_movext by tcg_out_movext1 */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
435 
436 /**
437  * tcg_out_movext -- move and extend
438  * @s: tcg context
439  * @dst_type: integral type for destination
440  * @dst: destination register
441  * @src_type: integral type for source
442  * @src_ext: extension to apply to source
443  * @src: source register
444  *
445  * Move or extend @src into @dst, depending on @src_ext and the types.
446  */
447 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
448                            TCGType src_type, MemOp src_ext, TCGReg src)
449 {
450     switch (src_ext) {
451     case MO_UB:
452         tcg_out_ext8u(s, dst, src);
453         break;
454     case MO_SB:
455         tcg_out_ext8s(s, dst_type, dst, src);
456         break;
457     case MO_UW:
458         tcg_out_ext16u(s, dst, src);
459         break;
460     case MO_SW:
461         tcg_out_ext16s(s, dst_type, dst, src);
462         break;
463     case MO_UL:
464     case MO_SL:
465         if (dst_type == TCG_TYPE_I32) {
466             if (src_type == TCG_TYPE_I32) {
467                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
468             } else {
469                 tcg_out_extrl_i64_i32(s, dst, src);
470             }
471         } else if (src_type == TCG_TYPE_I32) {
472             if (src_ext & MO_SIGN) {
473                 tcg_out_exts_i32_i64(s, dst, src);
474             } else {
475                 tcg_out_extu_i32_i64(s, dst, src);
476             }
477         } else {
478             if (src_ext & MO_SIGN) {
479                 tcg_out_ext32s(s, dst, src);
480             } else {
481                 tcg_out_ext32u(s, dst, src);
482             }
483         }
484         break;
485     case MO_UQ:
486         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
487         if (dst_type == TCG_TYPE_I32) {
488             tcg_out_extrl_i64_i32(s, dst, src);
489         } else {
490             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
491         }
492         break;
493     default:
494         g_assert_not_reached();
495     }
496 }
497 
498 /* Minor variations on a theme, using a structure. */
499 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
500                                     TCGReg src)
501 {
502     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
503 }
504 
505 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
506 {
507     tcg_out_movext1_new_src(s, i, i->src);
508 }
509 
510 /**
 * tcg_out_movext2 -- move and extend two pairs
512  * @s: tcg context
513  * @i1: first move description
514  * @i2: second move description
515  * @scratch: temporary register, or -1 for none
516  *
517  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
518  * between the sources and destinations.
519  */
520 
521 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
522                             const TCGMovExtend *i2, int scratch)
523 {
524     TCGReg src1 = i1->src;
525     TCGReg src2 = i2->src;
526 
527     if (i1->dst != src2) {
528         tcg_out_movext1(s, i1);
529         tcg_out_movext1(s, i2);
530         return;
531     }
532     if (i2->dst == src1) {
533         TCGType src1_type = i1->src_type;
534         TCGType src2_type = i2->src_type;
535 
536         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
537             /* The data is now in the correct registers, now extend. */
538             src1 = i2->src;
539             src2 = i1->src;
540         } else {
541             tcg_debug_assert(scratch >= 0);
542             tcg_out_mov(s, src1_type, scratch, src1);
543             src1 = scratch;
544         }
545     }
546     tcg_out_movext1_new_src(s, i2, src2);
547     tcg_out_movext1_new_src(s, i1, src1);
548 }
549 
550 /**
 * tcg_out_movext3 -- move and extend three pairs
552  * @s: tcg context
553  * @i1: first move description
554  * @i2: second move description
555  * @i3: third move description
556  * @scratch: temporary register, or -1 for none
557  *
558  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
559  * between the sources and destinations.
560  */
561 
562 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
563                             const TCGMovExtend *i2, const TCGMovExtend *i3,
564                             int scratch)
565 {
566     TCGReg src1 = i1->src;
567     TCGReg src2 = i2->src;
568     TCGReg src3 = i3->src;
569 
570     if (i1->dst != src2 && i1->dst != src3) {
571         tcg_out_movext1(s, i1);
572         tcg_out_movext2(s, i2, i3, scratch);
573         return;
574     }
575     if (i2->dst != src1 && i2->dst != src3) {
576         tcg_out_movext1(s, i2);
577         tcg_out_movext2(s, i1, i3, scratch);
578         return;
579     }
580     if (i3->dst != src1 && i3->dst != src2) {
581         tcg_out_movext1(s, i3);
582         tcg_out_movext2(s, i1, i2, scratch);
583         return;
584     }
585 
586     /*
587      * There is a cycle.  Since there are only 3 nodes, the cycle is
588      * either "clockwise" or "anti-clockwise", and can be solved with
589      * a single scratch or two xchg.
590      */
591     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
592         /* "Clockwise" */
593         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
594             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
595             /* The data is now in the correct registers, now extend. */
596             tcg_out_movext1_new_src(s, i1, i1->dst);
597             tcg_out_movext1_new_src(s, i2, i2->dst);
598             tcg_out_movext1_new_src(s, i3, i3->dst);
599         } else {
600             tcg_debug_assert(scratch >= 0);
601             tcg_out_mov(s, i1->src_type, scratch, src1);
602             tcg_out_movext1(s, i3);
603             tcg_out_movext1(s, i2);
604             tcg_out_movext1_new_src(s, i1, scratch);
605         }
606     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
607         /* "Anti-clockwise" */
608         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
609             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
610             /* The data is now in the correct registers, now extend. */
611             tcg_out_movext1_new_src(s, i1, i1->dst);
612             tcg_out_movext1_new_src(s, i2, i2->dst);
613             tcg_out_movext1_new_src(s, i3, i3->dst);
614         } else {
615             tcg_debug_assert(scratch >= 0);
616             tcg_out_mov(s, i1->src_type, scratch, src1);
617             tcg_out_movext1(s, i2);
618             tcg_out_movext1(s, i3);
619             tcg_out_movext1_new_src(s, i1, scratch);
620         }
621     } else {
622         g_assert_not_reached();
623     }
624 }
625 
/*
 * Token-pasting helpers: glue prefix P and 1..6 constraint tokens into
 * one identifier, separating the tokens with underscores.  For example
 * C_PFX3(c_o1_i2_, r, r, ri) yields c_o1_i2_r_r_ri.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
632 
633 /* Define an enumeration for the various combinations. */
634 
/*
 * First expansion mode: each entry of tcg-target-con-set.h becomes one
 * enumerator followed by a comma, e.g. C_O1_I2(r, r, ri) expands to
 * "c_o1_i2_r_r_ri,".
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
652 
/*
 * One enumerator per constraint set listed in tcg-target-con-set.h, in
 * file order; the values index constraint_sets[] below.
 */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
656 
657 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
658 
659 #undef C_O0_I1
660 #undef C_O0_I2
661 #undef C_O0_I3
662 #undef C_O0_I4
663 #undef C_O1_I1
664 #undef C_O1_I2
665 #undef C_O1_I3
666 #undef C_O1_I4
667 #undef C_N1_I2
668 #undef C_O2_I1
669 #undef C_O2_I2
670 #undef C_O2_I3
671 #undef C_O2_I4
672 #undef C_N1_O1_I4
673 
674 /* Put all of the constraint sets into an array, indexed by the enum. */
675 
/*
 * Second expansion mode: each entry becomes a TCGTargetOpDef initializer
 * whose args_ct_str holds the stringified constraints, outputs first,
 * e.g. C_O1_I2(r, r, ri) expands to
 * "{ .args_ct_str = { "r", "r", "ri" } },".  The C_N1_* forms prefix the
 * first output's string with "&".
 */
#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
693 
/*
 * Constraint strings for every set in tcg-target-con-set.h.  The header
 * is included in the same order as for TCGConstraintSetIndex, so an
 * enumerator of that type is the index of its entry here.
 */
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
697 
698 
699 #undef C_O0_I1
700 #undef C_O0_I2
701 #undef C_O0_I3
702 #undef C_O0_I4
703 #undef C_O1_I1
704 #undef C_O1_I2
705 #undef C_O1_I3
706 #undef C_O1_I4
707 #undef C_N1_I2
708 #undef C_O2_I1
709 #undef C_O2_I2
710 #undef C_O2_I3
711 #undef C_O2_I4
712 #undef C_N1_O1_I4
713 
714 /* Expand the enumerator to be returned from tcg_target_op_def(). */
715 
/*
 * Third expansion mode, in effect while tcg-target.c.inc is compiled:
 * each use expands to the bare enumerator (no trailing comma), e.g.
 * C_O1_I2(r, r, ri) expands to c_o1_i2_r_r_ri.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
733 
734 #include "tcg-target.c.inc"
735 
736 static void alloc_tcg_plugin_context(TCGContext *s)
737 {
738 #ifdef CONFIG_PLUGIN
739     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
740     s->plugin_tb->insns =
741         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
742 #endif
743 }
744 
745 /*
746  * All TCG threads except the parent (i.e. the one that called tcg_context_init
747  * and registered the target's TCG globals) must register with this function
748  * before initiating translation.
749  *
750  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
751  * of tcg_region_init() for the reasoning behind this.
752  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
756  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
759  */
760 #ifdef CONFIG_USER_ONLY
/* User-mode: every thread shares the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
765 #else
766 void tcg_register_thread(void)
767 {
768     TCGContext *s = g_malloc(sizeof(*s));
769     unsigned int i, n;
770 
771     *s = tcg_init_ctx;
772 
773     /* Relink mem_base.  */
774     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
775         if (tcg_init_ctx.temps[i].mem_base) {
776             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
777             tcg_debug_assert(b >= 0 && b < n);
778             s->temps[i].mem_base = &s->temps[b];
779         }
780     }
781 
782     /* Claim an entry in tcg_ctxs */
783     n = qatomic_fetch_inc(&tcg_cur_ctxs);
784     g_assert(n < tcg_max_ctxs);
785     qatomic_set(&tcg_ctxs[n], s);
786 
787     if (n > 0) {
788         alloc_tcg_plugin_context(s);
789         tcg_region_initial_alloc(s);
790     }
791 
792     tcg_ctx = s;
793 }
794 #endif /* !CONFIG_USER_ONLY */
795 
796 /* pool based memory allocation */
797 void *tcg_malloc_internal(TCGContext *s, int size)
798 {
799     TCGPool *p;
800     int pool_size;
801 
802     if (size > TCG_POOL_CHUNK_SIZE) {
803         /* big malloc: insert a new pool (XXX: could optimize) */
804         p = g_malloc(sizeof(TCGPool) + size);
805         p->size = size;
806         p->next = s->pool_first_large;
807         s->pool_first_large = p;
808         return p->data;
809     } else {
810         p = s->pool_current;
811         if (!p) {
812             p = s->pool_first;
813             if (!p)
814                 goto new_pool;
815         } else {
816             if (!p->next) {
817             new_pool:
818                 pool_size = TCG_POOL_CHUNK_SIZE;
819                 p = g_malloc(sizeof(TCGPool) + pool_size);
820                 p->size = pool_size;
821                 p->next = NULL;
822                 if (s->pool_current) {
823                     s->pool_current->next = p;
824                 } else {
825                     s->pool_first = p;
826                 }
827             } else {
828                 p = p->next;
829             }
830         }
831     }
832     s->pool_current = p;
833     s->pool_cur = p->data + size;
834     s->pool_end = p->data + p->size;
835     return p->data;
836 }
837 
838 void tcg_pool_reset(TCGContext *s)
839 {
840     TCGPool *p, *t;
841     for (p = s->pool_first_large; p; p = t) {
842         t = p->next;
843         g_free(p);
844     }
845     s->pool_first_large = NULL;
846     s->pool_cur = s->pool_end = NULL;
847     s->pool_current = NULL;
848 }
849 
850 /*
851  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
852  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
853  * We only use these for layout in tcg_out_ld_helper_ret and
854  * tcg_out_st_helper_args, and share them between several of
855  * the helpers, with the end result that it's easier to build manually.
856  */
857 
/*
 * "ttl" typecode used below for a tcg_target_ulong return value: the
 * i32 typecode when host registers are 32 bits wide, i64 otherwise.
 */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif
863 
864 static TCGHelperInfo info_helper_ld32_mmu = {
865     .flags = TCG_CALL_NO_WG,
866     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
867               | dh_typemask(env, 1)
868               | dh_typemask(i64, 2)  /* uint64_t addr */
869               | dh_typemask(i32, 3)  /* unsigned oi */
870               | dh_typemask(ptr, 4)  /* uintptr_t ra */
871 };
872 
873 static TCGHelperInfo info_helper_ld64_mmu = {
874     .flags = TCG_CALL_NO_WG,
875     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
876               | dh_typemask(env, 1)
877               | dh_typemask(i64, 2)  /* uint64_t addr */
878               | dh_typemask(i32, 3)  /* unsigned oi */
879               | dh_typemask(ptr, 4)  /* uintptr_t ra */
880 };
881 
882 static TCGHelperInfo info_helper_ld128_mmu = {
883     .flags = TCG_CALL_NO_WG,
884     .typemask = dh_typemask(i128, 0) /* return Int128 */
885               | dh_typemask(env, 1)
886               | dh_typemask(i64, 2)  /* uint64_t addr */
887               | dh_typemask(i32, 3)  /* unsigned oi */
888               | dh_typemask(ptr, 4)  /* uintptr_t ra */
889 };
890 
891 static TCGHelperInfo info_helper_st32_mmu = {
892     .flags = TCG_CALL_NO_WG,
893     .typemask = dh_typemask(void, 0)
894               | dh_typemask(env, 1)
895               | dh_typemask(i64, 2)  /* uint64_t addr */
896               | dh_typemask(i32, 3)  /* uint32_t data */
897               | dh_typemask(i32, 4)  /* unsigned oi */
898               | dh_typemask(ptr, 5)  /* uintptr_t ra */
899 };
900 
901 static TCGHelperInfo info_helper_st64_mmu = {
902     .flags = TCG_CALL_NO_WG,
903     .typemask = dh_typemask(void, 0)
904               | dh_typemask(env, 1)
905               | dh_typemask(i64, 2)  /* uint64_t addr */
906               | dh_typemask(i64, 3)  /* uint64_t data */
907               | dh_typemask(i32, 4)  /* unsigned oi */
908               | dh_typemask(ptr, 5)  /* uintptr_t ra */
909 };
910 
911 static TCGHelperInfo info_helper_st128_mmu = {
912     .flags = TCG_CALL_NO_WG,
913     .typemask = dh_typemask(void, 0)
914               | dh_typemask(env, 1)
915               | dh_typemask(i64, 2)  /* uint64_t addr */
916               | dh_typemask(i128, 3) /* Int128 data */
917               | dh_typemask(i32, 4)  /* unsigned oi */
918               | dh_typemask(ptr, 5)  /* uintptr_t ra */
919 };
920 
921 #ifdef CONFIG_TCG_INTERPRETER
922 static ffi_type *typecode_to_ffi(int argmask)
923 {
924     /*
925      * libffi does not support __int128_t, so we have forced Int128
926      * to use the structure definition instead of the builtin type.
927      */
928     static ffi_type *ffi_type_i128_elements[3] = {
929         &ffi_type_uint64,
930         &ffi_type_uint64,
931         NULL
932     };
933     static ffi_type ffi_type_i128 = {
934         .size = 16,
935         .alignment = __alignof__(Int128),
936         .type = FFI_TYPE_STRUCT,
937         .elements = ffi_type_i128_elements,
938     };
939 
940     switch (argmask) {
941     case dh_typecode_void:
942         return &ffi_type_void;
943     case dh_typecode_i32:
944         return &ffi_type_uint32;
945     case dh_typecode_s32:
946         return &ffi_type_sint32;
947     case dh_typecode_i64:
948         return &ffi_type_uint64;
949     case dh_typecode_s64:
950         return &ffi_type_sint64;
951     case dh_typecode_ptr:
952         return &ffi_type_pointer;
953     case dh_typecode_i128:
954         return &ffi_type_i128;
955     }
956     g_assert_not_reached();
957 }
958 
959 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
960 {
961     unsigned typemask = info->typemask;
962     struct {
963         ffi_cif cif;
964         ffi_type *args[];
965     } *ca;
966     ffi_status status;
967     int nargs;
968 
969     /* Ignoring the return type, find the last non-zero field. */
970     nargs = 32 - clz32(typemask >> 3);
971     nargs = DIV_ROUND_UP(nargs, 3);
972     assert(nargs <= MAX_CALL_IARGS);
973 
974     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
975     ca->cif.rtype = typecode_to_ffi(typemask & 7);
976     ca->cif.nargs = nargs;
977 
978     if (nargs != 0) {
979         ca->cif.arg_types = ca->args;
980         for (int j = 0; j < nargs; ++j) {
981             int typecode = extract32(typemask, (j + 1) * 3, 3);
982             ca->args[j] = typecode_to_ffi(typecode);
983         }
984     }
985 
986     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
987                           ca->cif.rtype, ca->cif.arg_types);
988     assert(status == FFI_OK);
989 
990     return &ca->cif;
991 }
992 
993 #define HELPER_INFO_INIT(I)      (&(I)->cif)
994 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
995 #else
996 #define HELPER_INFO_INIT(I)      (&(I)->init)
997 #define HELPER_INFO_INIT_VAL(I)  1
998 #endif /* CONFIG_TCG_INTERPRETER */
999 
1000 static inline bool arg_slot_reg_p(unsigned arg_slot)
1001 {
1002     /*
1003      * Split the sizeof away from the comparison to avoid Werror from
1004      * "unsigned < 0 is always false", when iarg_regs is empty.
1005      */
1006     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1007     return arg_slot < nreg;
1008 }
1009 
1010 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1011 {
1012     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1013     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1014 
1015     tcg_debug_assert(stk_slot < max);
1016     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1017 }
1018 
/* Running cursors advanced while a helper's arguments are laid out. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* index into tcg_gen_callN args[] */
    int info_in_idx;            /* index into TCGHelperInfo in[] */
    int arg_slot;               /* next register-or-stack slot */
    int ref_slot;               /* next stack slot used for references */
} TCGCumulativeArgs;

/* Bump arg_slot to the next even value; an even value is left alone. */
static void layout_arg_even(TCGCumulativeArgs *cum)
{
    if (cum->arg_slot & 1) {
        cum->arg_slot++;
    }
}
1030 
1031 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1032                          TCGCallArgumentKind kind)
1033 {
1034     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1035 
1036     *loc = (TCGCallArgumentLoc){
1037         .kind = kind,
1038         .arg_idx = cum->arg_idx,
1039         .arg_slot = cum->arg_slot,
1040     };
1041     cum->info_in_idx++;
1042     cum->arg_slot++;
1043 }
1044 
1045 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1046                                 TCGHelperInfo *info, int n)
1047 {
1048     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1049 
1050     for (int i = 0; i < n; ++i) {
1051         /* Layout all using the same arg_idx, adjusting the subindex. */
1052         loc[i] = (TCGCallArgumentLoc){
1053             .kind = TCG_CALL_ARG_NORMAL,
1054             .arg_idx = cum->arg_idx,
1055             .tmp_subindex = i,
1056             .arg_slot = cum->arg_slot + i,
1057         };
1058     }
1059     cum->info_in_idx += n;
1060     cum->arg_slot += n;
1061 }
1062 
1063 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1064 {
1065     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1066     int n = 128 / TCG_TARGET_REG_BITS;
1067 
1068     /* The first subindex carries the pointer. */
1069     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1070 
1071     /*
1072      * The callee is allowed to clobber memory associated with
1073      * structure pass by-reference.  Therefore we must make copies.
1074      * Allocate space from "ref_slot", which will be adjusted to
1075      * follow the parameters on the stack.
1076      */
1077     loc[0].ref_slot = cum->ref_slot;
1078 
1079     /*
1080      * Subsequent words also go into the reference slot, but
1081      * do not accumulate into the regular arguments.
1082      */
1083     for (int i = 1; i < n; ++i) {
1084         loc[i] = (TCGCallArgumentLoc){
1085             .kind = TCG_CALL_ARG_BY_REF_N,
1086             .arg_idx = cum->arg_idx,
1087             .tmp_subindex = i,
1088             .ref_slot = cum->ref_slot + i,
1089         };
1090     }
1091     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1092     cum->ref_slot += n;
1093 }
1094 
1095 static void init_call_layout(TCGHelperInfo *info)
1096 {
1097     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1098     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1099     unsigned typemask = info->typemask;
1100     unsigned typecode;
1101     TCGCumulativeArgs cum = { };
1102 
1103     /*
1104      * Parse and place any function return value.
1105      */
1106     typecode = typemask & 7;
1107     switch (typecode) {
1108     case dh_typecode_void:
1109         info->nr_out = 0;
1110         break;
1111     case dh_typecode_i32:
1112     case dh_typecode_s32:
1113     case dh_typecode_ptr:
1114         info->nr_out = 1;
1115         info->out_kind = TCG_CALL_RET_NORMAL;
1116         break;
1117     case dh_typecode_i64:
1118     case dh_typecode_s64:
1119         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1120         info->out_kind = TCG_CALL_RET_NORMAL;
1121         /* Query the last register now to trigger any assert early. */
1122         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1123         break;
1124     case dh_typecode_i128:
1125         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1126         info->out_kind = TCG_TARGET_CALL_RET_I128;
1127         switch (TCG_TARGET_CALL_RET_I128) {
1128         case TCG_CALL_RET_NORMAL:
1129             /* Query the last register now to trigger any assert early. */
1130             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1131             break;
1132         case TCG_CALL_RET_BY_VEC:
1133             /* Query the single register now to trigger any assert early. */
1134             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1135             break;
1136         case TCG_CALL_RET_BY_REF:
1137             /*
1138              * Allocate the first argument to the output.
1139              * We don't need to store this anywhere, just make it
1140              * unavailable for use in the input loop below.
1141              */
1142             cum.arg_slot = 1;
1143             break;
1144         default:
1145             qemu_build_not_reached();
1146         }
1147         break;
1148     default:
1149         g_assert_not_reached();
1150     }
1151 
1152     /*
1153      * Parse and place function arguments.
1154      */
1155     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1156         TCGCallArgumentKind kind;
1157         TCGType type;
1158 
1159         typecode = typemask & 7;
1160         switch (typecode) {
1161         case dh_typecode_i32:
1162         case dh_typecode_s32:
1163             type = TCG_TYPE_I32;
1164             break;
1165         case dh_typecode_i64:
1166         case dh_typecode_s64:
1167             type = TCG_TYPE_I64;
1168             break;
1169         case dh_typecode_ptr:
1170             type = TCG_TYPE_PTR;
1171             break;
1172         case dh_typecode_i128:
1173             type = TCG_TYPE_I128;
1174             break;
1175         default:
1176             g_assert_not_reached();
1177         }
1178 
1179         switch (type) {
1180         case TCG_TYPE_I32:
1181             switch (TCG_TARGET_CALL_ARG_I32) {
1182             case TCG_CALL_ARG_EVEN:
1183                 layout_arg_even(&cum);
1184                 /* fall through */
1185             case TCG_CALL_ARG_NORMAL:
1186                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1187                 break;
1188             case TCG_CALL_ARG_EXTEND:
1189                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1190                 layout_arg_1(&cum, info, kind);
1191                 break;
1192             default:
1193                 qemu_build_not_reached();
1194             }
1195             break;
1196 
1197         case TCG_TYPE_I64:
1198             switch (TCG_TARGET_CALL_ARG_I64) {
1199             case TCG_CALL_ARG_EVEN:
1200                 layout_arg_even(&cum);
1201                 /* fall through */
1202             case TCG_CALL_ARG_NORMAL:
1203                 if (TCG_TARGET_REG_BITS == 32) {
1204                     layout_arg_normal_n(&cum, info, 2);
1205                 } else {
1206                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1207                 }
1208                 break;
1209             default:
1210                 qemu_build_not_reached();
1211             }
1212             break;
1213 
1214         case TCG_TYPE_I128:
1215             switch (TCG_TARGET_CALL_ARG_I128) {
1216             case TCG_CALL_ARG_EVEN:
1217                 layout_arg_even(&cum);
1218                 /* fall through */
1219             case TCG_CALL_ARG_NORMAL:
1220                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1221                 break;
1222             case TCG_CALL_ARG_BY_REF:
1223                 layout_arg_by_ref(&cum, info);
1224                 break;
1225             default:
1226                 qemu_build_not_reached();
1227             }
1228             break;
1229 
1230         default:
1231             g_assert_not_reached();
1232         }
1233     }
1234     info->nr_in = cum.info_in_idx;
1235 
1236     /* Validate that we didn't overrun the input array. */
1237     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1238     /* Validate the backend has enough argument space. */
1239     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1240 
1241     /*
1242      * Relocate the "ref_slot" area to the end of the parameters.
1243      * Minimizing this stack offset helps code size for x86,
1244      * which has a signed 8-bit offset encoding.
1245      */
1246     if (cum.ref_slot != 0) {
1247         int ref_base = 0;
1248 
1249         if (cum.arg_slot > max_reg_slots) {
1250             int align = __alignof(Int128) / sizeof(tcg_target_long);
1251 
1252             ref_base = cum.arg_slot - max_reg_slots;
1253             if (align > 1) {
1254                 ref_base = ROUND_UP(ref_base, align);
1255             }
1256         }
1257         assert(ref_base + cum.ref_slot <= max_stk_slots);
1258         ref_base += max_reg_slots;
1259 
1260         if (ref_base != 0) {
1261             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1262                 TCGCallArgumentLoc *loc = &info->in[i];
1263                 switch (loc->kind) {
1264                 case TCG_CALL_ARG_BY_REF:
1265                 case TCG_CALL_ARG_BY_REF_N:
1266                     loc->ref_slot += ref_base;
1267                     break;
1268                 default:
1269                     break;
1270                 }
1271             }
1272         }
1273     }
1274 }
1275 
1276 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1277 static void process_op_defs(TCGContext *s);
1278 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1279                                             TCGReg reg, const char *name);
1280 
1281 static void tcg_context_init(unsigned max_cpus)
1282 {
1283     TCGContext *s = &tcg_init_ctx;
1284     int op, total_args, n, i;
1285     TCGOpDef *def;
1286     TCGArgConstraint *args_ct;
1287     TCGTemp *ts;
1288 
1289     memset(s, 0, sizeof(*s));
1290     s->nb_globals = 0;
1291 
1292     /* Count total number of arguments and allocate the corresponding
1293        space */
1294     total_args = 0;
1295     for(op = 0; op < NB_OPS; op++) {
1296         def = &tcg_op_defs[op];
1297         n = def->nb_iargs + def->nb_oargs;
1298         total_args += n;
1299     }
1300 
1301     args_ct = g_new0(TCGArgConstraint, total_args);
1302 
1303     for(op = 0; op < NB_OPS; op++) {
1304         def = &tcg_op_defs[op];
1305         def->args_ct = args_ct;
1306         n = def->nb_iargs + def->nb_oargs;
1307         args_ct += n;
1308     }
1309 
1310     init_call_layout(&info_helper_ld32_mmu);
1311     init_call_layout(&info_helper_ld64_mmu);
1312     init_call_layout(&info_helper_ld128_mmu);
1313     init_call_layout(&info_helper_st32_mmu);
1314     init_call_layout(&info_helper_st64_mmu);
1315     init_call_layout(&info_helper_st128_mmu);
1316 
1317     tcg_target_init(s);
1318     process_op_defs(s);
1319 
1320     /* Reverse the order of the saved registers, assuming they're all at
1321        the start of tcg_target_reg_alloc_order.  */
1322     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1323         int r = tcg_target_reg_alloc_order[n];
1324         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1325             break;
1326         }
1327     }
1328     for (i = 0; i < n; ++i) {
1329         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1330     }
1331     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1332         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1333     }
1334 
1335     alloc_tcg_plugin_context(s);
1336 
1337     tcg_ctx = s;
1338     /*
1339      * In user-mode we simply share the init context among threads, since we
1340      * use a single region. See the documentation tcg_region_init() for the
1341      * reasoning behind this.
1342      * In softmmu we will have at most max_cpus TCG threads.
1343      */
1344 #ifdef CONFIG_USER_ONLY
1345     tcg_ctxs = &tcg_ctx;
1346     tcg_cur_ctxs = 1;
1347     tcg_max_ctxs = 1;
1348 #else
1349     tcg_max_ctxs = max_cpus;
1350     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1351 #endif
1352 
1353     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1354     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1355     cpu_env = temp_tcgv_ptr(ts);
1356 }
1357 
/*
 * Run tcg_context_init() for @max_cpus, then tcg_region_init() with the
 * caller's @tb_size, @splitwx and @max_cpus passed through unchanged.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    /* The context is set up before the regions. */
    tcg_context_init(max_cpus);

    tcg_region_init(tb_size, splitwx, max_cpus);
}
1363 
1364 /*
1365  * Allocate TBs right before their corresponding translated code, making
1366  * sure that TBs and code are on different cache lines.
1367  */
1368 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1369 {
1370     uintptr_t align = qemu_icache_linesize;
1371     TranslationBlock *tb;
1372     void *next;
1373 
1374  retry:
1375     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1376     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1377 
1378     if (unlikely(next > s->code_gen_highwater)) {
1379         if (tcg_region_alloc(s)) {
1380             return NULL;
1381         }
1382         goto retry;
1383     }
1384     qatomic_set(&s->code_gen_ptr, next);
1385     s->data_gen_ptr = NULL;
1386     return tb;
1387 }
1388 
1389 void tcg_prologue_init(TCGContext *s)
1390 {
1391     size_t prologue_size;
1392 
1393     s->code_ptr = s->code_gen_ptr;
1394     s->code_buf = s->code_gen_ptr;
1395     s->data_gen_ptr = NULL;
1396 
1397 #ifndef CONFIG_TCG_INTERPRETER
1398     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1399 #endif
1400 
1401 #ifdef TCG_TARGET_NEED_POOL_LABELS
1402     s->pool_labels = NULL;
1403 #endif
1404 
1405     qemu_thread_jit_write();
1406     /* Generate the prologue.  */
1407     tcg_target_qemu_prologue(s);
1408 
1409 #ifdef TCG_TARGET_NEED_POOL_LABELS
1410     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1411     {
1412         int result = tcg_out_pool_finalize(s);
1413         tcg_debug_assert(result == 0);
1414     }
1415 #endif
1416 
1417     prologue_size = tcg_current_code_size(s);
1418     perf_report_prologue(s->code_gen_ptr, prologue_size);
1419 
1420 #ifndef CONFIG_TCG_INTERPRETER
1421     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1422                         (uintptr_t)s->code_buf, prologue_size);
1423 #endif
1424 
1425     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1426         FILE *logfile = qemu_log_trylock();
1427         if (logfile) {
1428             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1429             if (s->data_gen_ptr) {
1430                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1431                 size_t data_size = prologue_size - code_size;
1432                 size_t i;
1433 
1434                 disas(logfile, s->code_gen_ptr, code_size);
1435 
1436                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1437                     if (sizeof(tcg_target_ulong) == 8) {
1438                         fprintf(logfile,
1439                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1440                                 (uintptr_t)s->data_gen_ptr + i,
1441                                 *(uint64_t *)(s->data_gen_ptr + i));
1442                     } else {
1443                         fprintf(logfile,
1444                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1445                                 (uintptr_t)s->data_gen_ptr + i,
1446                                 *(uint32_t *)(s->data_gen_ptr + i));
1447                     }
1448                 }
1449             } else {
1450                 disas(logfile, s->code_gen_ptr, prologue_size);
1451             }
1452             fprintf(logfile, "\n");
1453             qemu_log_unlock(logfile);
1454         }
1455     }
1456 
1457 #ifndef CONFIG_TCG_INTERPRETER
1458     /*
1459      * Assert that goto_ptr is implemented completely, setting an epilogue.
1460      * For tci, we use NULL as the signal to return from the interpreter,
1461      * so skip this check.
1462      */
1463     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1464 #endif
1465 
1466     tcg_region_prologue_set(s);
1467 }
1468 
1469 void tcg_func_start(TCGContext *s)
1470 {
1471     tcg_pool_reset(s);
1472     s->nb_temps = s->nb_globals;
1473 
1474     /* No temps have been previously allocated for size or locality.  */
1475     memset(s->free_temps, 0, sizeof(s->free_temps));
1476 
1477     /* No constant temps have been previously allocated. */
1478     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1479         if (s->const_table[i]) {
1480             g_hash_table_remove_all(s->const_table[i]);
1481         }
1482     }
1483 
1484     s->nb_ops = 0;
1485     s->nb_labels = 0;
1486     s->current_frame_offset = s->frame_start;
1487 
1488 #ifdef CONFIG_DEBUG_TCG
1489     s->goto_tb_issue_mask = 0;
1490 #endif
1491 
1492     QTAILQ_INIT(&s->ops);
1493     QTAILQ_INIT(&s->free_ops);
1494     QSIMPLEQ_INIT(&s->labels);
1495 
1496     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1497                      s->addr_type == TCG_TYPE_I64);
1498 
1499 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
1500     tcg_debug_assert(s->tlb_fast_offset < 0);
1501     tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
1502 #endif
1503 
1504     tcg_debug_assert(s->insn_start_words > 0);
1505 }
1506 
1507 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1508 {
1509     int n = s->nb_temps++;
1510 
1511     if (n >= TCG_MAX_TEMPS) {
1512         tcg_raise_tb_overflow(s);
1513     }
1514     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1515 }
1516 
1517 static TCGTemp *tcg_global_alloc(TCGContext *s)
1518 {
1519     TCGTemp *ts;
1520 
1521     tcg_debug_assert(s->nb_globals == s->nb_temps);
1522     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1523     s->nb_globals++;
1524     ts = tcg_temp_alloc(s);
1525     ts->kind = TEMP_GLOBAL;
1526 
1527     return ts;
1528 }
1529 
1530 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1531                                             TCGReg reg, const char *name)
1532 {
1533     TCGTemp *ts;
1534 
1535     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1536 
1537     ts = tcg_global_alloc(s);
1538     ts->base_type = type;
1539     ts->type = type;
1540     ts->kind = TEMP_FIXED;
1541     ts->reg = reg;
1542     ts->name = name;
1543     tcg_regset_set_reg(s->reserved_regs, reg);
1544 
1545     return ts;
1546 }
1547 
1548 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1549 {
1550     s->frame_start = start;
1551     s->frame_end = start + size;
1552     s->frame_temp
1553         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1554 }
1555 
1556 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1557                                      intptr_t offset, const char *name)
1558 {
1559     TCGContext *s = tcg_ctx;
1560     TCGTemp *base_ts = tcgv_ptr_temp(base);
1561     TCGTemp *ts = tcg_global_alloc(s);
1562     int indirect_reg = 0;
1563 
1564     switch (base_ts->kind) {
1565     case TEMP_FIXED:
1566         break;
1567     case TEMP_GLOBAL:
1568         /* We do not support double-indirect registers.  */
1569         tcg_debug_assert(!base_ts->indirect_reg);
1570         base_ts->indirect_base = 1;
1571         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1572                             ? 2 : 1);
1573         indirect_reg = 1;
1574         break;
1575     default:
1576         g_assert_not_reached();
1577     }
1578 
1579     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1580         TCGTemp *ts2 = tcg_global_alloc(s);
1581         char buf[64];
1582 
1583         ts->base_type = TCG_TYPE_I64;
1584         ts->type = TCG_TYPE_I32;
1585         ts->indirect_reg = indirect_reg;
1586         ts->mem_allocated = 1;
1587         ts->mem_base = base_ts;
1588         ts->mem_offset = offset;
1589         pstrcpy(buf, sizeof(buf), name);
1590         pstrcat(buf, sizeof(buf), "_0");
1591         ts->name = strdup(buf);
1592 
1593         tcg_debug_assert(ts2 == ts + 1);
1594         ts2->base_type = TCG_TYPE_I64;
1595         ts2->type = TCG_TYPE_I32;
1596         ts2->indirect_reg = indirect_reg;
1597         ts2->mem_allocated = 1;
1598         ts2->mem_base = base_ts;
1599         ts2->mem_offset = offset + 4;
1600         ts2->temp_subindex = 1;
1601         pstrcpy(buf, sizeof(buf), name);
1602         pstrcat(buf, sizeof(buf), "_1");
1603         ts2->name = strdup(buf);
1604     } else {
1605         ts->base_type = type;
1606         ts->type = type;
1607         ts->indirect_reg = indirect_reg;
1608         ts->mem_allocated = 1;
1609         ts->mem_base = base_ts;
1610         ts->mem_offset = offset;
1611         ts->name = name;
1612     }
1613     return ts;
1614 }
1615 
1616 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1617 {
1618     TCGContext *s = tcg_ctx;
1619     TCGTemp *ts;
1620     int n;
1621 
1622     if (kind == TEMP_EBB) {
1623         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1624 
1625         if (idx < TCG_MAX_TEMPS) {
1626             /* There is already an available temp with the right type.  */
1627             clear_bit(idx, s->free_temps[type].l);
1628 
1629             ts = &s->temps[idx];
1630             ts->temp_allocated = 1;
1631             tcg_debug_assert(ts->base_type == type);
1632             tcg_debug_assert(ts->kind == kind);
1633             return ts;
1634         }
1635     } else {
1636         tcg_debug_assert(kind == TEMP_TB);
1637     }
1638 
1639     switch (type) {
1640     case TCG_TYPE_I32:
1641     case TCG_TYPE_V64:
1642     case TCG_TYPE_V128:
1643     case TCG_TYPE_V256:
1644         n = 1;
1645         break;
1646     case TCG_TYPE_I64:
1647         n = 64 / TCG_TARGET_REG_BITS;
1648         break;
1649     case TCG_TYPE_I128:
1650         n = 128 / TCG_TARGET_REG_BITS;
1651         break;
1652     default:
1653         g_assert_not_reached();
1654     }
1655 
1656     ts = tcg_temp_alloc(s);
1657     ts->base_type = type;
1658     ts->temp_allocated = 1;
1659     ts->kind = kind;
1660 
1661     if (n == 1) {
1662         ts->type = type;
1663     } else {
1664         ts->type = TCG_TYPE_REG;
1665 
1666         for (int i = 1; i < n; ++i) {
1667             TCGTemp *ts2 = tcg_temp_alloc(s);
1668 
1669             tcg_debug_assert(ts2 == ts + i);
1670             ts2->base_type = type;
1671             ts2->type = TCG_TYPE_REG;
1672             ts2->temp_allocated = 1;
1673             ts2->temp_subindex = i;
1674             ts2->kind = kind;
1675         }
1676     }
1677     return ts;
1678 }
1679 
1680 TCGv_vec tcg_temp_new_vec(TCGType type)
1681 {
1682     TCGTemp *t;
1683 
1684 #ifdef CONFIG_DEBUG_TCG
1685     switch (type) {
1686     case TCG_TYPE_V64:
1687         assert(TCG_TARGET_HAS_v64);
1688         break;
1689     case TCG_TYPE_V128:
1690         assert(TCG_TARGET_HAS_v128);
1691         break;
1692     case TCG_TYPE_V256:
1693         assert(TCG_TARGET_HAS_v256);
1694         break;
1695     default:
1696         g_assert_not_reached();
1697     }
1698 #endif
1699 
1700     t = tcg_temp_new_internal(type, TEMP_EBB);
1701     return temp_tcgv_vec(t);
1702 }
1703 
1704 /* Create a new temp of the same type as an existing temp.  */
1705 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1706 {
1707     TCGTemp *t = tcgv_vec_temp(match);
1708 
1709     tcg_debug_assert(t->temp_allocated != 0);
1710 
1711     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1712     return temp_tcgv_vec(t);
1713 }
1714 
1715 void tcg_temp_free_internal(TCGTemp *ts)
1716 {
1717     TCGContext *s = tcg_ctx;
1718 
1719     switch (ts->kind) {
1720     case TEMP_CONST:
1721     case TEMP_TB:
1722         /* Silently ignore free. */
1723         break;
1724     case TEMP_EBB:
1725         tcg_debug_assert(ts->temp_allocated != 0);
1726         ts->temp_allocated = 0;
1727         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1728         break;
1729     default:
1730         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1731         g_assert_not_reached();
1732     }
1733 }
1734 
1735 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1736 {
1737     TCGContext *s = tcg_ctx;
1738     GHashTable *h = s->const_table[type];
1739     TCGTemp *ts;
1740 
1741     if (h == NULL) {
1742         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1743         s->const_table[type] = h;
1744     }
1745 
1746     ts = g_hash_table_lookup(h, &val);
1747     if (ts == NULL) {
1748         int64_t *val_ptr;
1749 
1750         ts = tcg_temp_alloc(s);
1751 
1752         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1753             TCGTemp *ts2 = tcg_temp_alloc(s);
1754 
1755             tcg_debug_assert(ts2 == ts + 1);
1756 
1757             ts->base_type = TCG_TYPE_I64;
1758             ts->type = TCG_TYPE_I32;
1759             ts->kind = TEMP_CONST;
1760             ts->temp_allocated = 1;
1761 
1762             ts2->base_type = TCG_TYPE_I64;
1763             ts2->type = TCG_TYPE_I32;
1764             ts2->kind = TEMP_CONST;
1765             ts2->temp_allocated = 1;
1766             ts2->temp_subindex = 1;
1767 
1768             /*
1769              * Retain the full value of the 64-bit constant in the low
1770              * part, so that the hash table works.  Actual uses will
1771              * truncate the value to the low part.
1772              */
1773             ts[HOST_BIG_ENDIAN].val = val;
1774             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1775             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1776         } else {
1777             ts->base_type = type;
1778             ts->type = type;
1779             ts->kind = TEMP_CONST;
1780             ts->temp_allocated = 1;
1781             ts->val = val;
1782             val_ptr = &ts->val;
1783         }
1784         g_hash_table_insert(h, val_ptr, ts);
1785     }
1786 
1787     return ts;
1788 }
1789 
1790 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1791 {
1792     val = dup_const(vece, val);
1793     return temp_tcgv_vec(tcg_constant_internal(type, val));
1794 }
1795 
1796 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1797 {
1798     TCGTemp *t = tcgv_vec_temp(match);
1799 
1800     tcg_debug_assert(t->temp_allocated != 0);
1801     return tcg_constant_vec(t->base_type, vece, val);
1802 }
1803 
1804 #ifdef CONFIG_DEBUG_TCG
1805 size_t temp_idx(TCGTemp *ts)
1806 {
1807     ptrdiff_t n = ts - tcg_ctx->temps;
1808     assert(n >= 0 && n < tcg_ctx->nb_temps);
1809     return n;
1810 }
1811 
1812 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1813 {
1814     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1815 
1816     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1817     assert(o % sizeof(TCGTemp) == 0);
1818 
1819     return (void *)tcg_ctx + (uintptr_t)v;
1820 }
1821 #endif /* CONFIG_DEBUG_TCG */
1822 
1823 /* Return true if OP may appear in the opcode stream.
1824    Test the runtime variable that controls each opcode.  */
1825 bool tcg_op_supported(TCGOpcode op)
1826 {
1827     const bool have_vec
1828         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1829 
1830     switch (op) {
1831     case INDEX_op_discard:
1832     case INDEX_op_set_label:
1833     case INDEX_op_call:
1834     case INDEX_op_br:
1835     case INDEX_op_mb:
1836     case INDEX_op_insn_start:
1837     case INDEX_op_exit_tb:
1838     case INDEX_op_goto_tb:
1839     case INDEX_op_goto_ptr:
1840     case INDEX_op_qemu_ld_a32_i32:
1841     case INDEX_op_qemu_ld_a64_i32:
1842     case INDEX_op_qemu_st_a32_i32:
1843     case INDEX_op_qemu_st_a64_i32:
1844     case INDEX_op_qemu_ld_a32_i64:
1845     case INDEX_op_qemu_ld_a64_i64:
1846     case INDEX_op_qemu_st_a32_i64:
1847     case INDEX_op_qemu_st_a64_i64:
1848         return true;
1849 
1850     case INDEX_op_qemu_st8_a32_i32:
1851     case INDEX_op_qemu_st8_a64_i32:
1852         return TCG_TARGET_HAS_qemu_st8_i32;
1853 
1854     case INDEX_op_qemu_ld_a32_i128:
1855     case INDEX_op_qemu_ld_a64_i128:
1856     case INDEX_op_qemu_st_a32_i128:
1857     case INDEX_op_qemu_st_a64_i128:
1858         return TCG_TARGET_HAS_qemu_ldst_i128;
1859 
1860     case INDEX_op_mov_i32:
1861     case INDEX_op_setcond_i32:
1862     case INDEX_op_brcond_i32:
1863     case INDEX_op_ld8u_i32:
1864     case INDEX_op_ld8s_i32:
1865     case INDEX_op_ld16u_i32:
1866     case INDEX_op_ld16s_i32:
1867     case INDEX_op_ld_i32:
1868     case INDEX_op_st8_i32:
1869     case INDEX_op_st16_i32:
1870     case INDEX_op_st_i32:
1871     case INDEX_op_add_i32:
1872     case INDEX_op_sub_i32:
1873     case INDEX_op_mul_i32:
1874     case INDEX_op_and_i32:
1875     case INDEX_op_or_i32:
1876     case INDEX_op_xor_i32:
1877     case INDEX_op_shl_i32:
1878     case INDEX_op_shr_i32:
1879     case INDEX_op_sar_i32:
1880         return true;
1881 
1882     case INDEX_op_movcond_i32:
1883         return TCG_TARGET_HAS_movcond_i32;
1884     case INDEX_op_div_i32:
1885     case INDEX_op_divu_i32:
1886         return TCG_TARGET_HAS_div_i32;
1887     case INDEX_op_rem_i32:
1888     case INDEX_op_remu_i32:
1889         return TCG_TARGET_HAS_rem_i32;
1890     case INDEX_op_div2_i32:
1891     case INDEX_op_divu2_i32:
1892         return TCG_TARGET_HAS_div2_i32;
1893     case INDEX_op_rotl_i32:
1894     case INDEX_op_rotr_i32:
1895         return TCG_TARGET_HAS_rot_i32;
1896     case INDEX_op_deposit_i32:
1897         return TCG_TARGET_HAS_deposit_i32;
1898     case INDEX_op_extract_i32:
1899         return TCG_TARGET_HAS_extract_i32;
1900     case INDEX_op_sextract_i32:
1901         return TCG_TARGET_HAS_sextract_i32;
1902     case INDEX_op_extract2_i32:
1903         return TCG_TARGET_HAS_extract2_i32;
1904     case INDEX_op_add2_i32:
1905         return TCG_TARGET_HAS_add2_i32;
1906     case INDEX_op_sub2_i32:
1907         return TCG_TARGET_HAS_sub2_i32;
1908     case INDEX_op_mulu2_i32:
1909         return TCG_TARGET_HAS_mulu2_i32;
1910     case INDEX_op_muls2_i32:
1911         return TCG_TARGET_HAS_muls2_i32;
1912     case INDEX_op_muluh_i32:
1913         return TCG_TARGET_HAS_muluh_i32;
1914     case INDEX_op_mulsh_i32:
1915         return TCG_TARGET_HAS_mulsh_i32;
1916     case INDEX_op_ext8s_i32:
1917         return TCG_TARGET_HAS_ext8s_i32;
1918     case INDEX_op_ext16s_i32:
1919         return TCG_TARGET_HAS_ext16s_i32;
1920     case INDEX_op_ext8u_i32:
1921         return TCG_TARGET_HAS_ext8u_i32;
1922     case INDEX_op_ext16u_i32:
1923         return TCG_TARGET_HAS_ext16u_i32;
1924     case INDEX_op_bswap16_i32:
1925         return TCG_TARGET_HAS_bswap16_i32;
1926     case INDEX_op_bswap32_i32:
1927         return TCG_TARGET_HAS_bswap32_i32;
1928     case INDEX_op_not_i32:
1929         return TCG_TARGET_HAS_not_i32;
1930     case INDEX_op_neg_i32:
1931         return TCG_TARGET_HAS_neg_i32;
1932     case INDEX_op_andc_i32:
1933         return TCG_TARGET_HAS_andc_i32;
1934     case INDEX_op_orc_i32:
1935         return TCG_TARGET_HAS_orc_i32;
1936     case INDEX_op_eqv_i32:
1937         return TCG_TARGET_HAS_eqv_i32;
1938     case INDEX_op_nand_i32:
1939         return TCG_TARGET_HAS_nand_i32;
1940     case INDEX_op_nor_i32:
1941         return TCG_TARGET_HAS_nor_i32;
1942     case INDEX_op_clz_i32:
1943         return TCG_TARGET_HAS_clz_i32;
1944     case INDEX_op_ctz_i32:
1945         return TCG_TARGET_HAS_ctz_i32;
1946     case INDEX_op_ctpop_i32:
1947         return TCG_TARGET_HAS_ctpop_i32;
1948 
1949     case INDEX_op_brcond2_i32:
1950     case INDEX_op_setcond2_i32:
1951         return TCG_TARGET_REG_BITS == 32;
1952 
1953     case INDEX_op_mov_i64:
1954     case INDEX_op_setcond_i64:
1955     case INDEX_op_brcond_i64:
1956     case INDEX_op_ld8u_i64:
1957     case INDEX_op_ld8s_i64:
1958     case INDEX_op_ld16u_i64:
1959     case INDEX_op_ld16s_i64:
1960     case INDEX_op_ld32u_i64:
1961     case INDEX_op_ld32s_i64:
1962     case INDEX_op_ld_i64:
1963     case INDEX_op_st8_i64:
1964     case INDEX_op_st16_i64:
1965     case INDEX_op_st32_i64:
1966     case INDEX_op_st_i64:
1967     case INDEX_op_add_i64:
1968     case INDEX_op_sub_i64:
1969     case INDEX_op_mul_i64:
1970     case INDEX_op_and_i64:
1971     case INDEX_op_or_i64:
1972     case INDEX_op_xor_i64:
1973     case INDEX_op_shl_i64:
1974     case INDEX_op_shr_i64:
1975     case INDEX_op_sar_i64:
1976     case INDEX_op_ext_i32_i64:
1977     case INDEX_op_extu_i32_i64:
1978         return TCG_TARGET_REG_BITS == 64;
1979 
1980     case INDEX_op_movcond_i64:
1981         return TCG_TARGET_HAS_movcond_i64;
1982     case INDEX_op_div_i64:
1983     case INDEX_op_divu_i64:
1984         return TCG_TARGET_HAS_div_i64;
1985     case INDEX_op_rem_i64:
1986     case INDEX_op_remu_i64:
1987         return TCG_TARGET_HAS_rem_i64;
1988     case INDEX_op_div2_i64:
1989     case INDEX_op_divu2_i64:
1990         return TCG_TARGET_HAS_div2_i64;
1991     case INDEX_op_rotl_i64:
1992     case INDEX_op_rotr_i64:
1993         return TCG_TARGET_HAS_rot_i64;
1994     case INDEX_op_deposit_i64:
1995         return TCG_TARGET_HAS_deposit_i64;
1996     case INDEX_op_extract_i64:
1997         return TCG_TARGET_HAS_extract_i64;
1998     case INDEX_op_sextract_i64:
1999         return TCG_TARGET_HAS_sextract_i64;
2000     case INDEX_op_extract2_i64:
2001         return TCG_TARGET_HAS_extract2_i64;
2002     case INDEX_op_extrl_i64_i32:
2003         return TCG_TARGET_HAS_extrl_i64_i32;
2004     case INDEX_op_extrh_i64_i32:
2005         return TCG_TARGET_HAS_extrh_i64_i32;
2006     case INDEX_op_ext8s_i64:
2007         return TCG_TARGET_HAS_ext8s_i64;
2008     case INDEX_op_ext16s_i64:
2009         return TCG_TARGET_HAS_ext16s_i64;
2010     case INDEX_op_ext32s_i64:
2011         return TCG_TARGET_HAS_ext32s_i64;
2012     case INDEX_op_ext8u_i64:
2013         return TCG_TARGET_HAS_ext8u_i64;
2014     case INDEX_op_ext16u_i64:
2015         return TCG_TARGET_HAS_ext16u_i64;
2016     case INDEX_op_ext32u_i64:
2017         return TCG_TARGET_HAS_ext32u_i64;
2018     case INDEX_op_bswap16_i64:
2019         return TCG_TARGET_HAS_bswap16_i64;
2020     case INDEX_op_bswap32_i64:
2021         return TCG_TARGET_HAS_bswap32_i64;
2022     case INDEX_op_bswap64_i64:
2023         return TCG_TARGET_HAS_bswap64_i64;
2024     case INDEX_op_not_i64:
2025         return TCG_TARGET_HAS_not_i64;
2026     case INDEX_op_neg_i64:
2027         return TCG_TARGET_HAS_neg_i64;
2028     case INDEX_op_andc_i64:
2029         return TCG_TARGET_HAS_andc_i64;
2030     case INDEX_op_orc_i64:
2031         return TCG_TARGET_HAS_orc_i64;
2032     case INDEX_op_eqv_i64:
2033         return TCG_TARGET_HAS_eqv_i64;
2034     case INDEX_op_nand_i64:
2035         return TCG_TARGET_HAS_nand_i64;
2036     case INDEX_op_nor_i64:
2037         return TCG_TARGET_HAS_nor_i64;
2038     case INDEX_op_clz_i64:
2039         return TCG_TARGET_HAS_clz_i64;
2040     case INDEX_op_ctz_i64:
2041         return TCG_TARGET_HAS_ctz_i64;
2042     case INDEX_op_ctpop_i64:
2043         return TCG_TARGET_HAS_ctpop_i64;
2044     case INDEX_op_add2_i64:
2045         return TCG_TARGET_HAS_add2_i64;
2046     case INDEX_op_sub2_i64:
2047         return TCG_TARGET_HAS_sub2_i64;
2048     case INDEX_op_mulu2_i64:
2049         return TCG_TARGET_HAS_mulu2_i64;
2050     case INDEX_op_muls2_i64:
2051         return TCG_TARGET_HAS_muls2_i64;
2052     case INDEX_op_muluh_i64:
2053         return TCG_TARGET_HAS_muluh_i64;
2054     case INDEX_op_mulsh_i64:
2055         return TCG_TARGET_HAS_mulsh_i64;
2056 
2057     case INDEX_op_mov_vec:
2058     case INDEX_op_dup_vec:
2059     case INDEX_op_dupm_vec:
2060     case INDEX_op_ld_vec:
2061     case INDEX_op_st_vec:
2062     case INDEX_op_add_vec:
2063     case INDEX_op_sub_vec:
2064     case INDEX_op_and_vec:
2065     case INDEX_op_or_vec:
2066     case INDEX_op_xor_vec:
2067     case INDEX_op_cmp_vec:
2068         return have_vec;
2069     case INDEX_op_dup2_vec:
2070         return have_vec && TCG_TARGET_REG_BITS == 32;
2071     case INDEX_op_not_vec:
2072         return have_vec && TCG_TARGET_HAS_not_vec;
2073     case INDEX_op_neg_vec:
2074         return have_vec && TCG_TARGET_HAS_neg_vec;
2075     case INDEX_op_abs_vec:
2076         return have_vec && TCG_TARGET_HAS_abs_vec;
2077     case INDEX_op_andc_vec:
2078         return have_vec && TCG_TARGET_HAS_andc_vec;
2079     case INDEX_op_orc_vec:
2080         return have_vec && TCG_TARGET_HAS_orc_vec;
2081     case INDEX_op_nand_vec:
2082         return have_vec && TCG_TARGET_HAS_nand_vec;
2083     case INDEX_op_nor_vec:
2084         return have_vec && TCG_TARGET_HAS_nor_vec;
2085     case INDEX_op_eqv_vec:
2086         return have_vec && TCG_TARGET_HAS_eqv_vec;
2087     case INDEX_op_mul_vec:
2088         return have_vec && TCG_TARGET_HAS_mul_vec;
2089     case INDEX_op_shli_vec:
2090     case INDEX_op_shri_vec:
2091     case INDEX_op_sari_vec:
2092         return have_vec && TCG_TARGET_HAS_shi_vec;
2093     case INDEX_op_shls_vec:
2094     case INDEX_op_shrs_vec:
2095     case INDEX_op_sars_vec:
2096         return have_vec && TCG_TARGET_HAS_shs_vec;
2097     case INDEX_op_shlv_vec:
2098     case INDEX_op_shrv_vec:
2099     case INDEX_op_sarv_vec:
2100         return have_vec && TCG_TARGET_HAS_shv_vec;
2101     case INDEX_op_rotli_vec:
2102         return have_vec && TCG_TARGET_HAS_roti_vec;
2103     case INDEX_op_rotls_vec:
2104         return have_vec && TCG_TARGET_HAS_rots_vec;
2105     case INDEX_op_rotlv_vec:
2106     case INDEX_op_rotrv_vec:
2107         return have_vec && TCG_TARGET_HAS_rotv_vec;
2108     case INDEX_op_ssadd_vec:
2109     case INDEX_op_usadd_vec:
2110     case INDEX_op_sssub_vec:
2111     case INDEX_op_ussub_vec:
2112         return have_vec && TCG_TARGET_HAS_sat_vec;
2113     case INDEX_op_smin_vec:
2114     case INDEX_op_umin_vec:
2115     case INDEX_op_smax_vec:
2116     case INDEX_op_umax_vec:
2117         return have_vec && TCG_TARGET_HAS_minmax_vec;
2118     case INDEX_op_bitsel_vec:
2119         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2120     case INDEX_op_cmpsel_vec:
2121         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2122 
2123     default:
2124         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2125         return true;
2126     }
2127 }
2128 
2129 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2130 
2131 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2132 {
2133     TCGv_i64 extend_free[MAX_CALL_IARGS];
2134     int n_extend = 0;
2135     TCGOp *op;
2136     int i, n, pi = 0, total_args;
2137 
2138     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2139         init_call_layout(info);
2140         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2141     }
2142 
2143     total_args = info->nr_out + info->nr_in + 2;
2144     op = tcg_op_alloc(INDEX_op_call, total_args);
2145 
2146 #ifdef CONFIG_PLUGIN
2147     /* Flag helpers that may affect guest state */
2148     if (tcg_ctx->plugin_insn &&
2149         !(info->flags & TCG_CALL_PLUGIN) &&
2150         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2151         tcg_ctx->plugin_insn->calls_helpers = true;
2152     }
2153 #endif
2154 
2155     TCGOP_CALLO(op) = n = info->nr_out;
2156     switch (n) {
2157     case 0:
2158         tcg_debug_assert(ret == NULL);
2159         break;
2160     case 1:
2161         tcg_debug_assert(ret != NULL);
2162         op->args[pi++] = temp_arg(ret);
2163         break;
2164     case 2:
2165     case 4:
2166         tcg_debug_assert(ret != NULL);
2167         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2168         tcg_debug_assert(ret->temp_subindex == 0);
2169         for (i = 0; i < n; ++i) {
2170             op->args[pi++] = temp_arg(ret + i);
2171         }
2172         break;
2173     default:
2174         g_assert_not_reached();
2175     }
2176 
2177     TCGOP_CALLI(op) = n = info->nr_in;
2178     for (i = 0; i < n; i++) {
2179         const TCGCallArgumentLoc *loc = &info->in[i];
2180         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2181 
2182         switch (loc->kind) {
2183         case TCG_CALL_ARG_NORMAL:
2184         case TCG_CALL_ARG_BY_REF:
2185         case TCG_CALL_ARG_BY_REF_N:
2186             op->args[pi++] = temp_arg(ts);
2187             break;
2188 
2189         case TCG_CALL_ARG_EXTEND_U:
2190         case TCG_CALL_ARG_EXTEND_S:
2191             {
2192                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2193                 TCGv_i32 orig = temp_tcgv_i32(ts);
2194 
2195                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2196                     tcg_gen_ext_i32_i64(temp, orig);
2197                 } else {
2198                     tcg_gen_extu_i32_i64(temp, orig);
2199                 }
2200                 op->args[pi++] = tcgv_i64_arg(temp);
2201                 extend_free[n_extend++] = temp;
2202             }
2203             break;
2204 
2205         default:
2206             g_assert_not_reached();
2207         }
2208     }
2209     op->args[pi++] = (uintptr_t)info->func;
2210     op->args[pi++] = (uintptr_t)info;
2211     tcg_debug_assert(pi == total_args);
2212 
2213     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2214 
2215     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2216     for (i = 0; i < n_extend; ++i) {
2217         tcg_temp_free_i64(extend_free[i]);
2218     }
2219 }
2220 
2221 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2222 {
2223     tcg_gen_callN(info, ret, NULL);
2224 }
2225 
2226 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2227 {
2228     tcg_gen_callN(info, ret, &t1);
2229 }
2230 
2231 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2232 {
2233     TCGTemp *args[2] = { t1, t2 };
2234     tcg_gen_callN(info, ret, args);
2235 }
2236 
2237 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2238                    TCGTemp *t2, TCGTemp *t3)
2239 {
2240     TCGTemp *args[3] = { t1, t2, t3 };
2241     tcg_gen_callN(info, ret, args);
2242 }
2243 
2244 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2245                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2246 {
2247     TCGTemp *args[4] = { t1, t2, t3, t4 };
2248     tcg_gen_callN(info, ret, args);
2249 }
2250 
2251 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2252                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2253 {
2254     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2255     tcg_gen_callN(info, ret, args);
2256 }
2257 
2258 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2259                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2260 {
2261     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2262     tcg_gen_callN(info, ret, args);
2263 }
2264 
2265 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2266                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2267                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2268 {
2269     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2270     tcg_gen_callN(info, ret, args);
2271 }
2272 
2273 static void tcg_reg_alloc_start(TCGContext *s)
2274 {
2275     int i, n;
2276 
2277     for (i = 0, n = s->nb_temps; i < n; i++) {
2278         TCGTemp *ts = &s->temps[i];
2279         TCGTempVal val = TEMP_VAL_MEM;
2280 
2281         switch (ts->kind) {
2282         case TEMP_CONST:
2283             val = TEMP_VAL_CONST;
2284             break;
2285         case TEMP_FIXED:
2286             val = TEMP_VAL_REG;
2287             break;
2288         case TEMP_GLOBAL:
2289             break;
2290         case TEMP_EBB:
2291             val = TEMP_VAL_DEAD;
2292             /* fall through */
2293         case TEMP_TB:
2294             ts->mem_allocated = 0;
2295             break;
2296         default:
2297             g_assert_not_reached();
2298         }
2299         ts->val_type = val;
2300     }
2301 
2302     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2303 }
2304 
2305 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2306                                  TCGTemp *ts)
2307 {
2308     int idx = temp_idx(ts);
2309 
2310     switch (ts->kind) {
2311     case TEMP_FIXED:
2312     case TEMP_GLOBAL:
2313         pstrcpy(buf, buf_size, ts->name);
2314         break;
2315     case TEMP_TB:
2316         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2317         break;
2318     case TEMP_EBB:
2319         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2320         break;
2321     case TEMP_CONST:
2322         switch (ts->type) {
2323         case TCG_TYPE_I32:
2324             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2325             break;
2326 #if TCG_TARGET_REG_BITS > 32
2327         case TCG_TYPE_I64:
2328             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2329             break;
2330 #endif
2331         case TCG_TYPE_V64:
2332         case TCG_TYPE_V128:
2333         case TCG_TYPE_V256:
2334             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2335                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2336             break;
2337         default:
2338             g_assert_not_reached();
2339         }
2340         break;
2341     }
2342     return buf;
2343 }
2344 
2345 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2346                              int buf_size, TCGArg arg)
2347 {
2348     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2349 }
2350 
2351 static const char * const cond_name[] =
2352 {
2353     [TCG_COND_NEVER] = "never",
2354     [TCG_COND_ALWAYS] = "always",
2355     [TCG_COND_EQ] = "eq",
2356     [TCG_COND_NE] = "ne",
2357     [TCG_COND_LT] = "lt",
2358     [TCG_COND_GE] = "ge",
2359     [TCG_COND_LE] = "le",
2360     [TCG_COND_GT] = "gt",
2361     [TCG_COND_LTU] = "ltu",
2362     [TCG_COND_GEU] = "geu",
2363     [TCG_COND_LEU] = "leu",
2364     [TCG_COND_GTU] = "gtu"
2365 };
2366 
2367 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2368 {
2369     [MO_UB]   = "ub",
2370     [MO_SB]   = "sb",
2371     [MO_LEUW] = "leuw",
2372     [MO_LESW] = "lesw",
2373     [MO_LEUL] = "leul",
2374     [MO_LESL] = "lesl",
2375     [MO_LEUQ] = "leq",
2376     [MO_BEUW] = "beuw",
2377     [MO_BESW] = "besw",
2378     [MO_BEUL] = "beul",
2379     [MO_BESL] = "besl",
2380     [MO_BEUQ] = "beq",
2381     [MO_128 + MO_BE] = "beo",
2382     [MO_128 + MO_LE] = "leo",
2383 };
2384 
2385 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2386     [MO_UNALN >> MO_ASHIFT]    = "un+",
2387     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2388     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2389     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2390     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2391     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2392     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2393     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2394 };
2395 
2396 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2397     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2398     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2399     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2400     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2401     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2402     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2403 };
2404 
2405 static const char bswap_flag_name[][6] = {
2406     [TCG_BSWAP_IZ] = "iz",
2407     [TCG_BSWAP_OZ] = "oz",
2408     [TCG_BSWAP_OS] = "os",
2409     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2410     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2411 };
2412 
2413 static inline bool tcg_regset_single(TCGRegSet d)
2414 {
2415     return (d & (d - 1)) == 0;
2416 }
2417 
2418 static inline TCGReg tcg_regset_first(TCGRegSet d)
2419 {
2420     if (TCG_TARGET_NB_REGS <= 32) {
2421         return ctz32(d);
2422     } else {
2423         return ctz64(d);
2424     }
2425 }
2426 
/*
 * fprintf wrapper that yields only the number of characters written:
 * a negative (error) result from fprintf is reported as 0.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ < 0 ? 0 : ret_; })
2430 
2431 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2432 {
2433     char buf[128];
2434     TCGOp *op;
2435 
2436     QTAILQ_FOREACH(op, &s->ops, link) {
2437         int i, k, nb_oargs, nb_iargs, nb_cargs;
2438         const TCGOpDef *def;
2439         TCGOpcode c;
2440         int col = 0;
2441 
2442         c = op->opc;
2443         def = &tcg_op_defs[c];
2444 
2445         if (c == INDEX_op_insn_start) {
2446             nb_oargs = 0;
2447             col += ne_fprintf(f, "\n ----");
2448 
2449             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2450                 col += ne_fprintf(f, " %016" PRIx64,
2451                                   tcg_get_insn_start_param(op, i));
2452             }
2453         } else if (c == INDEX_op_call) {
2454             const TCGHelperInfo *info = tcg_call_info(op);
2455             void *func = tcg_call_func(op);
2456 
2457             /* variable number of arguments */
2458             nb_oargs = TCGOP_CALLO(op);
2459             nb_iargs = TCGOP_CALLI(op);
2460             nb_cargs = def->nb_cargs;
2461 
2462             col += ne_fprintf(f, " %s ", def->name);
2463 
2464             /*
2465              * Print the function name from TCGHelperInfo, if available.
2466              * Note that plugins have a template function for the info,
2467              * but the actual function pointer comes from the plugin.
2468              */
2469             if (func == info->func) {
2470                 col += ne_fprintf(f, "%s", info->name);
2471             } else {
2472                 col += ne_fprintf(f, "plugin(%p)", func);
2473             }
2474 
2475             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2476             for (i = 0; i < nb_oargs; i++) {
2477                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2478                                                             op->args[i]));
2479             }
2480             for (i = 0; i < nb_iargs; i++) {
2481                 TCGArg arg = op->args[nb_oargs + i];
2482                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2483                 col += ne_fprintf(f, ",%s", t);
2484             }
2485         } else {
2486             col += ne_fprintf(f, " %s ", def->name);
2487 
2488             nb_oargs = def->nb_oargs;
2489             nb_iargs = def->nb_iargs;
2490             nb_cargs = def->nb_cargs;
2491 
2492             if (def->flags & TCG_OPF_VECTOR) {
2493                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2494                                   8 << TCGOP_VECE(op));
2495             }
2496 
2497             k = 0;
2498             for (i = 0; i < nb_oargs; i++) {
2499                 const char *sep =  k ? "," : "";
2500                 col += ne_fprintf(f, "%s%s", sep,
2501                                   tcg_get_arg_str(s, buf, sizeof(buf),
2502                                                   op->args[k++]));
2503             }
2504             for (i = 0; i < nb_iargs; i++) {
2505                 const char *sep =  k ? "," : "";
2506                 col += ne_fprintf(f, "%s%s", sep,
2507                                   tcg_get_arg_str(s, buf, sizeof(buf),
2508                                                   op->args[k++]));
2509             }
2510             switch (c) {
2511             case INDEX_op_brcond_i32:
2512             case INDEX_op_setcond_i32:
2513             case INDEX_op_movcond_i32:
2514             case INDEX_op_brcond2_i32:
2515             case INDEX_op_setcond2_i32:
2516             case INDEX_op_brcond_i64:
2517             case INDEX_op_setcond_i64:
2518             case INDEX_op_movcond_i64:
2519             case INDEX_op_cmp_vec:
2520             case INDEX_op_cmpsel_vec:
2521                 if (op->args[k] < ARRAY_SIZE(cond_name)
2522                     && cond_name[op->args[k]]) {
2523                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2524                 } else {
2525                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2526                 }
2527                 i = 1;
2528                 break;
2529             case INDEX_op_qemu_ld_a32_i32:
2530             case INDEX_op_qemu_ld_a64_i32:
2531             case INDEX_op_qemu_st_a32_i32:
2532             case INDEX_op_qemu_st_a64_i32:
2533             case INDEX_op_qemu_st8_a32_i32:
2534             case INDEX_op_qemu_st8_a64_i32:
2535             case INDEX_op_qemu_ld_a32_i64:
2536             case INDEX_op_qemu_ld_a64_i64:
2537             case INDEX_op_qemu_st_a32_i64:
2538             case INDEX_op_qemu_st_a64_i64:
2539             case INDEX_op_qemu_ld_a32_i128:
2540             case INDEX_op_qemu_ld_a64_i128:
2541             case INDEX_op_qemu_st_a32_i128:
2542             case INDEX_op_qemu_st_a64_i128:
2543                 {
2544                     const char *s_al, *s_op, *s_at;
2545                     MemOpIdx oi = op->args[k++];
2546                     MemOp op = get_memop(oi);
2547                     unsigned ix = get_mmuidx(oi);
2548 
2549                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2550                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2551                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2552                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2553 
2554                     /* If all fields are accounted for, print symbolically. */
2555                     if (!op && s_al && s_op && s_at) {
2556                         col += ne_fprintf(f, ",%s%s%s,%u",
2557                                           s_at, s_al, s_op, ix);
2558                     } else {
2559                         op = get_memop(oi);
2560                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2561                     }
2562                     i = 1;
2563                 }
2564                 break;
2565             case INDEX_op_bswap16_i32:
2566             case INDEX_op_bswap16_i64:
2567             case INDEX_op_bswap32_i32:
2568             case INDEX_op_bswap32_i64:
2569             case INDEX_op_bswap64_i64:
2570                 {
2571                     TCGArg flags = op->args[k];
2572                     const char *name = NULL;
2573 
2574                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2575                         name = bswap_flag_name[flags];
2576                     }
2577                     if (name) {
2578                         col += ne_fprintf(f, ",%s", name);
2579                     } else {
2580                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2581                     }
2582                     i = k = 1;
2583                 }
2584                 break;
2585             default:
2586                 i = 0;
2587                 break;
2588             }
2589             switch (c) {
2590             case INDEX_op_set_label:
2591             case INDEX_op_br:
2592             case INDEX_op_brcond_i32:
2593             case INDEX_op_brcond_i64:
2594             case INDEX_op_brcond2_i32:
2595                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2596                                   arg_label(op->args[k])->id);
2597                 i++, k++;
2598                 break;
2599             case INDEX_op_mb:
2600                 {
2601                     TCGBar membar = op->args[k];
2602                     const char *b_op, *m_op;
2603 
2604                     switch (membar & TCG_BAR_SC) {
2605                     case 0:
2606                         b_op = "none";
2607                         break;
2608                     case TCG_BAR_LDAQ:
2609                         b_op = "acq";
2610                         break;
2611                     case TCG_BAR_STRL:
2612                         b_op = "rel";
2613                         break;
2614                     case TCG_BAR_SC:
2615                         b_op = "seq";
2616                         break;
2617                     default:
2618                         g_assert_not_reached();
2619                     }
2620 
2621                     switch (membar & TCG_MO_ALL) {
2622                     case 0:
2623                         m_op = "none";
2624                         break;
2625                     case TCG_MO_LD_LD:
2626                         m_op = "rr";
2627                         break;
2628                     case TCG_MO_LD_ST:
2629                         m_op = "rw";
2630                         break;
2631                     case TCG_MO_ST_LD:
2632                         m_op = "wr";
2633                         break;
2634                     case TCG_MO_ST_ST:
2635                         m_op = "ww";
2636                         break;
2637                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2638                         m_op = "rr+rw";
2639                         break;
2640                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2641                         m_op = "rr+wr";
2642                         break;
2643                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2644                         m_op = "rr+ww";
2645                         break;
2646                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2647                         m_op = "rw+wr";
2648                         break;
2649                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2650                         m_op = "rw+ww";
2651                         break;
2652                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2653                         m_op = "wr+ww";
2654                         break;
2655                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2656                         m_op = "rr+rw+wr";
2657                         break;
2658                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2659                         m_op = "rr+rw+ww";
2660                         break;
2661                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2662                         m_op = "rr+wr+ww";
2663                         break;
2664                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2665                         m_op = "rw+wr+ww";
2666                         break;
2667                     case TCG_MO_ALL:
2668                         m_op = "all";
2669                         break;
2670                     default:
2671                         g_assert_not_reached();
2672                     }
2673 
2674                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2675                     i++, k++;
2676                 }
2677                 break;
2678             default:
2679                 break;
2680             }
2681             for (; i < nb_cargs; i++, k++) {
2682                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2683                                   op->args[k]);
2684             }
2685         }
2686 
2687         if (have_prefs || op->life) {
2688             for (; col < 40; ++col) {
2689                 putc(' ', f);
2690             }
2691         }
2692 
2693         if (op->life) {
2694             unsigned life = op->life;
2695 
2696             if (life & (SYNC_ARG * 3)) {
2697                 ne_fprintf(f, "  sync:");
2698                 for (i = 0; i < 2; ++i) {
2699                     if (life & (SYNC_ARG << i)) {
2700                         ne_fprintf(f, " %d", i);
2701                     }
2702                 }
2703             }
2704             life /= DEAD_ARG;
2705             if (life) {
2706                 ne_fprintf(f, "  dead:");
2707                 for (i = 0; life; ++i, life >>= 1) {
2708                     if (life & 1) {
2709                         ne_fprintf(f, " %d", i);
2710                     }
2711                 }
2712             }
2713         }
2714 
2715         if (have_prefs) {
2716             for (i = 0; i < nb_oargs; ++i) {
2717                 TCGRegSet set = output_pref(op, i);
2718 
2719                 if (i == 0) {
2720                     ne_fprintf(f, "  pref=");
2721                 } else {
2722                     ne_fprintf(f, ",");
2723                 }
2724                 if (set == 0) {
2725                     ne_fprintf(f, "none");
2726                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2727                     ne_fprintf(f, "all");
2728 #ifdef CONFIG_DEBUG_TCG
2729                 } else if (tcg_regset_single(set)) {
2730                     TCGReg reg = tcg_regset_first(set);
2731                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2732 #endif
2733                 } else if (TCG_TARGET_NB_REGS <= 32) {
2734                     ne_fprintf(f, "0x%x", (uint32_t)set);
2735                 } else {
2736                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2737                 }
2738             }
2739         }
2740 
2741         putc('\n', f);
2742     }
2743 }
2744 
2745 /* we give more priority to constraints with less registers */
2746 static int get_constraint_priority(const TCGOpDef *def, int k)
2747 {
2748     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2749     int n = ctpop64(arg_ct->regs);
2750 
2751     /*
2752      * Sort constraints of a single register first, which includes output
2753      * aliases (which must exactly match the input already allocated).
2754      */
2755     if (n == 1 || arg_ct->oalias) {
2756         return INT_MAX;
2757     }
2758 
2759     /*
2760      * Sort register pairs next, first then second immediately after.
2761      * Arbitrarily sort multiple pairs by the index of the first reg;
2762      * there shouldn't be many pairs.
2763      */
2764     switch (arg_ct->pair) {
2765     case 1:
2766     case 3:
2767         return (k + 1) * 2;
2768     case 2:
2769         return (arg_ct->pair_index + 1) * 2 - 1;
2770     }
2771 
2772     /* Finally, sort by decreasing register count. */
2773     assert(n > 1);
2774     return -n;
2775 }
2776 
2777 /* sort from highest priority to lowest */
2778 static void sort_constraints(TCGOpDef *def, int start, int n)
2779 {
2780     int i, j;
2781     TCGArgConstraint *a = def->args_ct;
2782 
2783     for (i = 0; i < n; i++) {
2784         a[start + i].sort_index = start + i;
2785     }
2786     if (n <= 1) {
2787         return;
2788     }
2789     for (i = 0; i < n - 1; i++) {
2790         for (j = i + 1; j < n; j++) {
2791             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2792             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2793             if (p1 < p2) {
2794                 int tmp = a[start + i].sort_index;
2795                 a[start + i].sort_index = a[start + j].sort_index;
2796                 a[start + j].sort_index = tmp;
2797             }
2798         }
2799     }
2800 }
2801 
/*
 * Fill in tcg_op_defs[].args_ct for every opcode that is present and has
 * at least one input or output argument.
 *
 * For each such opcode the constraint set is obtained from
 * tcg_target_op_def() and its per-argument constraint strings are parsed:
 *   - a leading digit aliases an input to the numbered output;
 *   - a leading '&' marks an output as requiring a new register, and the
 *     remainder of the string is parsed normally;
 *   - 'p' / 'm' form a register pair with the immediately preceding
 *     argument (register after / register before);
 *   - 'i' allows a constant, and all other letters are taken from
 *     "tcg-target-con-str.h".
 * Afterwards, pairs that were copied through output aliases are fixed up
 * and the output and input constraints are sorted independently.
 *
 * The S parameter is not referenced by this function.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        /* Opcodes flagged as not present need no constraints. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        /* Nothing to do for opcodes without input or output arguments. */
        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* Outputs occupy indices [0, nb_oargs); inputs follow. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* First handle the prefixes that may only start a string. */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output O: start from a copy of it. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                /* Remember that the pair fix-up below is required. */
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output only; the rest of the string is parsed below. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                /* This argument is the second of the pair, O the first. */
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                /* This argument is the first of the pair, O the second. */
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate the constraint letters one character at a time. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    /* Input I aliases the first output O of pair (O, O2). */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    /* Input I aliases the second output O of pair (O2, O). */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
3006 
3007 static void remove_label_use(TCGOp *op, int idx)
3008 {
3009     TCGLabel *label = arg_label(op->args[idx]);
3010     TCGLabelUse *use;
3011 
3012     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3013         if (use->op == op) {
3014             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3015             return;
3016         }
3017     }
3018     g_assert_not_reached();
3019 }
3020 
3021 void tcg_op_remove(TCGContext *s, TCGOp *op)
3022 {
3023     switch (op->opc) {
3024     case INDEX_op_br:
3025         remove_label_use(op, 0);
3026         break;
3027     case INDEX_op_brcond_i32:
3028     case INDEX_op_brcond_i64:
3029         remove_label_use(op, 3);
3030         break;
3031     case INDEX_op_brcond2_i32:
3032         remove_label_use(op, 5);
3033         break;
3034     default:
3035         break;
3036     }
3037 
3038     QTAILQ_REMOVE(&s->ops, op, link);
3039     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3040     s->nb_ops--;
3041 }
3042 
3043 void tcg_remove_ops_after(TCGOp *op)
3044 {
3045     TCGContext *s = tcg_ctx;
3046 
3047     while (true) {
3048         TCGOp *last = tcg_last_op();
3049         if (last == op) {
3050             return;
3051         }
3052         tcg_op_remove(s, last);
3053     }
3054 }
3055 
3056 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3057 {
3058     TCGContext *s = tcg_ctx;
3059     TCGOp *op = NULL;
3060 
3061     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3062         QTAILQ_FOREACH(op, &s->free_ops, link) {
3063             if (nargs <= op->nargs) {
3064                 QTAILQ_REMOVE(&s->free_ops, op, link);
3065                 nargs = op->nargs;
3066                 goto found;
3067             }
3068         }
3069     }
3070 
3071     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3072     nargs = MAX(4, nargs);
3073     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3074 
3075  found:
3076     memset(op, 0, offsetof(TCGOp, link));
3077     op->opc = opc;
3078     op->nargs = nargs;
3079 
3080     /* Check for bitfield overflow. */
3081     tcg_debug_assert(op->nargs == nargs);
3082 
3083     s->nb_ops++;
3084     return op;
3085 }
3086 
3087 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3088 {
3089     TCGOp *op = tcg_op_alloc(opc, nargs);
3090     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3091     return op;
3092 }
3093 
3094 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3095                             TCGOpcode opc, unsigned nargs)
3096 {
3097     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3098     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3099     return new_op;
3100 }
3101 
3102 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3103                            TCGOpcode opc, unsigned nargs)
3104 {
3105     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3106     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3107     return new_op;
3108 }
3109 
3110 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3111 {
3112     TCGLabelUse *u;
3113 
3114     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3115         TCGOp *op = u->op;
3116         switch (op->opc) {
3117         case INDEX_op_br:
3118             op->args[0] = label_arg(to);
3119             break;
3120         case INDEX_op_brcond_i32:
3121         case INDEX_op_brcond_i64:
3122             op->args[3] = label_arg(to);
3123             break;
3124         case INDEX_op_brcond2_i32:
3125             op->args[5] = label_arg(to);
3126             break;
3127         default:
3128             g_assert_not_reached();
3129         }
3130     }
3131 
3132     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3133 }
3134 
/*
 * Reachable analysis : remove unreachable code.
 *
 * Walk the op list of S from first to last.  'dead' is set once an
 * unconditional control transfer has been seen and cleared again at a
 * label that still has branches referencing it, or when a branch to the
 * immediately following label is removed.  Ops visited while 'dead' is
 * set are removed, except insn_start which is always kept.  Adjacent
 * labels are merged and labels without remaining uses are removed.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    /* The _SAFE iterator is required because OP may be removed below. */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default an op is removed exactly when it is unreachable. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                /* Re-read the predecessor now that the old one is gone. */
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3226 
/*
 * Bits kept in TCGTemp.state by the liveness helpers below:
 * TS_DEAD marks a temp as dead, TS_MEM marks a temp that should be
 * in memory.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / sync bit of argument N.  Both macros read a variable
 * named 'arg_life' that must be in scope at the point of use.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3232 
3233 /* For liveness_pass_1, the register preferences for a given temp.  */
3234 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3235 {
3236     return ts->state_ptr;
3237 }
3238 
3239 /* For liveness_pass_1, reset the preferences for a given temp to the
3240  * maximal regset for its type.
3241  */
3242 static inline void la_reset_pref(TCGTemp *ts)
3243 {
3244     *la_temp_pref(ts)
3245         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3246 }
3247 
3248 /* liveness analysis: end of function: all temps are dead, and globals
3249    should be in memory. */
3250 static void la_func_end(TCGContext *s, int ng, int nt)
3251 {
3252     int i;
3253 
3254     for (i = 0; i < ng; ++i) {
3255         s->temps[i].state = TS_DEAD | TS_MEM;
3256         la_reset_pref(&s->temps[i]);
3257     }
3258     for (i = ng; i < nt; ++i) {
3259         s->temps[i].state = TS_DEAD;
3260         la_reset_pref(&s->temps[i]);
3261     }
3262 }
3263 
3264 /* liveness analysis: end of basic block: all temps are dead, globals
3265    and local temps should be in memory. */
3266 static void la_bb_end(TCGContext *s, int ng, int nt)
3267 {
3268     int i;
3269 
3270     for (i = 0; i < nt; ++i) {
3271         TCGTemp *ts = &s->temps[i];
3272         int state;
3273 
3274         switch (ts->kind) {
3275         case TEMP_FIXED:
3276         case TEMP_GLOBAL:
3277         case TEMP_TB:
3278             state = TS_DEAD | TS_MEM;
3279             break;
3280         case TEMP_EBB:
3281         case TEMP_CONST:
3282             state = TS_DEAD;
3283             break;
3284         default:
3285             g_assert_not_reached();
3286         }
3287         ts->state = state;
3288         la_reset_pref(ts);
3289     }
3290 }
3291 
3292 /* liveness analysis: sync globals back to memory.  */
3293 static void la_global_sync(TCGContext *s, int ng)
3294 {
3295     int i;
3296 
3297     for (i = 0; i < ng; ++i) {
3298         int state = s->temps[i].state;
3299         s->temps[i].state = state | TS_MEM;
3300         if (state == TS_DEAD) {
3301             /* If the global was previously dead, reset prefs.  */
3302             la_reset_pref(&s->temps[i]);
3303         }
3304     }
3305 }
3306 
3307 /*
3308  * liveness analysis: conditional branch: all temps are dead unless
3309  * explicitly live-across-conditional-branch, globals and local temps
3310  * should be synced.
3311  */
3312 static void la_bb_sync(TCGContext *s, int ng, int nt)
3313 {
3314     la_global_sync(s, ng);
3315 
3316     for (int i = ng; i < nt; ++i) {
3317         TCGTemp *ts = &s->temps[i];
3318         int state;
3319 
3320         switch (ts->kind) {
3321         case TEMP_TB:
3322             state = ts->state;
3323             ts->state = state | TS_MEM;
3324             if (state != TS_DEAD) {
3325                 continue;
3326             }
3327             break;
3328         case TEMP_EBB:
3329         case TEMP_CONST:
3330             continue;
3331         default:
3332             g_assert_not_reached();
3333         }
3334         la_reset_pref(&s->temps[i]);
3335     }
3336 }
3337 
3338 /* liveness analysis: sync globals back to memory and kill.  */
3339 static void la_global_kill(TCGContext *s, int ng)
3340 {
3341     int i;
3342 
3343     for (i = 0; i < ng; i++) {
3344         s->temps[i].state = TS_DEAD | TS_MEM;
3345         la_reset_pref(&s->temps[i]);
3346     }
3347 }
3348 
3349 /* liveness analysis: note live globals crossing calls.  */
3350 static void la_cross_call(TCGContext *s, int nt)
3351 {
3352     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3353     int i;
3354 
3355     for (i = 0; i < nt; i++) {
3356         TCGTemp *ts = &s->temps[i];
3357         if (!(ts->state & TS_DEAD)) {
3358             TCGRegSet *pset = la_temp_pref(ts);
3359             TCGRegSet set = *pset;
3360 
3361             set &= mask;
3362             /* If the combination is not possible, restart.  */
3363             if (set == 0) {
3364                 set = tcg_target_available_regs[ts->type] & mask;
3365             }
3366             *pset = set;
3367         }
3368     }
3369 }
3370 
3371 /*
3372  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3373  * to TEMP_EBB, if possible.
3374  */
3375 static void __attribute__((noinline))
3376 liveness_pass_0(TCGContext *s)
3377 {
3378     void * const multiple_ebb = (void *)(uintptr_t)-1;
3379     int nb_temps = s->nb_temps;
3380     TCGOp *op, *ebb;
3381 
3382     for (int i = s->nb_globals; i < nb_temps; ++i) {
3383         s->temps[i].state_ptr = NULL;
3384     }
3385 
3386     /*
3387      * Represent each EBB by the op at which it begins.  In the case of
3388      * the first EBB, this is the first op, otherwise it is a label.
3389      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3390      * within a single EBB, else MULTIPLE_EBB.
3391      */
3392     ebb = QTAILQ_FIRST(&s->ops);
3393     QTAILQ_FOREACH(op, &s->ops, link) {
3394         const TCGOpDef *def;
3395         int nb_oargs, nb_iargs;
3396 
3397         switch (op->opc) {
3398         case INDEX_op_set_label:
3399             ebb = op;
3400             continue;
3401         case INDEX_op_discard:
3402             continue;
3403         case INDEX_op_call:
3404             nb_oargs = TCGOP_CALLO(op);
3405             nb_iargs = TCGOP_CALLI(op);
3406             break;
3407         default:
3408             def = &tcg_op_defs[op->opc];
3409             nb_oargs = def->nb_oargs;
3410             nb_iargs = def->nb_iargs;
3411             break;
3412         }
3413 
3414         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3415             TCGTemp *ts = arg_temp(op->args[i]);
3416 
3417             if (ts->kind != TEMP_TB) {
3418                 continue;
3419             }
3420             if (ts->state_ptr == NULL) {
3421                 ts->state_ptr = ebb;
3422             } else if (ts->state_ptr != ebb) {
3423                 ts->state_ptr = multiple_ebb;
3424             }
3425         }
3426     }
3427 
3428     /*
3429      * For TEMP_TB that turned out not to be used beyond one EBB,
3430      * reduce the liveness to TEMP_EBB.
3431      */
3432     for (int i = s->nb_globals; i < nb_temps; ++i) {
3433         TCGTemp *ts = &s->temps[i];
3434         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3435             ts->kind = TEMP_EBB;
3436         }
3437     }
3438 }
3439 
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
3443 static void __attribute__((noinline))
3444 liveness_pass_1(TCGContext *s)
3445 {
3446     int nb_globals = s->nb_globals;
3447     int nb_temps = s->nb_temps;
3448     TCGOp *op, *op_prev;
3449     TCGRegSet *prefs;
3450     int i;
3451 
3452     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3453     for (i = 0; i < nb_temps; ++i) {
3454         s->temps[i].state_ptr = prefs + i;
3455     }
3456 
3457     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3458     la_func_end(s, nb_globals, nb_temps);
3459 
3460     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3461         int nb_iargs, nb_oargs;
3462         TCGOpcode opc_new, opc_new2;
3463         bool have_opc_new2;
3464         TCGLifeData arg_life = 0;
3465         TCGTemp *ts;
3466         TCGOpcode opc = op->opc;
3467         const TCGOpDef *def = &tcg_op_defs[opc];
3468 
3469         switch (opc) {
3470         case INDEX_op_call:
3471             {
3472                 const TCGHelperInfo *info = tcg_call_info(op);
3473                 int call_flags = tcg_call_flags(op);
3474 
3475                 nb_oargs = TCGOP_CALLO(op);
3476                 nb_iargs = TCGOP_CALLI(op);
3477 
3478                 /* pure functions can be removed if their result is unused */
3479                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3480                     for (i = 0; i < nb_oargs; i++) {
3481                         ts = arg_temp(op->args[i]);
3482                         if (ts->state != TS_DEAD) {
3483                             goto do_not_remove_call;
3484                         }
3485                     }
3486                     goto do_remove;
3487                 }
3488             do_not_remove_call:
3489 
3490                 /* Output args are dead.  */
3491                 for (i = 0; i < nb_oargs; i++) {
3492                     ts = arg_temp(op->args[i]);
3493                     if (ts->state & TS_DEAD) {
3494                         arg_life |= DEAD_ARG << i;
3495                     }
3496                     if (ts->state & TS_MEM) {
3497                         arg_life |= SYNC_ARG << i;
3498                     }
3499                     ts->state = TS_DEAD;
3500                     la_reset_pref(ts);
3501                 }
3502 
3503                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3504                 memset(op->output_pref, 0, sizeof(op->output_pref));
3505 
3506                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3507                                     TCG_CALL_NO_READ_GLOBALS))) {
3508                     la_global_kill(s, nb_globals);
3509                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3510                     la_global_sync(s, nb_globals);
3511                 }
3512 
3513                 /* Record arguments that die in this helper.  */
3514                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3515                     ts = arg_temp(op->args[i]);
3516                     if (ts->state & TS_DEAD) {
3517                         arg_life |= DEAD_ARG << i;
3518                     }
3519                 }
3520 
3521                 /* For all live registers, remove call-clobbered prefs.  */
3522                 la_cross_call(s, nb_temps);
3523 
3524                 /*
3525                  * Input arguments are live for preceding opcodes.
3526                  *
3527                  * For those arguments that die, and will be allocated in
3528                  * registers, clear the register set for that arg, to be
3529                  * filled in below.  For args that will be on the stack,
3530                  * reset to any available reg.  Process arguments in reverse
3531                  * order so that if a temp is used more than once, the stack
3532                  * reset to max happens before the register reset to 0.
3533                  */
3534                 for (i = nb_iargs - 1; i >= 0; i--) {
3535                     const TCGCallArgumentLoc *loc = &info->in[i];
3536                     ts = arg_temp(op->args[nb_oargs + i]);
3537 
3538                     if (ts->state & TS_DEAD) {
3539                         switch (loc->kind) {
3540                         case TCG_CALL_ARG_NORMAL:
3541                         case TCG_CALL_ARG_EXTEND_U:
3542                         case TCG_CALL_ARG_EXTEND_S:
3543                             if (arg_slot_reg_p(loc->arg_slot)) {
3544                                 *la_temp_pref(ts) = 0;
3545                                 break;
3546                             }
3547                             /* fall through */
3548                         default:
3549                             *la_temp_pref(ts) =
3550                                 tcg_target_available_regs[ts->type];
3551                             break;
3552                         }
3553                         ts->state &= ~TS_DEAD;
3554                     }
3555                 }
3556 
3557                 /*
3558                  * For each input argument, add its input register to prefs.
3559                  * If a temp is used once, this produces a single set bit;
3560                  * if a temp is used multiple times, this produces a set.
3561                  */
3562                 for (i = 0; i < nb_iargs; i++) {
3563                     const TCGCallArgumentLoc *loc = &info->in[i];
3564                     ts = arg_temp(op->args[nb_oargs + i]);
3565 
3566                     switch (loc->kind) {
3567                     case TCG_CALL_ARG_NORMAL:
3568                     case TCG_CALL_ARG_EXTEND_U:
3569                     case TCG_CALL_ARG_EXTEND_S:
3570                         if (arg_slot_reg_p(loc->arg_slot)) {
3571                             tcg_regset_set_reg(*la_temp_pref(ts),
3572                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3573                         }
3574                         break;
3575                     default:
3576                         break;
3577                     }
3578                 }
3579             }
3580             break;
3581         case INDEX_op_insn_start:
3582             break;
3583         case INDEX_op_discard:
3584             /* mark the temporary as dead */
3585             ts = arg_temp(op->args[0]);
3586             ts->state = TS_DEAD;
3587             la_reset_pref(ts);
3588             break;
3589 
3590         case INDEX_op_add2_i32:
3591             opc_new = INDEX_op_add_i32;
3592             goto do_addsub2;
3593         case INDEX_op_sub2_i32:
3594             opc_new = INDEX_op_sub_i32;
3595             goto do_addsub2;
3596         case INDEX_op_add2_i64:
3597             opc_new = INDEX_op_add_i64;
3598             goto do_addsub2;
3599         case INDEX_op_sub2_i64:
3600             opc_new = INDEX_op_sub_i64;
3601         do_addsub2:
3602             nb_iargs = 4;
3603             nb_oargs = 2;
3604             /* Test if the high part of the operation is dead, but not
3605                the low part.  The result can be optimized to a simple
3606                add or sub.  This happens often for x86_64 guest when the
3607                cpu mode is set to 32 bit.  */
3608             if (arg_temp(op->args[1])->state == TS_DEAD) {
3609                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3610                     goto do_remove;
3611                 }
3612                 /* Replace the opcode and adjust the args in place,
3613                    leaving 3 unused args at the end.  */
3614                 op->opc = opc = opc_new;
3615                 op->args[1] = op->args[2];
3616                 op->args[2] = op->args[4];
3617                 /* Fall through and mark the single-word operation live.  */
3618                 nb_iargs = 2;
3619                 nb_oargs = 1;
3620             }
3621             goto do_not_remove;
3622 
3623         case INDEX_op_mulu2_i32:
3624             opc_new = INDEX_op_mul_i32;
3625             opc_new2 = INDEX_op_muluh_i32;
3626             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3627             goto do_mul2;
3628         case INDEX_op_muls2_i32:
3629             opc_new = INDEX_op_mul_i32;
3630             opc_new2 = INDEX_op_mulsh_i32;
3631             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3632             goto do_mul2;
3633         case INDEX_op_mulu2_i64:
3634             opc_new = INDEX_op_mul_i64;
3635             opc_new2 = INDEX_op_muluh_i64;
3636             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3637             goto do_mul2;
3638         case INDEX_op_muls2_i64:
3639             opc_new = INDEX_op_mul_i64;
3640             opc_new2 = INDEX_op_mulsh_i64;
3641             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3642             goto do_mul2;
3643         do_mul2:
3644             nb_iargs = 2;
3645             nb_oargs = 2;
3646             if (arg_temp(op->args[1])->state == TS_DEAD) {
3647                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3648                     /* Both parts of the operation are dead.  */
3649                     goto do_remove;
3650                 }
3651                 /* The high part of the operation is dead; generate the low. */
3652                 op->opc = opc = opc_new;
3653                 op->args[1] = op->args[2];
3654                 op->args[2] = op->args[3];
3655             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3656                 /* The low part of the operation is dead; generate the high. */
3657                 op->opc = opc = opc_new2;
3658                 op->args[0] = op->args[1];
3659                 op->args[1] = op->args[2];
3660                 op->args[2] = op->args[3];
3661             } else {
3662                 goto do_not_remove;
3663             }
3664             /* Mark the single-word operation live.  */
3665             nb_oargs = 1;
3666             goto do_not_remove;
3667 
3668         default:
3669             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3670             nb_iargs = def->nb_iargs;
3671             nb_oargs = def->nb_oargs;
3672 
3673             /* Test if the operation can be removed because all
3674                its outputs are dead. We assume that nb_oargs == 0
3675                implies side effects */
3676             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3677                 for (i = 0; i < nb_oargs; i++) {
3678                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3679                         goto do_not_remove;
3680                     }
3681                 }
3682                 goto do_remove;
3683             }
3684             goto do_not_remove;
3685 
3686         do_remove:
3687             tcg_op_remove(s, op);
3688             break;
3689 
3690         do_not_remove:
3691             for (i = 0; i < nb_oargs; i++) {
3692                 ts = arg_temp(op->args[i]);
3693 
3694                 /* Remember the preference of the uses that followed.  */
3695                 if (i < ARRAY_SIZE(op->output_pref)) {
3696                     op->output_pref[i] = *la_temp_pref(ts);
3697                 }
3698 
3699                 /* Output args are dead.  */
3700                 if (ts->state & TS_DEAD) {
3701                     arg_life |= DEAD_ARG << i;
3702                 }
3703                 if (ts->state & TS_MEM) {
3704                     arg_life |= SYNC_ARG << i;
3705                 }
3706                 ts->state = TS_DEAD;
3707                 la_reset_pref(ts);
3708             }
3709 
3710             /* If end of basic block, update.  */
3711             if (def->flags & TCG_OPF_BB_EXIT) {
3712                 la_func_end(s, nb_globals, nb_temps);
3713             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3714                 la_bb_sync(s, nb_globals, nb_temps);
3715             } else if (def->flags & TCG_OPF_BB_END) {
3716                 la_bb_end(s, nb_globals, nb_temps);
3717             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3718                 la_global_sync(s, nb_globals);
3719                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3720                     la_cross_call(s, nb_temps);
3721                 }
3722             }
3723 
3724             /* Record arguments that die in this opcode.  */
3725             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3726                 ts = arg_temp(op->args[i]);
3727                 if (ts->state & TS_DEAD) {
3728                     arg_life |= DEAD_ARG << i;
3729                 }
3730             }
3731 
3732             /* Input arguments are live for preceding opcodes.  */
3733             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3734                 ts = arg_temp(op->args[i]);
3735                 if (ts->state & TS_DEAD) {
3736                     /* For operands that were dead, initially allow
3737                        all regs for the type.  */
3738                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3739                     ts->state &= ~TS_DEAD;
3740                 }
3741             }
3742 
3743             /* Incorporate constraints for this operand.  */
3744             switch (opc) {
3745             case INDEX_op_mov_i32:
3746             case INDEX_op_mov_i64:
3747                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3748                    have proper constraints.  That said, special case
3749                    moves to propagate preferences backward.  */
3750                 if (IS_DEAD_ARG(1)) {
3751                     *la_temp_pref(arg_temp(op->args[0]))
3752                         = *la_temp_pref(arg_temp(op->args[1]));
3753                 }
3754                 break;
3755 
3756             default:
3757                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3758                     const TCGArgConstraint *ct = &def->args_ct[i];
3759                     TCGRegSet set, *pset;
3760 
3761                     ts = arg_temp(op->args[i]);
3762                     pset = la_temp_pref(ts);
3763                     set = *pset;
3764 
3765                     set &= ct->regs;
3766                     if (ct->ialias) {
3767                         set &= output_pref(op, ct->alias_index);
3768                     }
3769                     /* If the combination is not possible, restart.  */
3770                     if (set == 0) {
3771                         set = ct->regs;
3772                     }
3773                     *pset = set;
3774                 }
3775                 break;
3776             }
3777             break;
3778         }
3779         op->life = arg_life;
3780     }
3781 }
3782 
3783 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3784 static bool __attribute__((noinline))
3785 liveness_pass_2(TCGContext *s)
3786 {
3787     int nb_globals = s->nb_globals;
3788     int nb_temps, i;
3789     bool changes = false;
3790     TCGOp *op, *op_next;
3791 
3792     /* Create a temporary for each indirect global.  */
3793     for (i = 0; i < nb_globals; ++i) {
3794         TCGTemp *its = &s->temps[i];
3795         if (its->indirect_reg) {
3796             TCGTemp *dts = tcg_temp_alloc(s);
3797             dts->type = its->type;
3798             dts->base_type = its->base_type;
3799             dts->temp_subindex = its->temp_subindex;
3800             dts->kind = TEMP_EBB;
3801             its->state_ptr = dts;
3802         } else {
3803             its->state_ptr = NULL;
3804         }
3805         /* All globals begin dead.  */
3806         its->state = TS_DEAD;
3807     }
3808     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3809         TCGTemp *its = &s->temps[i];
3810         its->state_ptr = NULL;
3811         its->state = TS_DEAD;
3812     }
3813 
3814     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3815         TCGOpcode opc = op->opc;
3816         const TCGOpDef *def = &tcg_op_defs[opc];
3817         TCGLifeData arg_life = op->life;
3818         int nb_iargs, nb_oargs, call_flags;
3819         TCGTemp *arg_ts, *dir_ts;
3820 
3821         if (opc == INDEX_op_call) {
3822             nb_oargs = TCGOP_CALLO(op);
3823             nb_iargs = TCGOP_CALLI(op);
3824             call_flags = tcg_call_flags(op);
3825         } else {
3826             nb_iargs = def->nb_iargs;
3827             nb_oargs = def->nb_oargs;
3828 
3829             /* Set flags similar to how calls require.  */
3830             if (def->flags & TCG_OPF_COND_BRANCH) {
3831                 /* Like reading globals: sync_globals */
3832                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3833             } else if (def->flags & TCG_OPF_BB_END) {
3834                 /* Like writing globals: save_globals */
3835                 call_flags = 0;
3836             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3837                 /* Like reading globals: sync_globals */
3838                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3839             } else {
3840                 /* No effect on globals.  */
3841                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3842                               TCG_CALL_NO_WRITE_GLOBALS);
3843             }
3844         }
3845 
3846         /* Make sure that input arguments are available.  */
3847         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3848             arg_ts = arg_temp(op->args[i]);
3849             dir_ts = arg_ts->state_ptr;
3850             if (dir_ts && arg_ts->state == TS_DEAD) {
3851                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3852                                   ? INDEX_op_ld_i32
3853                                   : INDEX_op_ld_i64);
3854                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3855 
3856                 lop->args[0] = temp_arg(dir_ts);
3857                 lop->args[1] = temp_arg(arg_ts->mem_base);
3858                 lop->args[2] = arg_ts->mem_offset;
3859 
3860                 /* Loaded, but synced with memory.  */
3861                 arg_ts->state = TS_MEM;
3862             }
3863         }
3864 
3865         /* Perform input replacement, and mark inputs that became dead.
3866            No action is required except keeping temp_state up to date
3867            so that we reload when needed.  */
3868         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3869             arg_ts = arg_temp(op->args[i]);
3870             dir_ts = arg_ts->state_ptr;
3871             if (dir_ts) {
3872                 op->args[i] = temp_arg(dir_ts);
3873                 changes = true;
3874                 if (IS_DEAD_ARG(i)) {
3875                     arg_ts->state = TS_DEAD;
3876                 }
3877             }
3878         }
3879 
3880         /* Liveness analysis should ensure that the following are
3881            all correct, for call sites and basic block end points.  */
3882         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3883             /* Nothing to do */
3884         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3885             for (i = 0; i < nb_globals; ++i) {
3886                 /* Liveness should see that globals are synced back,
3887                    that is, either TS_DEAD or TS_MEM.  */
3888                 arg_ts = &s->temps[i];
3889                 tcg_debug_assert(arg_ts->state_ptr == 0
3890                                  || arg_ts->state != 0);
3891             }
3892         } else {
3893             for (i = 0; i < nb_globals; ++i) {
3894                 /* Liveness should see that globals are saved back,
3895                    that is, TS_DEAD, waiting to be reloaded.  */
3896                 arg_ts = &s->temps[i];
3897                 tcg_debug_assert(arg_ts->state_ptr == 0
3898                                  || arg_ts->state == TS_DEAD);
3899             }
3900         }
3901 
3902         /* Outputs become available.  */
3903         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3904             arg_ts = arg_temp(op->args[0]);
3905             dir_ts = arg_ts->state_ptr;
3906             if (dir_ts) {
3907                 op->args[0] = temp_arg(dir_ts);
3908                 changes = true;
3909 
3910                 /* The output is now live and modified.  */
3911                 arg_ts->state = 0;
3912 
3913                 if (NEED_SYNC_ARG(0)) {
3914                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3915                                       ? INDEX_op_st_i32
3916                                       : INDEX_op_st_i64);
3917                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3918                     TCGTemp *out_ts = dir_ts;
3919 
3920                     if (IS_DEAD_ARG(0)) {
3921                         out_ts = arg_temp(op->args[1]);
3922                         arg_ts->state = TS_DEAD;
3923                         tcg_op_remove(s, op);
3924                     } else {
3925                         arg_ts->state = TS_MEM;
3926                     }
3927 
3928                     sop->args[0] = temp_arg(out_ts);
3929                     sop->args[1] = temp_arg(arg_ts->mem_base);
3930                     sop->args[2] = arg_ts->mem_offset;
3931                 } else {
3932                     tcg_debug_assert(!IS_DEAD_ARG(0));
3933                 }
3934             }
3935         } else {
3936             for (i = 0; i < nb_oargs; i++) {
3937                 arg_ts = arg_temp(op->args[i]);
3938                 dir_ts = arg_ts->state_ptr;
3939                 if (!dir_ts) {
3940                     continue;
3941                 }
3942                 op->args[i] = temp_arg(dir_ts);
3943                 changes = true;
3944 
3945                 /* The output is now live and modified.  */
3946                 arg_ts->state = 0;
3947 
3948                 /* Sync outputs upon their last write.  */
3949                 if (NEED_SYNC_ARG(i)) {
3950                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3951                                       ? INDEX_op_st_i32
3952                                       : INDEX_op_st_i64);
3953                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3954 
3955                     sop->args[0] = temp_arg(dir_ts);
3956                     sop->args[1] = temp_arg(arg_ts->mem_base);
3957                     sop->args[2] = arg_ts->mem_offset;
3958 
3959                     arg_ts->state = TS_MEM;
3960                 }
3961                 /* Drop outputs that are dead.  */
3962                 if (IS_DEAD_ARG(i)) {
3963                     arg_ts->state = TS_DEAD;
3964                 }
3965             }
3966         }
3967     }
3968 
3969     return changes;
3970 }
3971 
3972 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3973 {
3974     intptr_t off;
3975     int size, align;
3976 
3977     /* When allocating an object, look at the full type. */
3978     size = tcg_type_size(ts->base_type);
3979     switch (ts->base_type) {
3980     case TCG_TYPE_I32:
3981         align = 4;
3982         break;
3983     case TCG_TYPE_I64:
3984     case TCG_TYPE_V64:
3985         align = 8;
3986         break;
3987     case TCG_TYPE_I128:
3988     case TCG_TYPE_V128:
3989     case TCG_TYPE_V256:
3990         /*
3991          * Note that we do not require aligned storage for V256,
3992          * and that we provide alignment for I128 to match V128,
3993          * even if that's above what the host ABI requires.
3994          */
3995         align = 16;
3996         break;
3997     default:
3998         g_assert_not_reached();
3999     }
4000 
4001     /*
4002      * Assume the stack is sufficiently aligned.
4003      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4004      * and do not require 16 byte vector alignment.  This seems slightly
4005      * easier than fully parameterizing the above switch statement.
4006      */
4007     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4008     off = ROUND_UP(s->current_frame_offset, align);
4009 
4010     /* If we've exhausted the stack frame, restart with a smaller TB. */
4011     if (off + size > s->frame_end) {
4012         tcg_raise_tb_overflow(s);
4013     }
4014     s->current_frame_offset = off + size;
4015 #if defined(__sparc__)
4016     off += TCG_TARGET_STACK_BIAS;
4017 #endif
4018 
4019     /* If the object was subdivided, assign memory to all the parts. */
4020     if (ts->base_type != ts->type) {
4021         int part_size = tcg_type_size(ts->type);
4022         int part_count = size / part_size;
4023 
4024         /*
4025          * Each part is allocated sequentially in tcg_temp_new_internal.
4026          * Jump back to the first part by subtracting the current index.
4027          */
4028         ts -= ts->temp_subindex;
4029         for (int i = 0; i < part_count; ++i) {
4030             ts[i].mem_offset = off + i * part_size;
4031             ts[i].mem_base = s->frame_temp;
4032             ts[i].mem_allocated = 1;
4033         }
4034     } else {
4035         ts->mem_offset = off;
4036         ts->mem_base = s->frame_temp;
4037         ts->mem_allocated = 1;
4038     }
4039 }
4040 
4041 /* Assign @reg to @ts, and update reg_to_temp[]. */
4042 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4043 {
4044     if (ts->val_type == TEMP_VAL_REG) {
4045         TCGReg old = ts->reg;
4046         tcg_debug_assert(s->reg_to_temp[old] == ts);
4047         if (old == reg) {
4048             return;
4049         }
4050         s->reg_to_temp[old] = NULL;
4051     }
4052     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4053     s->reg_to_temp[reg] = ts;
4054     ts->val_type = TEMP_VAL_REG;
4055     ts->reg = reg;
4056 }
4057 
4058 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4059 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4060 {
4061     tcg_debug_assert(type != TEMP_VAL_REG);
4062     if (ts->val_type == TEMP_VAL_REG) {
4063         TCGReg reg = ts->reg;
4064         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4065         s->reg_to_temp[reg] = NULL;
4066     }
4067     ts->val_type = type;
4068 }
4069 
4070 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4071 
4072 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4073    mark it free; otherwise mark it dead.  */
4074 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4075 {
4076     TCGTempVal new_type;
4077 
4078     switch (ts->kind) {
4079     case TEMP_FIXED:
4080         return;
4081     case TEMP_GLOBAL:
4082     case TEMP_TB:
4083         new_type = TEMP_VAL_MEM;
4084         break;
4085     case TEMP_EBB:
4086         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4087         break;
4088     case TEMP_CONST:
4089         new_type = TEMP_VAL_CONST;
4090         break;
4091     default:
4092         g_assert_not_reached();
4093     }
4094     set_temp_val_nonreg(s, ts, new_type);
4095 }
4096 
4097 /* Mark a temporary as dead.  */
4098 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4099 {
4100     temp_free_or_dead(s, ts, 1);
4101 }
4102 
4103 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4104    registers needs to be allocated to store a constant.  If 'free_or_dead'
4105    is non-zero, subsequently release the temporary; if it is positive, the
4106    temp is dead; if it is negative, the temp is free.  */
4107 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4108                       TCGRegSet preferred_regs, int free_or_dead)
4109 {
4110     if (!temp_readonly(ts) && !ts->mem_coherent) {
4111         if (!ts->mem_allocated) {
4112             temp_allocate_frame(s, ts);
4113         }
4114         switch (ts->val_type) {
4115         case TEMP_VAL_CONST:
4116             /* If we're going to free the temp immediately, then we won't
4117                require it later in a register, so attempt to store the
4118                constant to memory directly.  */
4119             if (free_or_dead
4120                 && tcg_out_sti(s, ts->type, ts->val,
4121                                ts->mem_base->reg, ts->mem_offset)) {
4122                 break;
4123             }
4124             temp_load(s, ts, tcg_target_available_regs[ts->type],
4125                       allocated_regs, preferred_regs);
4126             /* fallthrough */
4127 
4128         case TEMP_VAL_REG:
4129             tcg_out_st(s, ts->type, ts->reg,
4130                        ts->mem_base->reg, ts->mem_offset);
4131             break;
4132 
4133         case TEMP_VAL_MEM:
4134             break;
4135 
4136         case TEMP_VAL_DEAD:
4137         default:
4138             g_assert_not_reached();
4139         }
4140         ts->mem_coherent = 1;
4141     }
4142     if (free_or_dead) {
4143         temp_free_or_dead(s, ts, free_or_dead);
4144     }
4145 }
4146 
4147 /* free register 'reg' by spilling the corresponding temporary if necessary */
4148 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4149 {
4150     TCGTemp *ts = s->reg_to_temp[reg];
4151     if (ts != NULL) {
4152         temp_sync(s, ts, allocated_regs, 0, -1);
4153     }
4154 }
4155 
4156 /**
4157  * tcg_reg_alloc:
4158  * @required_regs: Set of registers in which we must allocate.
4159  * @allocated_regs: Set of registers which must be avoided.
4160  * @preferred_regs: Set of registers we should prefer.
4161  * @rev: True if we search the registers in "indirect" order.
4162  *
4163  * The allocated register must be in @required_regs & ~@allocated_regs,
4164  * but if we can put it in @preferred_regs we may save a move later.
4165  */
4166 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4167                             TCGRegSet allocated_regs,
4168                             TCGRegSet preferred_regs, bool rev)
4169 {
4170     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4171     TCGRegSet reg_ct[2];
4172     const int *order;
4173 
4174     reg_ct[1] = required_regs & ~allocated_regs;
4175     tcg_debug_assert(reg_ct[1] != 0);
4176     reg_ct[0] = reg_ct[1] & preferred_regs;
4177 
4178     /* Skip the preferred_regs option if it cannot be satisfied,
4179        or if the preference made no difference.  */
4180     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4181 
4182     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4183 
4184     /* Try free registers, preferences first.  */
4185     for (j = f; j < 2; j++) {
4186         TCGRegSet set = reg_ct[j];
4187 
4188         if (tcg_regset_single(set)) {
4189             /* One register in the set.  */
4190             TCGReg reg = tcg_regset_first(set);
4191             if (s->reg_to_temp[reg] == NULL) {
4192                 return reg;
4193             }
4194         } else {
4195             for (i = 0; i < n; i++) {
4196                 TCGReg reg = order[i];
4197                 if (s->reg_to_temp[reg] == NULL &&
4198                     tcg_regset_test_reg(set, reg)) {
4199                     return reg;
4200                 }
4201             }
4202         }
4203     }
4204 
4205     /* We must spill something.  */
4206     for (j = f; j < 2; j++) {
4207         TCGRegSet set = reg_ct[j];
4208 
4209         if (tcg_regset_single(set)) {
4210             /* One register in the set.  */
4211             TCGReg reg = tcg_regset_first(set);
4212             tcg_reg_free(s, reg, allocated_regs);
4213             return reg;
4214         } else {
4215             for (i = 0; i < n; i++) {
4216                 TCGReg reg = order[i];
4217                 if (tcg_regset_test_reg(set, reg)) {
4218                     tcg_reg_free(s, reg, allocated_regs);
4219                     return reg;
4220                 }
4221             }
4222         }
4223     }
4224 
4225     g_assert_not_reached();
4226 }
4227 
4228 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4229                                  TCGRegSet allocated_regs,
4230                                  TCGRegSet preferred_regs, bool rev)
4231 {
4232     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4233     TCGRegSet reg_ct[2];
4234     const int *order;
4235 
4236     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4237     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4238     tcg_debug_assert(reg_ct[1] != 0);
4239     reg_ct[0] = reg_ct[1] & preferred_regs;
4240 
4241     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4242 
4243     /*
4244      * Skip the preferred_regs option if it cannot be satisfied,
4245      * or if the preference made no difference.
4246      */
4247     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4248 
4249     /*
4250      * Minimize the number of flushes by looking for 2 free registers first,
4251      * then a single flush, then two flushes.
4252      */
4253     for (fmin = 2; fmin >= 0; fmin--) {
4254         for (j = k; j < 2; j++) {
4255             TCGRegSet set = reg_ct[j];
4256 
4257             for (i = 0; i < n; i++) {
4258                 TCGReg reg = order[i];
4259 
4260                 if (tcg_regset_test_reg(set, reg)) {
4261                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4262                     if (f >= fmin) {
4263                         tcg_reg_free(s, reg, allocated_regs);
4264                         tcg_reg_free(s, reg + 1, allocated_regs);
4265                         return reg;
4266                     }
4267                 }
4268             }
4269         }
4270     }
4271     g_assert_not_reached();
4272 }
4273 
4274 /* Make sure the temporary is in a register.  If needed, allocate the register
4275    from DESIRED while avoiding ALLOCATED.  */
4276 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4277                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4278 {
4279     TCGReg reg;
4280 
4281     switch (ts->val_type) {
4282     case TEMP_VAL_REG:
4283         return;
4284     case TEMP_VAL_CONST:
4285         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4286                             preferred_regs, ts->indirect_base);
4287         if (ts->type <= TCG_TYPE_I64) {
4288             tcg_out_movi(s, ts->type, reg, ts->val);
4289         } else {
4290             uint64_t val = ts->val;
4291             MemOp vece = MO_64;
4292 
4293             /*
4294              * Find the minimal vector element that matches the constant.
4295              * The targets will, in general, have to do this search anyway,
4296              * do this generically.
4297              */
4298             if (val == dup_const(MO_8, val)) {
4299                 vece = MO_8;
4300             } else if (val == dup_const(MO_16, val)) {
4301                 vece = MO_16;
4302             } else if (val == dup_const(MO_32, val)) {
4303                 vece = MO_32;
4304             }
4305 
4306             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4307         }
4308         ts->mem_coherent = 0;
4309         break;
4310     case TEMP_VAL_MEM:
4311         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4312                             preferred_regs, ts->indirect_base);
4313         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4314         ts->mem_coherent = 1;
4315         break;
4316     case TEMP_VAL_DEAD:
4317     default:
4318         g_assert_not_reached();
4319     }
4320     set_temp_val_reg(s, ts, reg);
4321 }
4322 
4323 /* Save a temporary to memory. 'allocated_regs' is used in case a
4324    temporary registers needs to be allocated to store a constant.  */
4325 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4326 {
4327     /* The liveness analysis already ensures that globals are back
4328        in memory. Keep an tcg_debug_assert for safety. */
4329     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4330 }
4331 
4332 /* save globals to their canonical location and assume they can be
4333    modified be the following code. 'allocated_regs' is used in case a
4334    temporary registers needs to be allocated to store a constant. */
4335 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4336 {
4337     int i, n;
4338 
4339     for (i = 0, n = s->nb_globals; i < n; i++) {
4340         temp_save(s, &s->temps[i], allocated_regs);
4341     }
4342 }
4343 
4344 /* sync globals to their canonical location and assume they can be
4345    read by the following code. 'allocated_regs' is used in case a
4346    temporary registers needs to be allocated to store a constant. */
4347 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4348 {
4349     int i, n;
4350 
4351     for (i = 0, n = s->nb_globals; i < n; i++) {
4352         TCGTemp *ts = &s->temps[i];
4353         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4354                          || ts->kind == TEMP_FIXED
4355                          || ts->mem_coherent);
4356     }
4357 }
4358 
4359 /* at the end of a basic block, we assume all temporaries are dead and
4360    all globals are stored at their canonical location. */
4361 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4362 {
4363     int i;
4364 
4365     for (i = s->nb_globals; i < s->nb_temps; i++) {
4366         TCGTemp *ts = &s->temps[i];
4367 
4368         switch (ts->kind) {
4369         case TEMP_TB:
4370             temp_save(s, ts, allocated_regs);
4371             break;
4372         case TEMP_EBB:
4373             /* The liveness analysis already ensures that temps are dead.
4374                Keep an tcg_debug_assert for safety. */
4375             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4376             break;
4377         case TEMP_CONST:
4378             /* Similarly, we should have freed any allocated register. */
4379             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4380             break;
4381         default:
4382             g_assert_not_reached();
4383         }
4384     }
4385 
4386     save_globals(s, allocated_regs);
4387 }
4388 
4389 /*
4390  * At a conditional branch, we assume all temporaries are dead unless
4391  * explicitly live-across-conditional-branch; all globals and local
4392  * temps are synced to their location.
4393  */
4394 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4395 {
4396     sync_globals(s, allocated_regs);
4397 
4398     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4399         TCGTemp *ts = &s->temps[i];
4400         /*
4401          * The liveness analysis already ensures that temps are dead.
4402          * Keep tcg_debug_asserts for safety.
4403          */
4404         switch (ts->kind) {
4405         case TEMP_TB:
4406             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4407             break;
4408         case TEMP_EBB:
4409         case TEMP_CONST:
4410             break;
4411         default:
4412             g_assert_not_reached();
4413         }
4414     }
4415 }
4416 
4417 /*
4418  * Specialized code generation for INDEX_op_mov_* with a constant.
4419  */
4420 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4421                                   tcg_target_ulong val, TCGLifeData arg_life,
4422                                   TCGRegSet preferred_regs)
4423 {
4424     /* ENV should not be modified.  */
4425     tcg_debug_assert(!temp_readonly(ots));
4426 
4427     /* The movi is not explicitly generated here.  */
4428     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4429     ots->val = val;
4430     ots->mem_coherent = 0;
4431     if (NEED_SYNC_ARG(0)) {
4432         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4433     } else if (IS_DEAD_ARG(0)) {
4434         temp_dead(s, ots);
4435     }
4436 }
4437 
4438 /*
4439  * Specialized code generation for INDEX_op_mov_*.
4440  */
4441 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4442 {
4443     const TCGLifeData arg_life = op->life;
4444     TCGRegSet allocated_regs, preferred_regs;
4445     TCGTemp *ts, *ots;
4446     TCGType otype, itype;
4447     TCGReg oreg, ireg;
4448 
4449     allocated_regs = s->reserved_regs;
4450     preferred_regs = output_pref(op, 0);
4451     ots = arg_temp(op->args[0]);
4452     ts = arg_temp(op->args[1]);
4453 
4454     /* ENV should not be modified.  */
4455     tcg_debug_assert(!temp_readonly(ots));
4456 
4457     /* Note that otype != itype for no-op truncation.  */
4458     otype = ots->type;
4459     itype = ts->type;
4460 
4461     if (ts->val_type == TEMP_VAL_CONST) {
4462         /* propagate constant or generate sti */
4463         tcg_target_ulong val = ts->val;
4464         if (IS_DEAD_ARG(1)) {
4465             temp_dead(s, ts);
4466         }
4467         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4468         return;
4469     }
4470 
4471     /* If the source value is in memory we're going to be forced
4472        to have it in a register in order to perform the copy.  Copy
4473        the SOURCE value into its own register first, that way we
4474        don't have to reload SOURCE the next time it is used. */
4475     if (ts->val_type == TEMP_VAL_MEM) {
4476         temp_load(s, ts, tcg_target_available_regs[itype],
4477                   allocated_regs, preferred_regs);
4478     }
4479     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4480     ireg = ts->reg;
4481 
4482     if (IS_DEAD_ARG(0)) {
4483         /* mov to a non-saved dead register makes no sense (even with
4484            liveness analysis disabled). */
4485         tcg_debug_assert(NEED_SYNC_ARG(0));
4486         if (!ots->mem_allocated) {
4487             temp_allocate_frame(s, ots);
4488         }
4489         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4490         if (IS_DEAD_ARG(1)) {
4491             temp_dead(s, ts);
4492         }
4493         temp_dead(s, ots);
4494         return;
4495     }
4496 
4497     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4498         /*
4499          * The mov can be suppressed.  Kill input first, so that it
4500          * is unlinked from reg_to_temp, then set the output to the
4501          * reg that we saved from the input.
4502          */
4503         temp_dead(s, ts);
4504         oreg = ireg;
4505     } else {
4506         if (ots->val_type == TEMP_VAL_REG) {
4507             oreg = ots->reg;
4508         } else {
4509             /* Make sure to not spill the input register during allocation. */
4510             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4511                                  allocated_regs | ((TCGRegSet)1 << ireg),
4512                                  preferred_regs, ots->indirect_base);
4513         }
4514         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4515             /*
4516              * Cross register class move not supported.
4517              * Store the source register into the destination slot
4518              * and leave the destination temp as TEMP_VAL_MEM.
4519              */
4520             assert(!temp_readonly(ots));
4521             if (!ts->mem_allocated) {
4522                 temp_allocate_frame(s, ots);
4523             }
4524             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4525             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4526             ots->mem_coherent = 1;
4527             return;
4528         }
4529     }
4530     set_temp_val_reg(s, ots, oreg);
4531     ots->mem_coherent = 0;
4532 
4533     if (NEED_SYNC_ARG(0)) {
4534         temp_sync(s, ots, allocated_regs, 0, 0);
4535     }
4536 }
4537 
4538 /*
4539  * Specialized code generation for INDEX_op_dup_vec.
4540  */
4541 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4542 {
4543     const TCGLifeData arg_life = op->life;
4544     TCGRegSet dup_out_regs, dup_in_regs;
4545     TCGTemp *its, *ots;
4546     TCGType itype, vtype;
4547     unsigned vece;
4548     int lowpart_ofs;
4549     bool ok;
4550 
4551     ots = arg_temp(op->args[0]);
4552     its = arg_temp(op->args[1]);
4553 
4554     /* ENV should not be modified.  */
4555     tcg_debug_assert(!temp_readonly(ots));
4556 
4557     itype = its->type;
4558     vece = TCGOP_VECE(op);
4559     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4560 
4561     if (its->val_type == TEMP_VAL_CONST) {
4562         /* Propagate constant via movi -> dupi.  */
4563         tcg_target_ulong val = its->val;
4564         if (IS_DEAD_ARG(1)) {
4565             temp_dead(s, its);
4566         }
4567         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4568         return;
4569     }
4570 
4571     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4572     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4573 
4574     /* Allocate the output register now.  */
4575     if (ots->val_type != TEMP_VAL_REG) {
4576         TCGRegSet allocated_regs = s->reserved_regs;
4577         TCGReg oreg;
4578 
4579         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4580             /* Make sure to not spill the input register. */
4581             tcg_regset_set_reg(allocated_regs, its->reg);
4582         }
4583         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4584                              output_pref(op, 0), ots->indirect_base);
4585         set_temp_val_reg(s, ots, oreg);
4586     }
4587 
4588     switch (its->val_type) {
4589     case TEMP_VAL_REG:
4590         /*
4591          * The dup constriaints must be broad, covering all possible VECE.
4592          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4593          * to fail, indicating that extra moves are required for that case.
4594          */
4595         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4596             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4597                 goto done;
4598             }
4599             /* Try again from memory or a vector input register.  */
4600         }
4601         if (!its->mem_coherent) {
4602             /*
4603              * The input register is not synced, and so an extra store
4604              * would be required to use memory.  Attempt an integer-vector
4605              * register move first.  We do not have a TCGRegSet for this.
4606              */
4607             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4608                 break;
4609             }
4610             /* Sync the temp back to its slot and load from there.  */
4611             temp_sync(s, its, s->reserved_regs, 0, 0);
4612         }
4613         /* fall through */
4614 
4615     case TEMP_VAL_MEM:
4616         lowpart_ofs = 0;
4617         if (HOST_BIG_ENDIAN) {
4618             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4619         }
4620         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4621                              its->mem_offset + lowpart_ofs)) {
4622             goto done;
4623         }
4624         /* Load the input into the destination vector register. */
4625         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4626         break;
4627 
4628     default:
4629         g_assert_not_reached();
4630     }
4631 
4632     /* We now have a vector input register, so dup must succeed. */
4633     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4634     tcg_debug_assert(ok);
4635 
4636  done:
4637     ots->mem_coherent = 0;
4638     if (IS_DEAD_ARG(1)) {
4639         temp_dead(s, its);
4640     }
4641     if (NEED_SYNC_ARG(0)) {
4642         temp_sync(s, ots, s->reserved_regs, 0, 0);
4643     }
4644     if (IS_DEAD_ARG(0)) {
4645         temp_dead(s, ots);
4646     }
4647 }
4648 
/*
 * Allocate registers for one op described by tcg_op_defs[] and emit its
 * host code.
 *
 * Steps, in order:
 *   1. Copy the op's constant arguments into new_args[] unchanged.
 *   2. Visit the inputs in constraint sort order.  An input whose temp is
 *      TEMP_VAL_CONST and whose value passes tcg_target_const_match() is
 *      passed as an immediate (const_args[i] = 1).  Every other input is
 *      placed in a register chosen according to its constraint (plain,
 *      aliased to an output, or member of a register pair) and that
 *      register is recorded in new_args[i].
 *   3. Kill the inputs that op->life marks as dead.
 *   4. For TCG_OPF_COND_BRANCH / TCG_OPF_BB_END ops, run the matching
 *      end-of-block checks.  Otherwise free the call-clobbered registers
 *      (TCG_OPF_CALL_CLOBBER), check the globals (TCG_OPF_SIDE_EFFECTS)
 *      and pick a register for every output.
 *   5. Emit the host instruction(s).
 *   6. Sync and/or kill each output as requested by op->life.
 */
4649 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4650 {
4651     const TCGLifeData arg_life = op->life;
4652     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4653     TCGRegSet i_allocated_regs;
4654     TCGRegSet o_allocated_regs;
4655     int i, k, nb_iargs, nb_oargs;
4656     TCGReg reg;
4657     TCGArg arg;
4658     const TCGArgConstraint *arg_ct;
4659     TCGTemp *ts;
4660     TCGArg new_args[TCG_MAX_OP_ARGS];
4661     int const_args[TCG_MAX_OP_ARGS];
4662 
4663     nb_oargs = def->nb_oargs;
4664     nb_iargs = def->nb_iargs;
4665 
4666     /* copy constants */
4667     memcpy(new_args + nb_oargs + nb_iargs,
4668            op->args + nb_oargs + nb_iargs,
4669            sizeof(TCGArg) * def->nb_cargs);
4670 
    /* Both sets start out containing only the reserved registers. */
4671     i_allocated_regs = s->reserved_regs;
4672     o_allocated_regs = s->reserved_regs;
4673 
4674     /* satisfy input constraints */
4675     for (k = 0; k < nb_iargs; k++) {
4676         TCGRegSet i_preferred_regs, i_required_regs;
4677         bool allocate_new_reg, copyto_new_reg;
4678         TCGTemp *ts2;
4679         int i1, i2;
4680 
        /* k counts inputs; i is the argument index chosen by the sort. */
4681         i = def->args_ct[nb_oargs + k].sort_index;
4682         arg = op->args[i];
4683         arg_ct = &def->args_ct[i];
4684         ts = arg_temp(arg);
4685 
4686         if (ts->val_type == TEMP_VAL_CONST
4687             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4688             /* constant is OK for instruction */
4689             const_args[i] = 1;
4690             new_args[i] = ts->val;
4691             continue;
4692         }
4693 
        /*
         * NOTE(review): ts->reg is read here regardless of ts->val_type;
         * the uses below appear to test TEMP_VAL_REG first or to reload
         * reg after temp_load() -- confirm before relying on it.
         */
4694         reg = ts->reg;
4695         i_preferred_regs = 0;
4696         i_required_regs = arg_ct->regs;
4697         allocate_new_reg = false;
4698         copyto_new_reg = false;
4699 
4700         switch (arg_ct->pair) {
4701         case 0: /* not paired */
4702             if (arg_ct->ialias) {
4703                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4704 
4705                 /*
4706                  * If the input is readonly, then it cannot also be an
4707                  * output and aliased to itself.  If the input is not
4708                  * dead after the instruction, we must allocate a new
4709                  * register and move it.
4710                  */
4711                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4712                     || def->args_ct[arg_ct->alias_index].newreg) {
4713                     allocate_new_reg = true;
4714                 } else if (ts->val_type == TEMP_VAL_REG) {
4715                     /*
4716                      * Check if the current register has already been
4717                      * allocated for another input.
4718                      */
4719                     allocate_new_reg =
4720                         tcg_regset_test_reg(i_allocated_regs, reg);
4721                 }
4722             }
4723             if (!allocate_new_reg) {
4724                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4725                           i_preferred_regs);
4726                 reg = ts->reg;
                /* The temp may sit in a register the constraint rejects. */
4727                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4728             }
4729             if (allocate_new_reg) {
4730                 /*
4731                  * Allocate a new register matching the constraint
4732                  * and move the temporary register into it.
4733                  */
4734                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4735                           i_allocated_regs, 0);
4736                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4737                                     i_preferred_regs, ts->indirect_base);
4738                 copyto_new_reg = true;
4739             }
4740             break;
4741 
4742         case 1:
4743             /* First of an input pair; if i1 == i2, the second is an output. */
4744             i1 = i;
4745             i2 = arg_ct->pair_index;
4746             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4747 
4748             /*
4749              * It is easier to default to allocating a new pair
4750              * and to identify a few cases where it's not required.
4751              */
4752             if (arg_ct->ialias) {
4753                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4754                 if (IS_DEAD_ARG(i1) &&
4755                     IS_DEAD_ARG(i2) &&
4756                     !temp_readonly(ts) &&
4757                     ts->val_type == TEMP_VAL_REG &&
4758                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4759                     tcg_regset_test_reg(i_required_regs, reg) &&
4760                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4761                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4762                     (ts2
4763                      ? ts2->val_type == TEMP_VAL_REG &&
4764                        ts2->reg == reg + 1 &&
4765                        !temp_readonly(ts2)
4766                      : s->reg_to_temp[reg + 1] == NULL)) {
4767                     break;
4768                 }
4769             } else {
4770                 /* Without aliasing, the pair must also be an input. */
4771                 tcg_debug_assert(ts2);
4772                 if (ts->val_type == TEMP_VAL_REG &&
4773                     ts2->val_type == TEMP_VAL_REG &&
4774                     ts2->reg == reg + 1 &&
4775                     tcg_regset_test_reg(i_required_regs, reg)) {
4776                     break;
4777                 }
4778             }
4779             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4780                                      0, ts->indirect_base);
4781             goto do_pair;
4782 
4783         case 2: /* pair second */
            /* The second member uses the register after the first's. */
4784             reg = new_args[arg_ct->pair_index] + 1;
4785             goto do_pair;
4786 
4787         case 3: /* ialias with second output, no first input */
4788             tcg_debug_assert(arg_ct->ialias);
4789             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4790 
4791             if (IS_DEAD_ARG(i) &&
4792                 !temp_readonly(ts) &&
4793                 ts->val_type == TEMP_VAL_REG &&
4794                 reg > 0 &&
4795                 s->reg_to_temp[reg - 1] == NULL &&
4796                 tcg_regset_test_reg(i_required_regs, reg) &&
4797                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4798                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                /* Keep reg - 1 away from the remaining inputs. */
4799                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4800                 break;
4801             }
4802             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4803                                      i_allocated_regs, 0,
4804                                      ts->indirect_base);
4805             tcg_regset_set_reg(i_allocated_regs, reg);
4806             reg += 1;
4807             goto do_pair;
4808 
4809         do_pair:
4810             /*
4811              * If an aliased input is not dead after the instruction,
4812              * we must allocate a new register and move it.
4813              */
4814             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4815                 TCGRegSet t_allocated_regs = i_allocated_regs;
4816 
4817                 /*
4818                  * Because of the alias, and the continued life, make sure
4819                  * that the temp is somewhere *other* than the reg pair,
4820                  * and we get a copy in reg.
4821                  */
4822                 tcg_regset_set_reg(t_allocated_regs, reg);
4823                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4824                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4825                     /* If ts was already in reg, copy it somewhere else. */
4826                     TCGReg nr;
4827                     bool ok;
4828 
4829                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4830                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4831                                        t_allocated_regs, 0, ts->indirect_base);
4832                     ok = tcg_out_mov(s, ts->type, nr, reg);
4833                     tcg_debug_assert(ok);
4834 
4835                     set_temp_val_reg(s, ts, nr);
4836                 } else {
4837                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4838                               t_allocated_regs, 0);
4839                     copyto_new_reg = true;
4840                 }
4841             } else {
4842                 /* Preferably allocate to reg, otherwise copy. */
4843                 i_required_regs = (TCGRegSet)1 << reg;
4844                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4845                           i_preferred_regs);
4846                 copyto_new_reg = ts->reg != reg;
4847             }
4848             break;
4849 
4850         default:
4851             g_assert_not_reached();
4852         }
4853 
4854         if (copyto_new_reg) {
4855             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4856                 /*
4857                  * Cross register class move not supported.  Sync the
4858                  * temp back to its slot and load from there.
4859                  */
4860                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4861                 tcg_out_ld(s, ts->type, reg,
4862                            ts->mem_base->reg, ts->mem_offset);
4863             }
4864         }
        /* Record the register and keep later inputs away from it. */
4865         new_args[i] = reg;
4866         const_args[i] = 0;
4867         tcg_regset_set_reg(i_allocated_regs, reg);
4868     }
4869 
4870     /* mark dead temporaries and free the associated registers */
4871     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4872         if (IS_DEAD_ARG(i)) {
4873             temp_dead(s, arg_temp(op->args[i]));
4874         }
4875     }
4876 
4877     if (def->flags & TCG_OPF_COND_BRANCH) {
4878         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4879     } else if (def->flags & TCG_OPF_BB_END) {
4880         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4881     } else {
4882         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4883             /* XXX: permit generic clobber register list ? */
4884             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4885                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4886                     tcg_reg_free(s, i, i_allocated_regs);
4887                 }
4888             }
4889         }
4890         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4891             /* sync globals if the op has side effects and might trigger
4892                an exception. */
4893             sync_globals(s, i_allocated_regs);
4894         }
4895 
4896         /* satisfy the output constraints */
4897         for(k = 0; k < nb_oargs; k++) {
4898             i = def->args_ct[k].sort_index;
4899             arg = op->args[i];
4900             arg_ct = &def->args_ct[i];
4901             ts = arg_temp(arg);
4902 
4903             /* ENV should not be modified.  */
4904             tcg_debug_assert(!temp_readonly(ts));
4905 
4906             switch (arg_ct->pair) {
4907             case 0: /* not paired */
                /*
                 * An output aliased to an input reuses that input's
                 * register, unless the input was passed as a constant.
                 * A newreg output is allocated outside the registers
                 * used by the inputs as well as by earlier outputs.
                 */
4908                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4909                     reg = new_args[arg_ct->alias_index];
4910                 } else if (arg_ct->newreg) {
4911                     reg = tcg_reg_alloc(s, arg_ct->regs,
4912                                         i_allocated_regs | o_allocated_regs,
4913                                         output_pref(op, k), ts->indirect_base);
4914                 } else {
4915                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4916                                         output_pref(op, k), ts->indirect_base);
4917                 }
4918                 break;
4919 
4920             case 1: /* first of pair */
4921                 tcg_debug_assert(!arg_ct->newreg);
4922                 if (arg_ct->oalias) {
4923                     reg = new_args[arg_ct->alias_index];
4924                     break;
4925                 }
4926                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4927                                          output_pref(op, k), ts->indirect_base);
4928                 break;
4929 
4930             case 2: /* second of pair */
4931                 tcg_debug_assert(!arg_ct->newreg);
4932                 if (arg_ct->oalias) {
4933                     reg = new_args[arg_ct->alias_index];
4934                 } else {
4935                     reg = new_args[arg_ct->pair_index] + 1;
4936                 }
4937                 break;
4938 
4939             case 3: /* first of pair, aliasing with a second input */
4940                 tcg_debug_assert(!arg_ct->newreg);
4941                 reg = new_args[arg_ct->pair_index] - 1;
4942                 break;
4943 
4944             default:
4945                 g_assert_not_reached();
4946             }
            /* Bind the output temp to reg; its memory copy is now stale. */
4947             tcg_regset_set_reg(o_allocated_regs, reg);
4948             set_temp_val_reg(s, ts, reg);
4949             ts->mem_coherent = 0;
4950             new_args[i] = reg;
4951         }
4952     }
4953 
4954     /* emit instruction */
    /*
     * The extension ops listed explicitly are emitted through dedicated
     * tcg_out_* helpers; everything else goes to tcg_out_vec_op() or
     * tcg_out_op().
     */
4955     switch (op->opc) {
4956     case INDEX_op_ext8s_i32:
4957         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4958         break;
4959     case INDEX_op_ext8s_i64:
4960         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4961         break;
4962     case INDEX_op_ext8u_i32:
4963     case INDEX_op_ext8u_i64:
4964         tcg_out_ext8u(s, new_args[0], new_args[1]);
4965         break;
4966     case INDEX_op_ext16s_i32:
4967         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4968         break;
4969     case INDEX_op_ext16s_i64:
4970         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4971         break;
4972     case INDEX_op_ext16u_i32:
4973     case INDEX_op_ext16u_i64:
4974         tcg_out_ext16u(s, new_args[0], new_args[1]);
4975         break;
4976     case INDEX_op_ext32s_i64:
4977         tcg_out_ext32s(s, new_args[0], new_args[1]);
4978         break;
4979     case INDEX_op_ext32u_i64:
4980         tcg_out_ext32u(s, new_args[0], new_args[1]);
4981         break;
4982     case INDEX_op_ext_i32_i64:
4983         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4984         break;
4985     case INDEX_op_extu_i32_i64:
4986         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4987         break;
4988     case INDEX_op_extrl_i64_i32:
4989         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4990         break;
4991     default:
4992         if (def->flags & TCG_OPF_VECTOR) {
4993             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4994                            new_args, const_args);
4995         } else {
4996             tcg_out_op(s, op->opc, new_args, const_args);
4997         }
4998         break;
4999     }
5000 
5001     /* move the outputs in the correct register if needed */
5002     for(i = 0; i < nb_oargs; i++) {
5003         ts = arg_temp(op->args[i]);
5004 
5005         /* ENV should not be modified.  */
5006         tcg_debug_assert(!temp_readonly(ts));
5007 
5008         if (NEED_SYNC_ARG(i)) {
5009             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5010         } else if (IS_DEAD_ARG(i)) {
5011             temp_dead(s, ts);
5012         }
5013     }
5014 }
5015 
5016 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5017 {
5018     const TCGLifeData arg_life = op->life;
5019     TCGTemp *ots, *itsl, *itsh;
5020     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5021 
5022     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5023     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5024     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5025 
5026     ots = arg_temp(op->args[0]);
5027     itsl = arg_temp(op->args[1]);
5028     itsh = arg_temp(op->args[2]);
5029 
5030     /* ENV should not be modified.  */
5031     tcg_debug_assert(!temp_readonly(ots));
5032 
5033     /* Allocate the output register now.  */
5034     if (ots->val_type != TEMP_VAL_REG) {
5035         TCGRegSet allocated_regs = s->reserved_regs;
5036         TCGRegSet dup_out_regs =
5037             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5038         TCGReg oreg;
5039 
5040         /* Make sure to not spill the input registers. */
5041         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5042             tcg_regset_set_reg(allocated_regs, itsl->reg);
5043         }
5044         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5045             tcg_regset_set_reg(allocated_regs, itsh->reg);
5046         }
5047 
5048         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5049                              output_pref(op, 0), ots->indirect_base);
5050         set_temp_val_reg(s, ots, oreg);
5051     }
5052 
5053     /* Promote dup2 of immediates to dupi_vec. */
5054     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5055         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5056         MemOp vece = MO_64;
5057 
5058         if (val == dup_const(MO_8, val)) {
5059             vece = MO_8;
5060         } else if (val == dup_const(MO_16, val)) {
5061             vece = MO_16;
5062         } else if (val == dup_const(MO_32, val)) {
5063             vece = MO_32;
5064         }
5065 
5066         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5067         goto done;
5068     }
5069 
5070     /* If the two inputs form one 64-bit value, try dupm_vec. */
5071     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5072         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5073         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5074         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5075 
5076         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5077         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5078 
5079         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5080                              its->mem_base->reg, its->mem_offset)) {
5081             goto done;
5082         }
5083     }
5084 
5085     /* Fall back to generic expansion. */
5086     return false;
5087 
5088  done:
5089     ots->mem_coherent = 0;
5090     if (IS_DEAD_ARG(1)) {
5091         temp_dead(s, itsl);
5092     }
5093     if (IS_DEAD_ARG(2)) {
5094         temp_dead(s, itsh);
5095     }
5096     if (NEED_SYNC_ARG(0)) {
5097         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5098     } else if (IS_DEAD_ARG(0)) {
5099         temp_dead(s, ots);
5100     }
5101     return true;
5102 }
5103 
5104 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5105                          TCGRegSet allocated_regs)
5106 {
5107     if (ts->val_type == TEMP_VAL_REG) {
5108         if (ts->reg != reg) {
5109             tcg_reg_free(s, reg, allocated_regs);
5110             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5111                 /*
5112                  * Cross register class move not supported.  Sync the
5113                  * temp back to its slot and load from there.
5114                  */
5115                 temp_sync(s, ts, allocated_regs, 0, 0);
5116                 tcg_out_ld(s, ts->type, reg,
5117                            ts->mem_base->reg, ts->mem_offset);
5118             }
5119         }
5120     } else {
5121         TCGRegSet arg_set = 0;
5122 
5123         tcg_reg_free(s, reg, allocated_regs);
5124         tcg_regset_set_reg(arg_set, reg);
5125         temp_load(s, ts, arg_set, allocated_regs, 0);
5126     }
5127 }
5128 
5129 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5130                          TCGRegSet allocated_regs)
5131 {
5132     /*
5133      * When the destination is on the stack, load up the temp and store.
5134      * If there are many call-saved registers, the temp might live to
5135      * see another use; otherwise it'll be discarded.
5136      */
5137     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5138     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5139                arg_slot_stk_ofs(arg_slot));
5140 }
5141 
5142 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5143                             TCGTemp *ts, TCGRegSet *allocated_regs)
5144 {
5145     if (arg_slot_reg_p(l->arg_slot)) {
5146         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5147         load_arg_reg(s, reg, ts, *allocated_regs);
5148         tcg_regset_set_reg(*allocated_regs, reg);
5149     } else {
5150         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5151     }
5152 }
5153 
5154 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5155                          intptr_t ref_off, TCGRegSet *allocated_regs)
5156 {
5157     TCGReg reg;
5158 
5159     if (arg_slot_reg_p(arg_slot)) {
5160         reg = tcg_target_call_iarg_regs[arg_slot];
5161         tcg_reg_free(s, reg, *allocated_regs);
5162         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5163         tcg_regset_set_reg(*allocated_regs, reg);
5164     } else {
5165         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5166                             *allocated_regs, 0, false);
5167         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5168         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5169                    arg_slot_stk_ofs(arg_slot));
5170     }
5171 }
5172 
/*
 * Register allocation for a helper call op.
 *
 * The steps below run in this order:
 *   1. place every input according to the helper's argument locations
 *      (info->in[]);
 *   2. release inputs that are dead after this op;
 *   3. free every call-clobbered register;
 *   4. sync or save globals, depending on the helper's flags;
 *   5. if the result is returned by reference, pass a pointer to the
 *      output temp's memory slot in argument slot 0;
 *   6. emit the call;
 *   7. record where each output now lives, then sync or discard the
 *      outputs as the liveness data requests.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /*
     * NOTE(review): arg_life is not referenced by name below; it is
     * presumably consumed by the IS_DEAD_ARG()/NEED_SYNC_ARG() macros,
     * so it must keep this name -- confirm against the macro definitions.
     */
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    /* Registers that may not be evicted while placing arguments. */
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        /* Inputs follow the outputs in op->args[]. */
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /*
             * Store the value into its reference slot on the stack,
             * then pass the address of that slot as the argument.
             */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Only the value is stored here; no pointer is passed. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        /* The output needs a memory slot for the callee to write to. */
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            /* The value now lives in the return register, not in memory. */
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            /* Spill the vector return register to the output's slot. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5303 
5304 /**
5305  * atom_and_align_for_opc:
5306  * @s: tcg context
5307  * @opc: memory operation code
5308  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5309  * @allow_two_ops: true if we are prepared to issue two operations
5310  *
5311  * Return the alignment and atomicity to use for the inline fast path
5312  * for the given memory operation.  The alignment may be larger than
5313  * that specified in @opc, and the correct alignment will be diagnosed
5314  * by the slow path helper.
5315  *
5316  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5317  * and issue two loads or stores for subalignment.
5318  */
5319 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5320                                            MemOp host_atom, bool allow_two_ops)
5321 {
5322     MemOp align = get_alignment_bits(opc);
5323     MemOp size = opc & MO_SIZE;
5324     MemOp half = size ? size - 1 : 0;
5325     MemOp atmax;
5326     MemOp atom;
5327 
5328     /* When serialized, no further atomicity required.  */
5329     if (s->gen_tb->cflags & CF_PARALLEL) {
5330         atom = opc & MO_ATOM_MASK;
5331     } else {
5332         atom = MO_ATOM_NONE;
5333     }
5334 
5335     switch (atom) {
5336     case MO_ATOM_NONE:
5337         /* The operation requires no specific atomicity. */
5338         atmax = MO_8;
5339         break;
5340 
5341     case MO_ATOM_IFALIGN:
5342         atmax = size;
5343         break;
5344 
5345     case MO_ATOM_IFALIGN_PAIR:
5346         atmax = half;
5347         break;
5348 
5349     case MO_ATOM_WITHIN16:
5350         atmax = size;
5351         if (size == MO_128) {
5352             /* Misalignment implies !within16, and therefore no atomicity. */
5353         } else if (host_atom != MO_ATOM_WITHIN16) {
5354             /* The host does not implement within16, so require alignment. */
5355             align = MAX(align, size);
5356         }
5357         break;
5358 
5359     case MO_ATOM_WITHIN16_PAIR:
5360         atmax = size;
5361         /*
5362          * Misalignment implies !within16, and therefore half atomicity.
5363          * Any host prepared for two operations can implement this with
5364          * half alignment.
5365          */
5366         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5367             align = MAX(align, half);
5368         }
5369         break;
5370 
5371     case MO_ATOM_SUBALIGN:
5372         atmax = size;
5373         if (host_atom != MO_ATOM_SUBALIGN) {
5374             /* If unaligned but not odd, there are subobjects up to half. */
5375             if (allow_two_ops) {
5376                 align = MAX(align, half);
5377             } else {
5378                 align = MAX(align, size);
5379             }
5380         }
5381         break;
5382 
5383     default:
5384         g_assert_not_reached();
5385     }
5386 
5387     return (TCGAtomAlign){ .atom = atmax, .align = align };
5388 }
5389 
5390 /*
5391  * Similarly for qemu_ld/st slow path helpers.
5392  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5393  * using only the provided backend tcg_out_* functions.
5394  */
5395 
5396 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5397 {
5398     int ofs = arg_slot_stk_ofs(slot);
5399 
5400     /*
5401      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5402      * require extension to uint64_t, adjust the address for uint32_t.
5403      */
5404     if (HOST_BIG_ENDIAN &&
5405         TCG_TARGET_REG_BITS == 64 &&
5406         type == TCG_TYPE_I32) {
5407         ofs += 4;
5408     }
5409     return ofs;
5410 }
5411 
5412 static void tcg_out_helper_load_slots(TCGContext *s,
5413                                       unsigned nmov, TCGMovExtend *mov,
5414                                       const TCGLdstHelperParam *parm)
5415 {
5416     unsigned i;
5417     TCGReg dst3;
5418 
5419     /*
5420      * Start from the end, storing to the stack first.
5421      * This frees those registers, so we need not consider overlap.
5422      */
5423     for (i = nmov; i-- > 0; ) {
5424         unsigned slot = mov[i].dst;
5425 
5426         if (arg_slot_reg_p(slot)) {
5427             goto found_reg;
5428         }
5429 
5430         TCGReg src = mov[i].src;
5431         TCGType dst_type = mov[i].dst_type;
5432         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5433 
5434         /* The argument is going onto the stack; extend into scratch. */
5435         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5436             tcg_debug_assert(parm->ntmp != 0);
5437             mov[i].dst = src = parm->tmp[0];
5438             tcg_out_movext1(s, &mov[i]);
5439         }
5440 
5441         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5442                    tcg_out_helper_stk_ofs(dst_type, slot));
5443     }
5444     return;
5445 
5446  found_reg:
5447     /*
5448      * The remaining arguments are in registers.
5449      * Convert slot numbers to argument registers.
5450      */
5451     nmov = i + 1;
5452     for (i = 0; i < nmov; ++i) {
5453         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5454     }
5455 
5456     switch (nmov) {
5457     case 4:
5458         /* The backend must have provided enough temps for the worst case. */
5459         tcg_debug_assert(parm->ntmp >= 2);
5460 
5461         dst3 = mov[3].dst;
5462         for (unsigned j = 0; j < 3; ++j) {
5463             if (dst3 == mov[j].src) {
5464                 /*
5465                  * Conflict. Copy the source to a temporary, perform the
5466                  * remaining moves, then the extension from our scratch
5467                  * on the way out.
5468                  */
5469                 TCGReg scratch = parm->tmp[1];
5470 
5471                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5472                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5473                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5474                 break;
5475             }
5476         }
5477 
5478         /* No conflicts: perform this move and continue. */
5479         tcg_out_movext1(s, &mov[3]);
5480         /* fall through */
5481 
5482     case 3:
5483         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5484                         parm->ntmp ? parm->tmp[0] : -1);
5485         break;
5486     case 2:
5487         tcg_out_movext2(s, mov, mov + 1,
5488                         parm->ntmp ? parm->tmp[0] : -1);
5489         break;
5490     case 1:
5491         tcg_out_movext1(s, mov);
5492         break;
5493     default:
5494         g_assert_not_reached();
5495     }
5496 }
5497 
5498 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5499                                     TCGType type, tcg_target_long imm,
5500                                     const TCGLdstHelperParam *parm)
5501 {
5502     if (arg_slot_reg_p(slot)) {
5503         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5504     } else {
5505         int ofs = tcg_out_helper_stk_ofs(type, slot);
5506         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5507             tcg_debug_assert(parm->ntmp != 0);
5508             tcg_out_movi(s, type, parm->tmp[0], imm);
5509             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5510         }
5511     }
5512 }
5513 
5514 static void tcg_out_helper_load_common_args(TCGContext *s,
5515                                             const TCGLabelQemuLdst *ldst,
5516                                             const TCGLdstHelperParam *parm,
5517                                             const TCGHelperInfo *info,
5518                                             unsigned next_arg)
5519 {
5520     TCGMovExtend ptr_mov = {
5521         .dst_type = TCG_TYPE_PTR,
5522         .src_type = TCG_TYPE_PTR,
5523         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5524     };
5525     const TCGCallArgumentLoc *loc = &info->in[0];
5526     TCGType type;
5527     unsigned slot;
5528     tcg_target_ulong imm;
5529 
5530     /*
5531      * Handle env, which is always first.
5532      */
5533     ptr_mov.dst = loc->arg_slot;
5534     ptr_mov.src = TCG_AREG0;
5535     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5536 
5537     /*
5538      * Handle oi.
5539      */
5540     imm = ldst->oi;
5541     loc = &info->in[next_arg];
5542     type = TCG_TYPE_I32;
5543     switch (loc->kind) {
5544     case TCG_CALL_ARG_NORMAL:
5545         break;
5546     case TCG_CALL_ARG_EXTEND_U:
5547     case TCG_CALL_ARG_EXTEND_S:
5548         /* No extension required for MemOpIdx. */
5549         tcg_debug_assert(imm <= INT32_MAX);
5550         type = TCG_TYPE_REG;
5551         break;
5552     default:
5553         g_assert_not_reached();
5554     }
5555     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5556     next_arg++;
5557 
5558     /*
5559      * Handle ra.
5560      */
5561     loc = &info->in[next_arg];
5562     slot = loc->arg_slot;
5563     if (parm->ra_gen) {
5564         int arg_reg = -1;
5565         TCGReg ra_reg;
5566 
5567         if (arg_slot_reg_p(slot)) {
5568             arg_reg = tcg_target_call_iarg_regs[slot];
5569         }
5570         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5571 
5572         ptr_mov.dst = slot;
5573         ptr_mov.src = ra_reg;
5574         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5575     } else {
5576         imm = (uintptr_t)ldst->raddr;
5577         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5578     }
5579 }
5580 
5581 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5582                                        const TCGCallArgumentLoc *loc,
5583                                        TCGType dst_type, TCGType src_type,
5584                                        TCGReg lo, TCGReg hi)
5585 {
5586     MemOp reg_mo;
5587 
5588     if (dst_type <= TCG_TYPE_REG) {
5589         MemOp src_ext;
5590 
5591         switch (loc->kind) {
5592         case TCG_CALL_ARG_NORMAL:
5593             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5594             break;
5595         case TCG_CALL_ARG_EXTEND_U:
5596             dst_type = TCG_TYPE_REG;
5597             src_ext = MO_UL;
5598             break;
5599         case TCG_CALL_ARG_EXTEND_S:
5600             dst_type = TCG_TYPE_REG;
5601             src_ext = MO_SL;
5602             break;
5603         default:
5604             g_assert_not_reached();
5605         }
5606 
5607         mov[0].dst = loc->arg_slot;
5608         mov[0].dst_type = dst_type;
5609         mov[0].src = lo;
5610         mov[0].src_type = src_type;
5611         mov[0].src_ext = src_ext;
5612         return 1;
5613     }
5614 
5615     if (TCG_TARGET_REG_BITS == 32) {
5616         assert(dst_type == TCG_TYPE_I64);
5617         reg_mo = MO_32;
5618     } else {
5619         assert(dst_type == TCG_TYPE_I128);
5620         reg_mo = MO_64;
5621     }
5622 
5623     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5624     mov[0].src = lo;
5625     mov[0].dst_type = TCG_TYPE_REG;
5626     mov[0].src_type = TCG_TYPE_REG;
5627     mov[0].src_ext = reg_mo;
5628 
5629     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5630     mov[1].src = hi;
5631     mov[1].dst_type = TCG_TYPE_REG;
5632     mov[1].src_type = TCG_TYPE_REG;
5633     mov[1].src_ext = reg_mo;
5634 
5635     return 2;
5636 }
5637 
5638 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5639                                    const TCGLdstHelperParam *parm)
5640 {
5641     const TCGHelperInfo *info;
5642     const TCGCallArgumentLoc *loc;
5643     TCGMovExtend mov[2];
5644     unsigned next_arg, nmov;
5645     MemOp mop = get_memop(ldst->oi);
5646 
5647     switch (mop & MO_SIZE) {
5648     case MO_8:
5649     case MO_16:
5650     case MO_32:
5651         info = &info_helper_ld32_mmu;
5652         break;
5653     case MO_64:
5654         info = &info_helper_ld64_mmu;
5655         break;
5656     case MO_128:
5657         info = &info_helper_ld128_mmu;
5658         break;
5659     default:
5660         g_assert_not_reached();
5661     }
5662 
5663     /* Defer env argument. */
5664     next_arg = 1;
5665 
5666     loc = &info->in[next_arg];
5667     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5668         /*
5669          * 32-bit host with 32-bit guest: zero-extend the guest address
5670          * to 64-bits for the helper by storing the low part, then
5671          * load a zero for the high part.
5672          */
5673         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5674                                TCG_TYPE_I32, TCG_TYPE_I32,
5675                                ldst->addrlo_reg, -1);
5676         tcg_out_helper_load_slots(s, 1, mov, parm);
5677 
5678         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5679                                 TCG_TYPE_I32, 0, parm);
5680         next_arg += 2;
5681     } else {
5682         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5683                                       ldst->addrlo_reg, ldst->addrhi_reg);
5684         tcg_out_helper_load_slots(s, nmov, mov, parm);
5685         next_arg += nmov;
5686     }
5687 
5688     switch (info->out_kind) {
5689     case TCG_CALL_RET_NORMAL:
5690     case TCG_CALL_RET_BY_VEC:
5691         break;
5692     case TCG_CALL_RET_BY_REF:
5693         /*
5694          * The return reference is in the first argument slot.
5695          * We need memory in which to return: re-use the top of stack.
5696          */
5697         {
5698             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5699 
5700             if (arg_slot_reg_p(0)) {
5701                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5702                                  TCG_REG_CALL_STACK, ofs_slot0);
5703             } else {
5704                 tcg_debug_assert(parm->ntmp != 0);
5705                 tcg_out_addi_ptr(s, parm->tmp[0],
5706                                  TCG_REG_CALL_STACK, ofs_slot0);
5707                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5708                            TCG_REG_CALL_STACK, ofs_slot0);
5709             }
5710         }
5711         break;
5712     default:
5713         g_assert_not_reached();
5714     }
5715 
5716     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5717 }
5718 
5719 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5720                                   bool load_sign,
5721                                   const TCGLdstHelperParam *parm)
5722 {
5723     MemOp mop = get_memop(ldst->oi);
5724     TCGMovExtend mov[2];
5725     int ofs_slot0;
5726 
5727     switch (ldst->type) {
5728     case TCG_TYPE_I64:
5729         if (TCG_TARGET_REG_BITS == 32) {
5730             break;
5731         }
5732         /* fall through */
5733 
5734     case TCG_TYPE_I32:
5735         mov[0].dst = ldst->datalo_reg;
5736         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5737         mov[0].dst_type = ldst->type;
5738         mov[0].src_type = TCG_TYPE_REG;
5739 
5740         /*
5741          * If load_sign, then we allowed the helper to perform the
5742          * appropriate sign extension to tcg_target_ulong, and all
5743          * we need now is a plain move.
5744          *
5745          * If they do not, then we expect the relevant extension
5746          * instruction to be no more expensive than a move, and
5747          * we thus save the icache etc by only using one of two
5748          * helper functions.
5749          */
5750         if (load_sign || !(mop & MO_SIGN)) {
5751             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5752                 mov[0].src_ext = MO_32;
5753             } else {
5754                 mov[0].src_ext = MO_64;
5755             }
5756         } else {
5757             mov[0].src_ext = mop & MO_SSIZE;
5758         }
5759         tcg_out_movext1(s, mov);
5760         return;
5761 
5762     case TCG_TYPE_I128:
5763         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5764         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5765         switch (TCG_TARGET_CALL_RET_I128) {
5766         case TCG_CALL_RET_NORMAL:
5767             break;
5768         case TCG_CALL_RET_BY_VEC:
5769             tcg_out_st(s, TCG_TYPE_V128,
5770                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5771                        TCG_REG_CALL_STACK, ofs_slot0);
5772             /* fall through */
5773         case TCG_CALL_RET_BY_REF:
5774             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5775                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5776             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5777                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5778             return;
5779         default:
5780             g_assert_not_reached();
5781         }
5782         break;
5783 
5784     default:
5785         g_assert_not_reached();
5786     }
5787 
5788     mov[0].dst = ldst->datalo_reg;
5789     mov[0].src =
5790         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5791     mov[0].dst_type = TCG_TYPE_REG;
5792     mov[0].src_type = TCG_TYPE_REG;
5793     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5794 
5795     mov[1].dst = ldst->datahi_reg;
5796     mov[1].src =
5797         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5798     mov[1].dst_type = TCG_TYPE_REG;
5799     mov[1].src_type = TCG_TYPE_REG;
5800     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5801 
5802     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5803 }
5804 
5805 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5806                                    const TCGLdstHelperParam *parm)
5807 {
5808     const TCGHelperInfo *info;
5809     const TCGCallArgumentLoc *loc;
5810     TCGMovExtend mov[4];
5811     TCGType data_type;
5812     unsigned next_arg, nmov, n;
5813     MemOp mop = get_memop(ldst->oi);
5814 
5815     switch (mop & MO_SIZE) {
5816     case MO_8:
5817     case MO_16:
5818     case MO_32:
5819         info = &info_helper_st32_mmu;
5820         data_type = TCG_TYPE_I32;
5821         break;
5822     case MO_64:
5823         info = &info_helper_st64_mmu;
5824         data_type = TCG_TYPE_I64;
5825         break;
5826     case MO_128:
5827         info = &info_helper_st128_mmu;
5828         data_type = TCG_TYPE_I128;
5829         break;
5830     default:
5831         g_assert_not_reached();
5832     }
5833 
5834     /* Defer env argument. */
5835     next_arg = 1;
5836     nmov = 0;
5837 
5838     /* Handle addr argument. */
5839     loc = &info->in[next_arg];
5840     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5841         /*
5842          * 32-bit host with 32-bit guest: zero-extend the guest address
5843          * to 64-bits for the helper by storing the low part.  Later,
5844          * after we have processed the register inputs, we will load a
5845          * zero for the high part.
5846          */
5847         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5848                                TCG_TYPE_I32, TCG_TYPE_I32,
5849                                ldst->addrlo_reg, -1);
5850         next_arg += 2;
5851         nmov += 1;
5852     } else {
5853         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5854                                    ldst->addrlo_reg, ldst->addrhi_reg);
5855         next_arg += n;
5856         nmov += n;
5857     }
5858 
5859     /* Handle data argument. */
5860     loc = &info->in[next_arg];
5861     switch (loc->kind) {
5862     case TCG_CALL_ARG_NORMAL:
5863     case TCG_CALL_ARG_EXTEND_U:
5864     case TCG_CALL_ARG_EXTEND_S:
5865         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5866                                    ldst->datalo_reg, ldst->datahi_reg);
5867         next_arg += n;
5868         nmov += n;
5869         tcg_out_helper_load_slots(s, nmov, mov, parm);
5870         break;
5871 
5872     case TCG_CALL_ARG_BY_REF:
5873         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5874         tcg_debug_assert(data_type == TCG_TYPE_I128);
5875         tcg_out_st(s, TCG_TYPE_I64,
5876                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5877                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5878         tcg_out_st(s, TCG_TYPE_I64,
5879                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5880                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5881 
5882         tcg_out_helper_load_slots(s, nmov, mov, parm);
5883 
5884         if (arg_slot_reg_p(loc->arg_slot)) {
5885             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5886                              TCG_REG_CALL_STACK,
5887                              arg_slot_stk_ofs(loc->ref_slot));
5888         } else {
5889             tcg_debug_assert(parm->ntmp != 0);
5890             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5891                              arg_slot_stk_ofs(loc->ref_slot));
5892             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5893                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5894         }
5895         next_arg += 2;
5896         break;
5897 
5898     default:
5899         g_assert_not_reached();
5900     }
5901 
5902     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5903         /* Zero extend the address by loading a zero for the high part. */
5904         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5905         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5906     }
5907 
5908     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5909 }
5910 
5911 void tcg_dump_op_count(GString *buf)
5912 {
5913     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5914 }
5915 
5916 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5917 {
5918     int i, start_words, num_insns;
5919     TCGOp *op;
5920 
5921     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5922                  && qemu_log_in_addr_range(pc_start))) {
5923         FILE *logfile = qemu_log_trylock();
5924         if (logfile) {
5925             fprintf(logfile, "OP:\n");
5926             tcg_dump_ops(s, logfile, false);
5927             fprintf(logfile, "\n");
5928             qemu_log_unlock(logfile);
5929         }
5930     }
5931 
5932 #ifdef CONFIG_DEBUG_TCG
5933     /* Ensure all labels referenced have been emitted.  */
5934     {
5935         TCGLabel *l;
5936         bool error = false;
5937 
5938         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5939             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5940                 qemu_log_mask(CPU_LOG_TB_OP,
5941                               "$L%d referenced but not present.\n", l->id);
5942                 error = true;
5943             }
5944         }
5945         assert(!error);
5946     }
5947 #endif
5948 
5949     tcg_optimize(s);
5950 
5951     reachable_code_pass(s);
5952     liveness_pass_0(s);
5953     liveness_pass_1(s);
5954 
5955     if (s->nb_indirects > 0) {
5956         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5957                      && qemu_log_in_addr_range(pc_start))) {
5958             FILE *logfile = qemu_log_trylock();
5959             if (logfile) {
5960                 fprintf(logfile, "OP before indirect lowering:\n");
5961                 tcg_dump_ops(s, logfile, false);
5962                 fprintf(logfile, "\n");
5963                 qemu_log_unlock(logfile);
5964             }
5965         }
5966 
5967         /* Replace indirect temps with direct temps.  */
5968         if (liveness_pass_2(s)) {
5969             /* If changes were made, re-run liveness.  */
5970             liveness_pass_1(s);
5971         }
5972     }
5973 
5974     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5975                  && qemu_log_in_addr_range(pc_start))) {
5976         FILE *logfile = qemu_log_trylock();
5977         if (logfile) {
5978             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5979             tcg_dump_ops(s, logfile, true);
5980             fprintf(logfile, "\n");
5981             qemu_log_unlock(logfile);
5982         }
5983     }
5984 
5985     /* Initialize goto_tb jump offsets. */
5986     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5987     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5988     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5989     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5990 
5991     tcg_reg_alloc_start(s);
5992 
5993     /*
5994      * Reset the buffer pointers when restarting after overflow.
5995      * TODO: Move this into translate-all.c with the rest of the
5996      * buffer management.  Having only this done here is confusing.
5997      */
5998     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
5999     s->code_ptr = s->code_buf;
6000 
6001 #ifdef TCG_TARGET_NEED_LDST_LABELS
6002     QSIMPLEQ_INIT(&s->ldst_labels);
6003 #endif
6004 #ifdef TCG_TARGET_NEED_POOL_LABELS
6005     s->pool_labels = NULL;
6006 #endif
6007 
6008     start_words = s->insn_start_words;
6009     s->gen_insn_data =
6010         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6011 
6012     num_insns = -1;
6013     QTAILQ_FOREACH(op, &s->ops, link) {
6014         TCGOpcode opc = op->opc;
6015 
6016         switch (opc) {
6017         case INDEX_op_mov_i32:
6018         case INDEX_op_mov_i64:
6019         case INDEX_op_mov_vec:
6020             tcg_reg_alloc_mov(s, op);
6021             break;
6022         case INDEX_op_dup_vec:
6023             tcg_reg_alloc_dup(s, op);
6024             break;
6025         case INDEX_op_insn_start:
6026             if (num_insns >= 0) {
6027                 size_t off = tcg_current_code_size(s);
6028                 s->gen_insn_end_off[num_insns] = off;
6029                 /* Assert that we do not overflow our stored offset.  */
6030                 assert(s->gen_insn_end_off[num_insns] == off);
6031             }
6032             num_insns++;
6033             for (i = 0; i < start_words; ++i) {
6034                 s->gen_insn_data[num_insns * start_words + i] =
6035                     tcg_get_insn_start_param(op, i);
6036             }
6037             break;
6038         case INDEX_op_discard:
6039             temp_dead(s, arg_temp(op->args[0]));
6040             break;
6041         case INDEX_op_set_label:
6042             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6043             tcg_out_label(s, arg_label(op->args[0]));
6044             break;
6045         case INDEX_op_call:
6046             tcg_reg_alloc_call(s, op);
6047             break;
6048         case INDEX_op_exit_tb:
6049             tcg_out_exit_tb(s, op->args[0]);
6050             break;
6051         case INDEX_op_goto_tb:
6052             tcg_out_goto_tb(s, op->args[0]);
6053             break;
6054         case INDEX_op_dup2_vec:
6055             if (tcg_reg_alloc_dup2(s, op)) {
6056                 break;
6057             }
6058             /* fall through */
6059         default:
6060             /* Sanity check that we've not introduced any unhandled opcodes. */
6061             tcg_debug_assert(tcg_op_supported(opc));
6062             /* Note: in order to speed up the code, it would be much
6063                faster to have specialized register allocator functions for
6064                some common argument patterns */
6065             tcg_reg_alloc_op(s, op);
6066             break;
6067         }
6068         /* Test for (pending) buffer overflow.  The assumption is that any
6069            one operation beginning below the high water mark cannot overrun
6070            the buffer completely.  Thus we can test for overflow after
6071            generating code without having to check during generation.  */
6072         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6073             return -1;
6074         }
6075         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6076         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6077             return -2;
6078         }
6079     }
6080     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6081     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6082 
6083     /* Generate TB finalization at the end of block */
6084 #ifdef TCG_TARGET_NEED_LDST_LABELS
6085     i = tcg_out_ldst_finalize(s);
6086     if (i < 0) {
6087         return i;
6088     }
6089 #endif
6090 #ifdef TCG_TARGET_NEED_POOL_LABELS
6091     i = tcg_out_pool_finalize(s);
6092     if (i < 0) {
6093         return i;
6094     }
6095 #endif
6096     if (!tcg_resolve_relocs(s)) {
6097         return -2;
6098     }
6099 
6100 #ifndef CONFIG_TCG_INTERPRETER
6101     /* flush instruction cache */
6102     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6103                         (uintptr_t)s->code_buf,
6104                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6105 #endif
6106 
6107     return tcg_current_code_size(s);
6108 }
6109 
6110 void tcg_dump_info(GString *buf)
6111 {
6112     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6113 }
6114 
6115 #ifdef ELF_HOST_MACHINE
6116 /* In order to use this feature, the backend needs to do three things:
6117 
6118    (1) Define ELF_HOST_MACHINE to indicate both what value to
6119        put into the ELF image and to indicate support for the feature.
6120 
6121    (2) Define tcg_register_jit.  This should create a buffer containing
6122        the contents of a .debug_frame section that describes the post-
6123        prologue unwind info for the tcg machine.
6124 
6125    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6126 */
6127 
6128 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values written to jit_descriptor.action_flag.  The numbering belongs to
   the GDB interface, so every enumerator is spelled out explicitly.  */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
6134 
/* Describes one in-memory symbol file.  The layout is part of the GDB
   interface and must not be changed.  This file creates exactly one
   instance (one_entry in tcg_register_jit_int).  */
struct jit_code_entry {
    /* List links.  Never assigned in this file: the single static entry
       leaves both zero-initialized.  */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    /* Start of the ELF image built by tcg_register_jit_int.  */
    const void *symfile_addr;
    /* Size of that image in bytes.  */
    uint64_t symfile_size;
};
6141 
/* Type of the __jit_debug_descriptor object.  The layout is part of the
   GDB interface and must not be changed.  */
struct jit_descriptor {
    /* Interface version; statically initialized to 1.  */
    uint32_t version;
    /* A jit_actions_t value; set to JIT_REGISTER_FN before
       __jit_debug_register_code() is called.  */
    uint32_t action_flag;
    /* Entry the current action refers to.  */
    struct jit_code_entry *relevant_entry;
    /* Head of the entry list; here always the single static entry.  */
    struct jit_code_entry *first_entry;
};
6148 
/*
 * Hook invoked after __jit_debug_descriptor has been filled in (see
 * tcg_register_jit_int).  It is kept out of line via noinline, and the
 * body is just an empty asm statement.
 * NOTE(review): presumably the debugger places a breakpoint here -- confirm
 * against the GDB JIT interface documentation.
 */
__attribute__((noinline)) void __jit_debug_register_code(void);

void __jit_debug_register_code(void)
{
    __asm__("");
}
6154 
6155 /* Must statically initialize the version, because GDB may check
6156    the version before we can set it.  */
6157 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6158 
6159 /* End GDB interface.  */
6160 
/*
 * Return the byte offset of @str inside @strtab, a sequence of
 * NUL-terminated strings laid out back to back.
 *
 * The scan begins at offset 1, i.e. it skips the first byte of the table.
 * @str must be present: there is no end-of-table check, so looking up a
 * missing name walks past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry = strtab + 1;

    while (strcmp(entry, str) != 0) {
        /* Step over this entry together with its terminating NUL.  */
        entry += strlen(entry) + 1;
    }
    return entry - strtab;
}
6172 
6173 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6174                                  const void *debug_frame,
6175                                  size_t debug_frame_size)
6176 {
6177     struct __attribute__((packed)) DebugInfo {
6178         uint32_t  len;
6179         uint16_t  version;
6180         uint32_t  abbrev;
6181         uint8_t   ptr_size;
6182         uint8_t   cu_die;
6183         uint16_t  cu_lang;
6184         uintptr_t cu_low_pc;
6185         uintptr_t cu_high_pc;
6186         uint8_t   fn_die;
6187         char      fn_name[16];
6188         uintptr_t fn_low_pc;
6189         uintptr_t fn_high_pc;
6190         uint8_t   cu_eoc;
6191     };
6192 
6193     struct ElfImage {
6194         ElfW(Ehdr) ehdr;
6195         ElfW(Phdr) phdr;
6196         ElfW(Shdr) shdr[7];
6197         ElfW(Sym)  sym[2];
6198         struct DebugInfo di;
6199         uint8_t    da[24];
6200         char       str[80];
6201     };
6202 
6203     struct ElfImage *img;
6204 
6205     static const struct ElfImage img_template = {
6206         .ehdr = {
6207             .e_ident[EI_MAG0] = ELFMAG0,
6208             .e_ident[EI_MAG1] = ELFMAG1,
6209             .e_ident[EI_MAG2] = ELFMAG2,
6210             .e_ident[EI_MAG3] = ELFMAG3,
6211             .e_ident[EI_CLASS] = ELF_CLASS,
6212             .e_ident[EI_DATA] = ELF_DATA,
6213             .e_ident[EI_VERSION] = EV_CURRENT,
6214             .e_type = ET_EXEC,
6215             .e_machine = ELF_HOST_MACHINE,
6216             .e_version = EV_CURRENT,
6217             .e_phoff = offsetof(struct ElfImage, phdr),
6218             .e_shoff = offsetof(struct ElfImage, shdr),
6219             .e_ehsize = sizeof(ElfW(Shdr)),
6220             .e_phentsize = sizeof(ElfW(Phdr)),
6221             .e_phnum = 1,
6222             .e_shentsize = sizeof(ElfW(Shdr)),
6223             .e_shnum = ARRAY_SIZE(img->shdr),
6224             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6225 #ifdef ELF_HOST_FLAGS
6226             .e_flags = ELF_HOST_FLAGS,
6227 #endif
6228 #ifdef ELF_OSABI
6229             .e_ident[EI_OSABI] = ELF_OSABI,
6230 #endif
6231         },
6232         .phdr = {
6233             .p_type = PT_LOAD,
6234             .p_flags = PF_X,
6235         },
6236         .shdr = {
6237             [0] = { .sh_type = SHT_NULL },
6238             /* Trick: The contents of code_gen_buffer are not present in
6239                this fake ELF file; that got allocated elsewhere.  Therefore
6240                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6241                will not look for contents.  We can record any address.  */
6242             [1] = { /* .text */
6243                 .sh_type = SHT_NOBITS,
6244                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6245             },
6246             [2] = { /* .debug_info */
6247                 .sh_type = SHT_PROGBITS,
6248                 .sh_offset = offsetof(struct ElfImage, di),
6249                 .sh_size = sizeof(struct DebugInfo),
6250             },
6251             [3] = { /* .debug_abbrev */
6252                 .sh_type = SHT_PROGBITS,
6253                 .sh_offset = offsetof(struct ElfImage, da),
6254                 .sh_size = sizeof(img->da),
6255             },
6256             [4] = { /* .debug_frame */
6257                 .sh_type = SHT_PROGBITS,
6258                 .sh_offset = sizeof(struct ElfImage),
6259             },
6260             [5] = { /* .symtab */
6261                 .sh_type = SHT_SYMTAB,
6262                 .sh_offset = offsetof(struct ElfImage, sym),
6263                 .sh_size = sizeof(img->sym),
6264                 .sh_info = 1,
6265                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6266                 .sh_entsize = sizeof(ElfW(Sym)),
6267             },
6268             [6] = { /* .strtab */
6269                 .sh_type = SHT_STRTAB,
6270                 .sh_offset = offsetof(struct ElfImage, str),
6271                 .sh_size = sizeof(img->str),
6272             }
6273         },
6274         .sym = {
6275             [1] = { /* code_gen_buffer */
6276                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6277                 .st_shndx = 1,
6278             }
6279         },
6280         .di = {
6281             .len = sizeof(struct DebugInfo) - 4,
6282             .version = 2,
6283             .ptr_size = sizeof(void *),
6284             .cu_die = 1,
6285             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6286             .fn_die = 2,
6287             .fn_name = "code_gen_buffer"
6288         },
6289         .da = {
6290             1,          /* abbrev number (the cu) */
6291             0x11, 1,    /* DW_TAG_compile_unit, has children */
6292             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6293             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6294             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6295             0, 0,       /* end of abbrev */
6296             2,          /* abbrev number (the fn) */
6297             0x2e, 0,    /* DW_TAG_subprogram, no children */
6298             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6299             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6300             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6301             0, 0,       /* end of abbrev */
6302             0           /* no more abbrev */
6303         },
6304         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6305                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6306     };
6307 
6308     /* We only need a single jit entry; statically allocate it.  */
6309     static struct jit_code_entry one_entry;
6310 
6311     uintptr_t buf = (uintptr_t)buf_ptr;
6312     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6313     DebugFrameHeader *dfh;
6314 
6315     img = g_malloc(img_size);
6316     *img = img_template;
6317 
6318     img->phdr.p_vaddr = buf;
6319     img->phdr.p_paddr = buf;
6320     img->phdr.p_memsz = buf_size;
6321 
6322     img->shdr[1].sh_name = find_string(img->str, ".text");
6323     img->shdr[1].sh_addr = buf;
6324     img->shdr[1].sh_size = buf_size;
6325 
6326     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6327     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6328 
6329     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6330     img->shdr[4].sh_size = debug_frame_size;
6331 
6332     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6333     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6334 
6335     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6336     img->sym[1].st_value = buf;
6337     img->sym[1].st_size = buf_size;
6338 
6339     img->di.cu_low_pc = buf;
6340     img->di.cu_high_pc = buf + buf_size;
6341     img->di.fn_low_pc = buf;
6342     img->di.fn_high_pc = buf + buf_size;
6343 
6344     dfh = (DebugFrameHeader *)(img + 1);
6345     memcpy(dfh, debug_frame, debug_frame_size);
6346     dfh->fde.func_start = buf;
6347     dfh->fde.func_len = buf_size;
6348 
6349 #ifdef DEBUG_JIT
6350     /* Enable this block to be able to debug the ELF image file creation.
6351        One can use readelf, objdump, or other inspection utilities.  */
6352     {
6353         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6354         FILE *f = fopen(jit, "w+b");
6355         if (f) {
6356             if (fwrite(img, img_size, 1, f) != img_size) {
6357                 /* Avoid stupid unused return value warning for fwrite.  */
6358             }
6359             fclose(f);
6360         }
6361     }
6362 #endif
6363 
6364     one_entry.symfile_addr = img;
6365     one_entry.symfile_size = img_size;
6366 
6367     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6368     __jit_debug_descriptor.relevant_entry = &one_entry;
6369     __jit_debug_descriptor.first_entry = &one_entry;
6370     __jit_debug_register_code();
6371 }
6372 #else
6373 /* No support for the feature.  Provide the entry point expected by exec.c,
6374    and implement the internal function we declared earlier.  */
6375 
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty: without ELF_HOST_MACHINE nothing is registered
       and all arguments are ignored.  */
}
6381 
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty: the backend did not define ELF_HOST_MACHINE,
       so this entry point ignores @buf and @buf_size.  */
}
6385 #endif /* ELF_HOST_MACHINE */
6386 
6387 #if !TCG_TARGET_MAYBE_vec
/* Placeholder that is compiled only when TCG_TARGET_MAYBE_vec is false.
   It is not expected to be called: all arguments are ignored and
   reaching the body trips g_assert_not_reached().  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6392 #endif
6393