/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* gen-code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

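/*
 * Illustrative note (added commentary, not upstream): the emit helpers
 * above adapt one value to the host's instruction granule.  On a
 * 4-byte-unit host, tcg_out32(s, insn) stores a single unit and advances
 * s->code_ptr by one; on a 1-byte-unit host the same call takes the
 * memcpy path and advances s->code_ptr by four.  The bytes emitted are
 * identical either way; only the pointer arithmetic differs, and the
 * divisions such as "4 / TCG_TARGET_INSN_UNIT_SIZE" are exact for the
 * power-of-two unit sizes the surrounding #if guards admit.
 */
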
/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

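/*
 * Illustrative sketch (added commentary, not upstream): a backend that
 * emits a forward branch to a not-yet-placed label typically does
 *
 *     tcg_out_reloc(s, s->code_ptr, R_MY_BRANCH, l, 0); // hypothetical type
 *     tcg_out32(s, BRANCH_PLACEHOLDER);                 // hypothetical insn
 *
 * Later, when the label's position is reached, tcg_out_label() records
 * the final address; tcg_resolve_relocs() then walks each label's reloc
 * list and lets the backend's patch_reloc() rewrite the placeholder with
 * the real displacement.
 */
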
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
#endif

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

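/*
 * Worked example (added commentary, illustrative): if i1 = {dst=R1, src=R0}
 * and i2 = {dst=R0, src=R1}, the two moves form a cycle.  tcg_out_movext2
 * first tries tcg_out_xchg(R0, R1); if the backend has no xchg, it spills
 * R0 to @scratch, performs i2, then completes i1 from the scratch copy.
 * Either way both extensions see their intended input value.
 */
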
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

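/*
 * Illustrative expansion (added commentary, not upstream): given the entry
 * C_O1_I2(r, r, ri) in tcg-target-con-set.h, the three passes above
 * produce, in order:
 *
 *     c_o1_i2_r_r_ri,                          // enum member
 *     { .args_ct_str = { "r", "r", "ri" } },   // constraint_sets[] entry
 *     return C_O1_I2(r, r, ri);  ==  return c_o1_i2_r_r_ri;
 *
 * so the backend's tcg_target_op_def() names a constraint set with the
 * same macro syntax that defines it, and the generic code indexes
 * constraint_sets[] with the resulting enumerator.
 */
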
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

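/*
 * A minimal sketch of the fast path (added commentary; the real inline
 * lives in the TCG headers): tcg_malloc() bumps pool_cur and only falls
 * back to tcg_malloc_internal() when the current chunk is exhausted,
 * roughly:
 *
 *     void *tcg_malloc(int size)   // illustrative only
 *     {
 *         TCGContext *s = tcg_ctx;
 *         uint8_t *ptr = s->pool_cur;
 *         int aligned = ROUND_UP(size, sizeof(long));
 *         if (unlikely(ptr + aligned > s->pool_end)) {
 *             return tcg_malloc_internal(s, size);
 *         }
 *         s->pool_cur = ptr + aligned;
 *         return ptr;
 *     }
 *
 * tcg_pool_reset() then recycles every chunk at once between translations,
 * which is why none of the per-op allocations need individual frees.
 */
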
/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

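/*
 * Illustrative note (added commentary): .typemask packs one 3-bit
 * dh_typecode per position, the return type in bits [2:0] and argument
 * N in bits [3N+2:3N].  The first helper below (info_helper_ld32_mmu)
 * thus encodes, from the low bits up: a tcg_target_ulong return, then
 * env, i64 addr, i32 oi and ptr ra, which init_call_layout() decodes
 * again three bits at a time.
 */
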
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
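    /*
     * Worked example (added commentary, illustrative): for an
     * (env, i64, i32, ptr) argument list the four typecodes occupy bit
     * positions 3..14 of typemask, so typemask >> 3 has no bit set at
     * or above bit 12; 32 - clz32(...) is then at most 12, and
     * DIV_ROUND_UP(12, 3) yields the 4 arguments.
     */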
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid a -Werror
     * "unsigned < 0 is always false" diagnostic when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

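/*
 * Illustrative example (added commentary): with a hypothetical ABI
 * providing 6 integer argument registers, arg_slot_reg_p() is true for
 * slots 0..5, while slot 6 maps to the first stack word:
 *
 *     arg_slot_stk_ofs(6) == TCG_TARGET_CALL_STACK_OFFSET
 *     arg_slot_stk_ofs(7) == TCG_TARGET_CALL_STACK_OFFSET
 *                            + sizeof(tcg_target_long)
 */
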
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

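/*
 * Worked example (added commentary, illustrative; assumes a 64-bit host
 * using TCG_CALL_ARG_NORMAL throughout): for info_helper_st128_mmu the
 * inputs (env, i64 addr, i128 data, i32 oi, ptr ra) occupy slots 0, 1,
 * {2,3}, 4 and 5 respectively, the Int128 being split into two
 * register-sized pieces by layout_arg_normal_n().  Only a
 * TCG_CALL_ARG_BY_REF target would additionally consume ref_slot stack
 * space for the defensive copy described above.
 */
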
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

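/*
 * Illustrative note (added commentary): with a 64-byte icache line, the
 * TB struct above is placed at the next 64-byte boundary and the
 * translated code at the first boundary after the struct, so metadata
 * writes at translation time and instruction fetches at execution time
 * never touch the same cache line.
 */
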
1399 void tcg_prologue_init(void)
1400 {
1401     TCGContext *s = tcg_ctx;
1402     size_t prologue_size;
1403 
1404     s->code_ptr = s->code_gen_ptr;
1405     s->code_buf = s->code_gen_ptr;
1406     s->data_gen_ptr = NULL;
1407 
1408 #ifndef CONFIG_TCG_INTERPRETER
1409     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1410 #endif
1411 
1412 #ifdef TCG_TARGET_NEED_POOL_LABELS
1413     s->pool_labels = NULL;
1414 #endif
1415 
1416     qemu_thread_jit_write();
1417     /* Generate the prologue.  */
1418     tcg_target_qemu_prologue(s);
1419 
1420 #ifdef TCG_TARGET_NEED_POOL_LABELS
1421     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1422     {
1423         int result = tcg_out_pool_finalize(s);
1424         tcg_debug_assert(result == 0);
1425     }
1426 #endif
1427 
1428     prologue_size = tcg_current_code_size(s);
1429     perf_report_prologue(s->code_gen_ptr, prologue_size);
1430 
1431 #ifndef CONFIG_TCG_INTERPRETER
1432     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1433                         (uintptr_t)s->code_buf, prologue_size);
1434 #endif
1435 
1436     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1437         FILE *logfile = qemu_log_trylock();
1438         if (logfile) {
1439             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1440             if (s->data_gen_ptr) {
1441                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1442                 size_t data_size = prologue_size - code_size;
1443                 size_t i;
1444 
1445                 disas(logfile, s->code_gen_ptr, code_size);
1446 
1447                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1448                     if (sizeof(tcg_target_ulong) == 8) {
1449                         fprintf(logfile,
1450                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1451                                 (uintptr_t)s->data_gen_ptr + i,
1452                                 *(uint64_t *)(s->data_gen_ptr + i));
1453                     } else {
1454                         fprintf(logfile,
1455                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1456                                 (uintptr_t)s->data_gen_ptr + i,
1457                                 *(uint32_t *)(s->data_gen_ptr + i));
1458                     }
1459                 }
1460             } else {
1461                 disas(logfile, s->code_gen_ptr, prologue_size);
1462             }
1463             fprintf(logfile, "\n");
1464             qemu_log_unlock(logfile);
1465         }
1466     }
1467 
1468 #ifndef CONFIG_TCG_INTERPRETER
1469     /*
1470      * Assert that goto_ptr is implemented completely, setting an epilogue.
1471      * For tci, we use NULL as the signal to return from the interpreter,
1472      * so skip this check.
1473      */
1474     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1475 #endif
1476 
1477     tcg_region_prologue_set(s);
1478 }
1479 
1480 void tcg_func_start(TCGContext *s)
1481 {
1482     tcg_pool_reset(s);
1483     s->nb_temps = s->nb_globals;
1484 
1485     /* No temps have been previously allocated for size or locality.  */
1486     memset(s->free_temps, 0, sizeof(s->free_temps));
1487 
1488     /* No constant temps have been previously allocated. */
1489     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1490         if (s->const_table[i]) {
1491             g_hash_table_remove_all(s->const_table[i]);
1492         }
1493     }
1494 
1495     s->nb_ops = 0;
1496     s->nb_labels = 0;
1497     s->current_frame_offset = s->frame_start;
1498 
1499 #ifdef CONFIG_DEBUG_TCG
1500     s->goto_tb_issue_mask = 0;
1501 #endif
1502 
1503     QTAILQ_INIT(&s->ops);
1504     QTAILQ_INIT(&s->free_ops);
1505     QSIMPLEQ_INIT(&s->labels);
1506 
1507     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1508                      s->addr_type == TCG_TYPE_I64);
1509 
1510     tcg_debug_assert(s->insn_start_words > 0);
1511 }
1512 
1513 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1514 {
1515     int n = s->nb_temps++;
1516 
1517     if (n >= TCG_MAX_TEMPS) {
1518         tcg_raise_tb_overflow(s);
1519     }
1520     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1521 }
1522 
1523 static TCGTemp *tcg_global_alloc(TCGContext *s)
1524 {
1525     TCGTemp *ts;
1526 
1527     tcg_debug_assert(s->nb_globals == s->nb_temps);
1528     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1529     s->nb_globals++;
1530     ts = tcg_temp_alloc(s);
1531     ts->kind = TEMP_GLOBAL;
1532 
1533     return ts;
1534 }
1535 
1536 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1537                                             TCGReg reg, const char *name)
1538 {
1539     TCGTemp *ts;
1540 
1541     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1542 
1543     ts = tcg_global_alloc(s);
1544     ts->base_type = type;
1545     ts->type = type;
1546     ts->kind = TEMP_FIXED;
1547     ts->reg = reg;
1548     ts->name = name;
1549     tcg_regset_set_reg(s->reserved_regs, reg);
1550 
1551     return ts;
1552 }
1553 
1554 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1555 {
1556     s->frame_start = start;
1557     s->frame_end = start + size;
1558     s->frame_temp
1559         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1560 }
1561 
1562 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1563                                      intptr_t offset, const char *name)
1564 {
1565     TCGContext *s = tcg_ctx;
1566     TCGTemp *base_ts = tcgv_ptr_temp(base);
1567     TCGTemp *ts = tcg_global_alloc(s);
1568     int indirect_reg = 0;
1569 
1570     switch (base_ts->kind) {
1571     case TEMP_FIXED:
1572         break;
1573     case TEMP_GLOBAL:
1574         /* We do not support double-indirect registers.  */
1575         tcg_debug_assert(!base_ts->indirect_reg);
1576         base_ts->indirect_base = 1;
1577         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1578                             ? 2 : 1);
1579         indirect_reg = 1;
1580         break;
1581     default:
1582         g_assert_not_reached();
1583     }
1584 
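         /*
          * On a 32-bit host, a 64-bit global becomes two consecutive I32
          * temps: "<name>_0" (subindex 0, at offset) and "<name>_1"
          * (subindex 1, at offset + 4).
          */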
1585     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1586         TCGTemp *ts2 = tcg_global_alloc(s);
1587         char buf[64];
1588 
1589         ts->base_type = TCG_TYPE_I64;
1590         ts->type = TCG_TYPE_I32;
1591         ts->indirect_reg = indirect_reg;
1592         ts->mem_allocated = 1;
1593         ts->mem_base = base_ts;
1594         ts->mem_offset = offset;
1595         pstrcpy(buf, sizeof(buf), name);
1596         pstrcat(buf, sizeof(buf), "_0");
1597         ts->name = strdup(buf);
1598 
1599         tcg_debug_assert(ts2 == ts + 1);
1600         ts2->base_type = TCG_TYPE_I64;
1601         ts2->type = TCG_TYPE_I32;
1602         ts2->indirect_reg = indirect_reg;
1603         ts2->mem_allocated = 1;
1604         ts2->mem_base = base_ts;
1605         ts2->mem_offset = offset + 4;
1606         ts2->temp_subindex = 1;
1607         pstrcpy(buf, sizeof(buf), name);
1608         pstrcat(buf, sizeof(buf), "_1");
1609         ts2->name = strdup(buf);
1610     } else {
1611         ts->base_type = type;
1612         ts->type = type;
1613         ts->indirect_reg = indirect_reg;
1614         ts->mem_allocated = 1;
1615         ts->mem_base = base_ts;
1616         ts->mem_offset = offset;
1617         ts->name = name;
1618     }
1619     return ts;
1620 }
1621 
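     /*
      * Allocate a translation-time temporary.  Freed TEMP_EBB temps are
      * recycled via the free_temps bitmaps; TEMP_TB temps are not reused
      * (frees are silently ignored, see tcg_temp_free_internal).  Types
      * wider than the host register are built from consecutive REG-sized
      * parts, distinguished by temp_subindex.
      */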
1622 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1623 {
1624     TCGContext *s = tcg_ctx;
1625     TCGTemp *ts;
1626     int n;
1627 
1628     if (kind == TEMP_EBB) {
1629         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1630 
1631         if (idx < TCG_MAX_TEMPS) {
1632             /* There is already an available temp with the right type.  */
1633             clear_bit(idx, s->free_temps[type].l);
1634 
1635             ts = &s->temps[idx];
1636             ts->temp_allocated = 1;
1637             tcg_debug_assert(ts->base_type == type);
1638             tcg_debug_assert(ts->kind == kind);
1639             return ts;
1640         }
1641     } else {
1642         tcg_debug_assert(kind == TEMP_TB);
1643     }
1644 
1645     switch (type) {
1646     case TCG_TYPE_I32:
1647     case TCG_TYPE_V64:
1648     case TCG_TYPE_V128:
1649     case TCG_TYPE_V256:
1650         n = 1;
1651         break;
1652     case TCG_TYPE_I64:
1653         n = 64 / TCG_TARGET_REG_BITS;
1654         break;
1655     case TCG_TYPE_I128:
1656         n = 128 / TCG_TARGET_REG_BITS;
1657         break;
1658     default:
1659         g_assert_not_reached();
1660     }
1661 
1662     ts = tcg_temp_alloc(s);
1663     ts->base_type = type;
1664     ts->temp_allocated = 1;
1665     ts->kind = kind;
1666 
1667     if (n == 1) {
1668         ts->type = type;
1669     } else {
1670         ts->type = TCG_TYPE_REG;
1671 
1672         for (int i = 1; i < n; ++i) {
1673             TCGTemp *ts2 = tcg_temp_alloc(s);
1674 
1675             tcg_debug_assert(ts2 == ts + i);
1676             ts2->base_type = type;
1677             ts2->type = TCG_TYPE_REG;
1678             ts2->temp_allocated = 1;
1679             ts2->temp_subindex = i;
1680             ts2->kind = kind;
1681         }
1682     }
1683     return ts;
1684 }
1685 
1686 TCGv_vec tcg_temp_new_vec(TCGType type)
1687 {
1688     TCGTemp *t;
1689 
1690 #ifdef CONFIG_DEBUG_TCG
1691     switch (type) {
1692     case TCG_TYPE_V64:
1693         assert(TCG_TARGET_HAS_v64);
1694         break;
1695     case TCG_TYPE_V128:
1696         assert(TCG_TARGET_HAS_v128);
1697         break;
1698     case TCG_TYPE_V256:
1699         assert(TCG_TARGET_HAS_v256);
1700         break;
1701     default:
1702         g_assert_not_reached();
1703     }
1704 #endif
1705 
1706     t = tcg_temp_new_internal(type, TEMP_EBB);
1707     return temp_tcgv_vec(t);
1708 }
1709 
1710 /* Create a new temp of the same type as an existing temp.  */
1711 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1712 {
1713     TCGTemp *t = tcgv_vec_temp(match);
1714 
1715     tcg_debug_assert(t->temp_allocated != 0);
1716 
1717     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1718     return temp_tcgv_vec(t);
1719 }
1720 
1721 void tcg_temp_free_internal(TCGTemp *ts)
1722 {
1723     TCGContext *s = tcg_ctx;
1724 
1725     switch (ts->kind) {
1726     case TEMP_CONST:
1727     case TEMP_TB:
1728         /* Silently ignore free. */
1729         break;
1730     case TEMP_EBB:
1731         tcg_debug_assert(ts->temp_allocated != 0);
1732         ts->temp_allocated = 0;
1733         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1734         break;
1735     default:
1736         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1737         g_assert_not_reached();
1738     }
1739 }
1740 
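     /*
      * Return the interned constant temp for (type, val), creating it on
      * first use.  The per-type hash tables key directly off the value
      * stored in the temp, so no separate key storage is needed.  Callers
      * typically arrive here via wrappers such as tcg_constant_i32().
      */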
1741 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1742 {
1743     TCGContext *s = tcg_ctx;
1744     GHashTable *h = s->const_table[type];
1745     TCGTemp *ts;
1746 
1747     if (h == NULL) {
1748         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1749         s->const_table[type] = h;
1750     }
1751 
1752     ts = g_hash_table_lookup(h, &val);
1753     if (ts == NULL) {
1754         int64_t *val_ptr;
1755 
1756         ts = tcg_temp_alloc(s);
1757 
1758         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1759             TCGTemp *ts2 = tcg_temp_alloc(s);
1760 
1761             tcg_debug_assert(ts2 == ts + 1);
1762 
1763             ts->base_type = TCG_TYPE_I64;
1764             ts->type = TCG_TYPE_I32;
1765             ts->kind = TEMP_CONST;
1766             ts->temp_allocated = 1;
1767 
1768             ts2->base_type = TCG_TYPE_I64;
1769             ts2->type = TCG_TYPE_I32;
1770             ts2->kind = TEMP_CONST;
1771             ts2->temp_allocated = 1;
1772             ts2->temp_subindex = 1;
1773 
1774             /*
1775              * Retain the full value of the 64-bit constant in the low
1776              * part, so that the hash table works.  Actual uses will
1777              * truncate the value to the low part.
1778              */
1779             ts[HOST_BIG_ENDIAN].val = val;
1780             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1781             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1782         } else {
1783             ts->base_type = type;
1784             ts->type = type;
1785             ts->kind = TEMP_CONST;
1786             ts->temp_allocated = 1;
1787             ts->val = val;
1788             val_ptr = &ts->val;
1789         }
1790         g_hash_table_insert(h, val_ptr, ts);
1791     }
1792 
1793     return ts;
1794 }
1795 
1796 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1797 {
1798     val = dup_const(vece, val);
1799     return temp_tcgv_vec(tcg_constant_internal(type, val));
1800 }
1801 
1802 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1803 {
1804     TCGTemp *t = tcgv_vec_temp(match);
1805 
1806     tcg_debug_assert(t->temp_allocated != 0);
1807     return tcg_constant_vec(t->base_type, vece, val);
1808 }
1809 
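     /*
      * Debug-only out-of-line versions of temp_idx() and tcgv_i32_temp(),
      * which verify that the handle really refers to a temp inside
      * tcg_ctx->temps.
      */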
1810 #ifdef CONFIG_DEBUG_TCG
1811 size_t temp_idx(TCGTemp *ts)
1812 {
1813     ptrdiff_t n = ts - tcg_ctx->temps;
1814     assert(n >= 0 && n < tcg_ctx->nb_temps);
1815     return n;
1816 }
1817 
1818 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1819 {
1820     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1821 
1822     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1823     assert(o % sizeof(TCGTemp) == 0);
1824 
1825     return (void *)tcg_ctx + (uintptr_t)v;
1826 }
1827 #endif /* CONFIG_DEBUG_TCG */
1828 
1829 /* Return true if OP may appear in the opcode stream.
1830    Test the runtime variable that controls each opcode.  */
1831 bool tcg_op_supported(TCGOpcode op)
1832 {
1833     const bool have_vec
1834         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1835 
1836     switch (op) {
1837     case INDEX_op_discard:
1838     case INDEX_op_set_label:
1839     case INDEX_op_call:
1840     case INDEX_op_br:
1841     case INDEX_op_mb:
1842     case INDEX_op_insn_start:
1843     case INDEX_op_exit_tb:
1844     case INDEX_op_goto_tb:
1845     case INDEX_op_goto_ptr:
1846     case INDEX_op_qemu_ld_a32_i32:
1847     case INDEX_op_qemu_ld_a64_i32:
1848     case INDEX_op_qemu_st_a32_i32:
1849     case INDEX_op_qemu_st_a64_i32:
1850     case INDEX_op_qemu_ld_a32_i64:
1851     case INDEX_op_qemu_ld_a64_i64:
1852     case INDEX_op_qemu_st_a32_i64:
1853     case INDEX_op_qemu_st_a64_i64:
1854         return true;
1855 
1856     case INDEX_op_qemu_st8_a32_i32:
1857     case INDEX_op_qemu_st8_a64_i32:
1858         return TCG_TARGET_HAS_qemu_st8_i32;
1859 
1860     case INDEX_op_qemu_ld_a32_i128:
1861     case INDEX_op_qemu_ld_a64_i128:
1862     case INDEX_op_qemu_st_a32_i128:
1863     case INDEX_op_qemu_st_a64_i128:
1864         return TCG_TARGET_HAS_qemu_ldst_i128;
1865 
1866     case INDEX_op_mov_i32:
1867     case INDEX_op_setcond_i32:
1868     case INDEX_op_brcond_i32:
1869     case INDEX_op_ld8u_i32:
1870     case INDEX_op_ld8s_i32:
1871     case INDEX_op_ld16u_i32:
1872     case INDEX_op_ld16s_i32:
1873     case INDEX_op_ld_i32:
1874     case INDEX_op_st8_i32:
1875     case INDEX_op_st16_i32:
1876     case INDEX_op_st_i32:
1877     case INDEX_op_add_i32:
1878     case INDEX_op_sub_i32:
1879     case INDEX_op_mul_i32:
1880     case INDEX_op_and_i32:
1881     case INDEX_op_or_i32:
1882     case INDEX_op_xor_i32:
1883     case INDEX_op_shl_i32:
1884     case INDEX_op_shr_i32:
1885     case INDEX_op_sar_i32:
1886         return true;
1887 
1888     case INDEX_op_negsetcond_i32:
1889         return TCG_TARGET_HAS_negsetcond_i32;
1890     case INDEX_op_movcond_i32:
1891         return TCG_TARGET_HAS_movcond_i32;
1892     case INDEX_op_div_i32:
1893     case INDEX_op_divu_i32:
1894         return TCG_TARGET_HAS_div_i32;
1895     case INDEX_op_rem_i32:
1896     case INDEX_op_remu_i32:
1897         return TCG_TARGET_HAS_rem_i32;
1898     case INDEX_op_div2_i32:
1899     case INDEX_op_divu2_i32:
1900         return TCG_TARGET_HAS_div2_i32;
1901     case INDEX_op_rotl_i32:
1902     case INDEX_op_rotr_i32:
1903         return TCG_TARGET_HAS_rot_i32;
1904     case INDEX_op_deposit_i32:
1905         return TCG_TARGET_HAS_deposit_i32;
1906     case INDEX_op_extract_i32:
1907         return TCG_TARGET_HAS_extract_i32;
1908     case INDEX_op_sextract_i32:
1909         return TCG_TARGET_HAS_sextract_i32;
1910     case INDEX_op_extract2_i32:
1911         return TCG_TARGET_HAS_extract2_i32;
1912     case INDEX_op_add2_i32:
1913         return TCG_TARGET_HAS_add2_i32;
1914     case INDEX_op_sub2_i32:
1915         return TCG_TARGET_HAS_sub2_i32;
1916     case INDEX_op_mulu2_i32:
1917         return TCG_TARGET_HAS_mulu2_i32;
1918     case INDEX_op_muls2_i32:
1919         return TCG_TARGET_HAS_muls2_i32;
1920     case INDEX_op_muluh_i32:
1921         return TCG_TARGET_HAS_muluh_i32;
1922     case INDEX_op_mulsh_i32:
1923         return TCG_TARGET_HAS_mulsh_i32;
1924     case INDEX_op_ext8s_i32:
1925         return TCG_TARGET_HAS_ext8s_i32;
1926     case INDEX_op_ext16s_i32:
1927         return TCG_TARGET_HAS_ext16s_i32;
1928     case INDEX_op_ext8u_i32:
1929         return TCG_TARGET_HAS_ext8u_i32;
1930     case INDEX_op_ext16u_i32:
1931         return TCG_TARGET_HAS_ext16u_i32;
1932     case INDEX_op_bswap16_i32:
1933         return TCG_TARGET_HAS_bswap16_i32;
1934     case INDEX_op_bswap32_i32:
1935         return TCG_TARGET_HAS_bswap32_i32;
1936     case INDEX_op_not_i32:
1937         return TCG_TARGET_HAS_not_i32;
1938     case INDEX_op_neg_i32:
1939         return TCG_TARGET_HAS_neg_i32;
1940     case INDEX_op_andc_i32:
1941         return TCG_TARGET_HAS_andc_i32;
1942     case INDEX_op_orc_i32:
1943         return TCG_TARGET_HAS_orc_i32;
1944     case INDEX_op_eqv_i32:
1945         return TCG_TARGET_HAS_eqv_i32;
1946     case INDEX_op_nand_i32:
1947         return TCG_TARGET_HAS_nand_i32;
1948     case INDEX_op_nor_i32:
1949         return TCG_TARGET_HAS_nor_i32;
1950     case INDEX_op_clz_i32:
1951         return TCG_TARGET_HAS_clz_i32;
1952     case INDEX_op_ctz_i32:
1953         return TCG_TARGET_HAS_ctz_i32;
1954     case INDEX_op_ctpop_i32:
1955         return TCG_TARGET_HAS_ctpop_i32;
1956 
1957     case INDEX_op_brcond2_i32:
1958     case INDEX_op_setcond2_i32:
1959         return TCG_TARGET_REG_BITS == 32;
1960 
1961     case INDEX_op_mov_i64:
1962     case INDEX_op_setcond_i64:
1963     case INDEX_op_brcond_i64:
1964     case INDEX_op_ld8u_i64:
1965     case INDEX_op_ld8s_i64:
1966     case INDEX_op_ld16u_i64:
1967     case INDEX_op_ld16s_i64:
1968     case INDEX_op_ld32u_i64:
1969     case INDEX_op_ld32s_i64:
1970     case INDEX_op_ld_i64:
1971     case INDEX_op_st8_i64:
1972     case INDEX_op_st16_i64:
1973     case INDEX_op_st32_i64:
1974     case INDEX_op_st_i64:
1975     case INDEX_op_add_i64:
1976     case INDEX_op_sub_i64:
1977     case INDEX_op_mul_i64:
1978     case INDEX_op_and_i64:
1979     case INDEX_op_or_i64:
1980     case INDEX_op_xor_i64:
1981     case INDEX_op_shl_i64:
1982     case INDEX_op_shr_i64:
1983     case INDEX_op_sar_i64:
1984     case INDEX_op_ext_i32_i64:
1985     case INDEX_op_extu_i32_i64:
1986         return TCG_TARGET_REG_BITS == 64;
1987 
1988     case INDEX_op_negsetcond_i64:
1989         return TCG_TARGET_HAS_negsetcond_i64;
1990     case INDEX_op_movcond_i64:
1991         return TCG_TARGET_HAS_movcond_i64;
1992     case INDEX_op_div_i64:
1993     case INDEX_op_divu_i64:
1994         return TCG_TARGET_HAS_div_i64;
1995     case INDEX_op_rem_i64:
1996     case INDEX_op_remu_i64:
1997         return TCG_TARGET_HAS_rem_i64;
1998     case INDEX_op_div2_i64:
1999     case INDEX_op_divu2_i64:
2000         return TCG_TARGET_HAS_div2_i64;
2001     case INDEX_op_rotl_i64:
2002     case INDEX_op_rotr_i64:
2003         return TCG_TARGET_HAS_rot_i64;
2004     case INDEX_op_deposit_i64:
2005         return TCG_TARGET_HAS_deposit_i64;
2006     case INDEX_op_extract_i64:
2007         return TCG_TARGET_HAS_extract_i64;
2008     case INDEX_op_sextract_i64:
2009         return TCG_TARGET_HAS_sextract_i64;
2010     case INDEX_op_extract2_i64:
2011         return TCG_TARGET_HAS_extract2_i64;
2012     case INDEX_op_extrl_i64_i32:
2013     case INDEX_op_extrh_i64_i32:
2014         return TCG_TARGET_HAS_extr_i64_i32;
2015     case INDEX_op_ext8s_i64:
2016         return TCG_TARGET_HAS_ext8s_i64;
2017     case INDEX_op_ext16s_i64:
2018         return TCG_TARGET_HAS_ext16s_i64;
2019     case INDEX_op_ext32s_i64:
2020         return TCG_TARGET_HAS_ext32s_i64;
2021     case INDEX_op_ext8u_i64:
2022         return TCG_TARGET_HAS_ext8u_i64;
2023     case INDEX_op_ext16u_i64:
2024         return TCG_TARGET_HAS_ext16u_i64;
2025     case INDEX_op_ext32u_i64:
2026         return TCG_TARGET_HAS_ext32u_i64;
2027     case INDEX_op_bswap16_i64:
2028         return TCG_TARGET_HAS_bswap16_i64;
2029     case INDEX_op_bswap32_i64:
2030         return TCG_TARGET_HAS_bswap32_i64;
2031     case INDEX_op_bswap64_i64:
2032         return TCG_TARGET_HAS_bswap64_i64;
2033     case INDEX_op_not_i64:
2034         return TCG_TARGET_HAS_not_i64;
2035     case INDEX_op_neg_i64:
2036         return TCG_TARGET_HAS_neg_i64;
2037     case INDEX_op_andc_i64:
2038         return TCG_TARGET_HAS_andc_i64;
2039     case INDEX_op_orc_i64:
2040         return TCG_TARGET_HAS_orc_i64;
2041     case INDEX_op_eqv_i64:
2042         return TCG_TARGET_HAS_eqv_i64;
2043     case INDEX_op_nand_i64:
2044         return TCG_TARGET_HAS_nand_i64;
2045     case INDEX_op_nor_i64:
2046         return TCG_TARGET_HAS_nor_i64;
2047     case INDEX_op_clz_i64:
2048         return TCG_TARGET_HAS_clz_i64;
2049     case INDEX_op_ctz_i64:
2050         return TCG_TARGET_HAS_ctz_i64;
2051     case INDEX_op_ctpop_i64:
2052         return TCG_TARGET_HAS_ctpop_i64;
2053     case INDEX_op_add2_i64:
2054         return TCG_TARGET_HAS_add2_i64;
2055     case INDEX_op_sub2_i64:
2056         return TCG_TARGET_HAS_sub2_i64;
2057     case INDEX_op_mulu2_i64:
2058         return TCG_TARGET_HAS_mulu2_i64;
2059     case INDEX_op_muls2_i64:
2060         return TCG_TARGET_HAS_muls2_i64;
2061     case INDEX_op_muluh_i64:
2062         return TCG_TARGET_HAS_muluh_i64;
2063     case INDEX_op_mulsh_i64:
2064         return TCG_TARGET_HAS_mulsh_i64;
2065 
2066     case INDEX_op_mov_vec:
2067     case INDEX_op_dup_vec:
2068     case INDEX_op_dupm_vec:
2069     case INDEX_op_ld_vec:
2070     case INDEX_op_st_vec:
2071     case INDEX_op_add_vec:
2072     case INDEX_op_sub_vec:
2073     case INDEX_op_and_vec:
2074     case INDEX_op_or_vec:
2075     case INDEX_op_xor_vec:
2076     case INDEX_op_cmp_vec:
2077         return have_vec;
2078     case INDEX_op_dup2_vec:
2079         return have_vec && TCG_TARGET_REG_BITS == 32;
2080     case INDEX_op_not_vec:
2081         return have_vec && TCG_TARGET_HAS_not_vec;
2082     case INDEX_op_neg_vec:
2083         return have_vec && TCG_TARGET_HAS_neg_vec;
2084     case INDEX_op_abs_vec:
2085         return have_vec && TCG_TARGET_HAS_abs_vec;
2086     case INDEX_op_andc_vec:
2087         return have_vec && TCG_TARGET_HAS_andc_vec;
2088     case INDEX_op_orc_vec:
2089         return have_vec && TCG_TARGET_HAS_orc_vec;
2090     case INDEX_op_nand_vec:
2091         return have_vec && TCG_TARGET_HAS_nand_vec;
2092     case INDEX_op_nor_vec:
2093         return have_vec && TCG_TARGET_HAS_nor_vec;
2094     case INDEX_op_eqv_vec:
2095         return have_vec && TCG_TARGET_HAS_eqv_vec;
2096     case INDEX_op_mul_vec:
2097         return have_vec && TCG_TARGET_HAS_mul_vec;
2098     case INDEX_op_shli_vec:
2099     case INDEX_op_shri_vec:
2100     case INDEX_op_sari_vec:
2101         return have_vec && TCG_TARGET_HAS_shi_vec;
2102     case INDEX_op_shls_vec:
2103     case INDEX_op_shrs_vec:
2104     case INDEX_op_sars_vec:
2105         return have_vec && TCG_TARGET_HAS_shs_vec;
2106     case INDEX_op_shlv_vec:
2107     case INDEX_op_shrv_vec:
2108     case INDEX_op_sarv_vec:
2109         return have_vec && TCG_TARGET_HAS_shv_vec;
2110     case INDEX_op_rotli_vec:
2111         return have_vec && TCG_TARGET_HAS_roti_vec;
2112     case INDEX_op_rotls_vec:
2113         return have_vec && TCG_TARGET_HAS_rots_vec;
2114     case INDEX_op_rotlv_vec:
2115     case INDEX_op_rotrv_vec:
2116         return have_vec && TCG_TARGET_HAS_rotv_vec;
2117     case INDEX_op_ssadd_vec:
2118     case INDEX_op_usadd_vec:
2119     case INDEX_op_sssub_vec:
2120     case INDEX_op_ussub_vec:
2121         return have_vec && TCG_TARGET_HAS_sat_vec;
2122     case INDEX_op_smin_vec:
2123     case INDEX_op_umin_vec:
2124     case INDEX_op_smax_vec:
2125     case INDEX_op_umax_vec:
2126         return have_vec && TCG_TARGET_HAS_minmax_vec;
2127     case INDEX_op_bitsel_vec:
2128         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2129     case INDEX_op_cmpsel_vec:
2130         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2131 
2132     default:
2133         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2134         return true;
2135     }
2136 }
2137 
2138 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2139 
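     /*
      * Emit a call to a helper.  The op's arguments are, in order: the
      * output temps, the input temps (32-bit inputs widened to I64 first
      * when the call layout requires it), the function pointer, and its
      * TCGHelperInfo.  The layout is computed once per helper by
      * init_call_layout().
      */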
2140 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2141 {
2142     TCGv_i64 extend_free[MAX_CALL_IARGS];
2143     int n_extend = 0;
2144     TCGOp *op;
2145     int i, n, pi = 0, total_args;
2146 
2147     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2148         init_call_layout(info);
2149         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2150     }
2151 
2152     total_args = info->nr_out + info->nr_in + 2;
2153     op = tcg_op_alloc(INDEX_op_call, total_args);
2154 
2155 #ifdef CONFIG_PLUGIN
2156     /* Flag helpers that may affect guest state */
2157     if (tcg_ctx->plugin_insn &&
2158         !(info->flags & TCG_CALL_PLUGIN) &&
2159         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2160         tcg_ctx->plugin_insn->calls_helpers = true;
2161     }
2162 #endif
2163 
2164     TCGOP_CALLO(op) = n = info->nr_out;
2165     switch (n) {
2166     case 0:
2167         tcg_debug_assert(ret == NULL);
2168         break;
2169     case 1:
2170         tcg_debug_assert(ret != NULL);
2171         op->args[pi++] = temp_arg(ret);
2172         break;
2173     case 2:
2174     case 4:
2175         tcg_debug_assert(ret != NULL);
2176         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2177         tcg_debug_assert(ret->temp_subindex == 0);
2178         for (i = 0; i < n; ++i) {
2179             op->args[pi++] = temp_arg(ret + i);
2180         }
2181         break;
2182     default:
2183         g_assert_not_reached();
2184     }
2185 
2186     TCGOP_CALLI(op) = n = info->nr_in;
2187     for (i = 0; i < n; i++) {
2188         const TCGCallArgumentLoc *loc = &info->in[i];
2189         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2190 
2191         switch (loc->kind) {
2192         case TCG_CALL_ARG_NORMAL:
2193         case TCG_CALL_ARG_BY_REF:
2194         case TCG_CALL_ARG_BY_REF_N:
2195             op->args[pi++] = temp_arg(ts);
2196             break;
2197 
2198         case TCG_CALL_ARG_EXTEND_U:
2199         case TCG_CALL_ARG_EXTEND_S:
2200             {
2201                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2202                 TCGv_i32 orig = temp_tcgv_i32(ts);
2203 
2204                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2205                     tcg_gen_ext_i32_i64(temp, orig);
2206                 } else {
2207                     tcg_gen_extu_i32_i64(temp, orig);
2208                 }
2209                 op->args[pi++] = tcgv_i64_arg(temp);
2210                 extend_free[n_extend++] = temp;
2211             }
2212             break;
2213 
2214         default:
2215             g_assert_not_reached();
2216         }
2217     }
2218     op->args[pi++] = (uintptr_t)info->func;
2219     op->args[pi++] = (uintptr_t)info;
2220     tcg_debug_assert(pi == total_args);
2221 
2222     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2223 
2224     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2225     for (i = 0; i < n_extend; ++i) {
2226         tcg_temp_free_i64(extend_free[i]);
2227     }
2228 }
2229 
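     /*
      * Fixed-arity wrappers that marshal their operands into an array
      * for tcg_gen_callN().
      */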
2230 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2231 {
2232     tcg_gen_callN(info, ret, NULL);
2233 }
2234 
2235 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2236 {
2237     tcg_gen_callN(info, ret, &t1);
2238 }
2239 
2240 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2241 {
2242     TCGTemp *args[2] = { t1, t2 };
2243     tcg_gen_callN(info, ret, args);
2244 }
2245 
2246 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2247                    TCGTemp *t2, TCGTemp *t3)
2248 {
2249     TCGTemp *args[3] = { t1, t2, t3 };
2250     tcg_gen_callN(info, ret, args);
2251 }
2252 
2253 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2254                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2255 {
2256     TCGTemp *args[4] = { t1, t2, t3, t4 };
2257     tcg_gen_callN(info, ret, args);
2258 }
2259 
2260 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2261                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2262 {
2263     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2264     tcg_gen_callN(info, ret, args);
2265 }
2266 
2267 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2268                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2269 {
2270     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2271     tcg_gen_callN(info, ret, args);
2272 }
2273 
2274 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2275                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2276                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2277 {
2278     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2279     tcg_gen_callN(info, ret, args);
2280 }
2281 
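     /*
      * Reset the allocator's per-temp state: constants begin as
      * TEMP_VAL_CONST, fixed temps live in their reserved register,
      * globals begin in their canonical memory slot, EBB temps begin
      * dead, and EBB/TB temps have no stack slot allocated yet.
      */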
2282 static void tcg_reg_alloc_start(TCGContext *s)
2283 {
2284     int i, n;
2285 
2286     for (i = 0, n = s->nb_temps; i < n; i++) {
2287         TCGTemp *ts = &s->temps[i];
2288         TCGTempVal val = TEMP_VAL_MEM;
2289 
2290         switch (ts->kind) {
2291         case TEMP_CONST:
2292             val = TEMP_VAL_CONST;
2293             break;
2294         case TEMP_FIXED:
2295             val = TEMP_VAL_REG;
2296             break;
2297         case TEMP_GLOBAL:
2298             break;
2299         case TEMP_EBB:
2300             val = TEMP_VAL_DEAD;
2301             /* fall through */
2302         case TEMP_TB:
2303             ts->mem_allocated = 0;
2304             break;
2305         default:
2306             g_assert_not_reached();
2307         }
2308         ts->val_type = val;
2309     }
2310 
2311     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2312 }
2313 
2314 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2315                                  TCGTemp *ts)
2316 {
2317     int idx = temp_idx(ts);
2318 
2319     switch (ts->kind) {
2320     case TEMP_FIXED:
2321     case TEMP_GLOBAL:
2322         pstrcpy(buf, buf_size, ts->name);
2323         break;
2324     case TEMP_TB:
2325         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2326         break;
2327     case TEMP_EBB:
2328         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2329         break;
2330     case TEMP_CONST:
2331         switch (ts->type) {
2332         case TCG_TYPE_I32:
2333             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2334             break;
2335 #if TCG_TARGET_REG_BITS > 32
2336         case TCG_TYPE_I64:
2337             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2338             break;
2339 #endif
2340         case TCG_TYPE_V64:
2341         case TCG_TYPE_V128:
2342         case TCG_TYPE_V256:
2343             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2344                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2345             break;
2346         default:
2347             g_assert_not_reached();
2348         }
2349         break;
2350     }
2351     return buf;
2352 }
2353 
2354 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2355                              int buf_size, TCGArg arg)
2356 {
2357     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2358 }
2359 
2360 static const char * const cond_name[] =
2361 {
2362     [TCG_COND_NEVER] = "never",
2363     [TCG_COND_ALWAYS] = "always",
2364     [TCG_COND_EQ] = "eq",
2365     [TCG_COND_NE] = "ne",
2366     [TCG_COND_LT] = "lt",
2367     [TCG_COND_GE] = "ge",
2368     [TCG_COND_LE] = "le",
2369     [TCG_COND_GT] = "gt",
2370     [TCG_COND_LTU] = "ltu",
2371     [TCG_COND_GEU] = "geu",
2372     [TCG_COND_LEU] = "leu",
2373     [TCG_COND_GTU] = "gtu"
2374 };
2375 
2376 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2377 {
2378     [MO_UB]   = "ub",
2379     [MO_SB]   = "sb",
2380     [MO_LEUW] = "leuw",
2381     [MO_LESW] = "lesw",
2382     [MO_LEUL] = "leul",
2383     [MO_LESL] = "lesl",
2384     [MO_LEUQ] = "leq",
2385     [MO_BEUW] = "beuw",
2386     [MO_BESW] = "besw",
2387     [MO_BEUL] = "beul",
2388     [MO_BESL] = "besl",
2389     [MO_BEUQ] = "beq",
2390     [MO_128 + MO_BE] = "beo",
2391     [MO_128 + MO_LE] = "leo",
2392 };
2393 
2394 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2395     [MO_UNALN >> MO_ASHIFT]    = "un+",
2396     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2397     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2398     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2399     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2400     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2401     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2402     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2403 };
2404 
2405 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2406     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2407     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2408     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2409     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2410     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2411     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2412 };
2413 
2414 static const char bswap_flag_name[][6] = {
2415     [TCG_BSWAP_IZ] = "iz",
2416     [TCG_BSWAP_OZ] = "oz",
2417     [TCG_BSWAP_OS] = "os",
2418     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2419     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2420 };
2421 
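     /*
      * True if at most one register is set in D, using the classic
      * x & (x - 1) test; note this also accepts the empty set.
      */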
2422 static inline bool tcg_regset_single(TCGRegSet d)
2423 {
2424     return (d & (d - 1)) == 0;
2425 }
2426 
2427 static inline TCGReg tcg_regset_first(TCGRegSet d)
2428 {
2429     if (TCG_TARGET_NB_REGS <= 32) {
2430         return ctz32(d);
2431     } else {
2432         return ctz64(d);
2433     }
2434 }
2435 
2436 /* Return only the number of characters output -- no error return. */
2437 #define ne_fprintf(...) \
2438     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2439 
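     /*
      * Pretty-print the pending op stream to F, one op per line, using
      * symbolic names for conditions, memory-op flags and barriers when
      * the bits decode cleanly, plus liveness ("sync:"/"dead:") and
      * register-preference annotations when available.
      */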
2440 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2441 {
2442     char buf[128];
2443     TCGOp *op;
2444 
2445     QTAILQ_FOREACH(op, &s->ops, link) {
2446         int i, k, nb_oargs, nb_iargs, nb_cargs;
2447         const TCGOpDef *def;
2448         TCGOpcode c;
2449         int col = 0;
2450 
2451         c = op->opc;
2452         def = &tcg_op_defs[c];
2453 
2454         if (c == INDEX_op_insn_start) {
2455             nb_oargs = 0;
2456             col += ne_fprintf(f, "\n ----");
2457 
2458             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2459                 col += ne_fprintf(f, " %016" PRIx64,
2460                                   tcg_get_insn_start_param(op, i));
2461             }
2462         } else if (c == INDEX_op_call) {
2463             const TCGHelperInfo *info = tcg_call_info(op);
2464             void *func = tcg_call_func(op);
2465 
2466             /* variable number of arguments */
2467             nb_oargs = TCGOP_CALLO(op);
2468             nb_iargs = TCGOP_CALLI(op);
2469             nb_cargs = def->nb_cargs;
2470 
2471             col += ne_fprintf(f, " %s ", def->name);
2472 
2473             /*
2474              * Print the function name from TCGHelperInfo, if available.
2475              * Note that plugins have a template function for the info,
2476              * but the actual function pointer comes from the plugin.
2477              */
2478             if (func == info->func) {
2479                 col += ne_fprintf(f, "%s", info->name);
2480             } else {
2481                 col += ne_fprintf(f, "plugin(%p)", func);
2482             }
2483 
2484             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2485             for (i = 0; i < nb_oargs; i++) {
2486                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2487                                                             op->args[i]));
2488             }
2489             for (i = 0; i < nb_iargs; i++) {
2490                 TCGArg arg = op->args[nb_oargs + i];
2491                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2492                 col += ne_fprintf(f, ",%s", t);
2493             }
2494         } else {
2495             col += ne_fprintf(f, " %s ", def->name);
2496 
2497             nb_oargs = def->nb_oargs;
2498             nb_iargs = def->nb_iargs;
2499             nb_cargs = def->nb_cargs;
2500 
2501             if (def->flags & TCG_OPF_VECTOR) {
2502                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2503                                   8 << TCGOP_VECE(op));
2504             }
2505 
2506             k = 0;
2507             for (i = 0; i < nb_oargs; i++) {
2508             const char *sep = k ? "," : "";
2509                 col += ne_fprintf(f, "%s%s", sep,
2510                                   tcg_get_arg_str(s, buf, sizeof(buf),
2511                                                   op->args[k++]));
2512             }
2513             for (i = 0; i < nb_iargs; i++) {
2514             const char *sep = k ? "," : "";
2515                 col += ne_fprintf(f, "%s%s", sep,
2516                                   tcg_get_arg_str(s, buf, sizeof(buf),
2517                                                   op->args[k++]));
2518             }
2519             switch (c) {
2520             case INDEX_op_brcond_i32:
2521             case INDEX_op_setcond_i32:
2522             case INDEX_op_negsetcond_i32:
2523             case INDEX_op_movcond_i32:
2524             case INDEX_op_brcond2_i32:
2525             case INDEX_op_setcond2_i32:
2526             case INDEX_op_brcond_i64:
2527             case INDEX_op_setcond_i64:
2528             case INDEX_op_negsetcond_i64:
2529             case INDEX_op_movcond_i64:
2530             case INDEX_op_cmp_vec:
2531             case INDEX_op_cmpsel_vec:
2532                 if (op->args[k] < ARRAY_SIZE(cond_name)
2533                     && cond_name[op->args[k]]) {
2534                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2535                 } else {
2536                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2537                 }
2538                 i = 1;
2539                 break;
2540             case INDEX_op_qemu_ld_a32_i32:
2541             case INDEX_op_qemu_ld_a64_i32:
2542             case INDEX_op_qemu_st_a32_i32:
2543             case INDEX_op_qemu_st_a64_i32:
2544             case INDEX_op_qemu_st8_a32_i32:
2545             case INDEX_op_qemu_st8_a64_i32:
2546             case INDEX_op_qemu_ld_a32_i64:
2547             case INDEX_op_qemu_ld_a64_i64:
2548             case INDEX_op_qemu_st_a32_i64:
2549             case INDEX_op_qemu_st_a64_i64:
2550             case INDEX_op_qemu_ld_a32_i128:
2551             case INDEX_op_qemu_ld_a64_i128:
2552             case INDEX_op_qemu_st_a32_i128:
2553             case INDEX_op_qemu_st_a64_i128:
2554                 {
2555                     const char *s_al, *s_op, *s_at;
2556                     MemOpIdx oi = op->args[k++];
2557                     MemOp mop = get_memop(oi);
2558                     unsigned ix = get_mmuidx(oi);
2559 
2560                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2561                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2562                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2563                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2564 
2565                     /* If all fields are accounted for, print symbolically. */
2566                     if (!mop && s_al && s_op && s_at) {
2567                         col += ne_fprintf(f, ",%s%s%s,%u",
2568                                           s_at, s_al, s_op, ix);
2569                     } else {
2570                         mop = get_memop(oi);
2571                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2572                     }
2573                     i = 1;
2574                 }
2575                 break;
2576             case INDEX_op_bswap16_i32:
2577             case INDEX_op_bswap16_i64:
2578             case INDEX_op_bswap32_i32:
2579             case INDEX_op_bswap32_i64:
2580             case INDEX_op_bswap64_i64:
2581                 {
2582                     TCGArg flags = op->args[k];
2583                     const char *name = NULL;
2584 
2585                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2586                         name = bswap_flag_name[flags];
2587                     }
2588                     if (name) {
2589                         col += ne_fprintf(f, ",%s", name);
2590                     } else {
2591                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2592                     }
2593                     i = k = 1;
2594                 }
2595                 break;
2596             default:
2597                 i = 0;
2598                 break;
2599             }
2600             switch (c) {
2601             case INDEX_op_set_label:
2602             case INDEX_op_br:
2603             case INDEX_op_brcond_i32:
2604             case INDEX_op_brcond_i64:
2605             case INDEX_op_brcond2_i32:
2606                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2607                                   arg_label(op->args[k])->id);
2608                 i++, k++;
2609                 break;
2610             case INDEX_op_mb:
2611                 {
2612                     TCGBar membar = op->args[k];
2613                     const char *b_op, *m_op;
2614 
2615                     switch (membar & TCG_BAR_SC) {
2616                     case 0:
2617                         b_op = "none";
2618                         break;
2619                     case TCG_BAR_LDAQ:
2620                         b_op = "acq";
2621                         break;
2622                     case TCG_BAR_STRL:
2623                         b_op = "rel";
2624                         break;
2625                     case TCG_BAR_SC:
2626                         b_op = "seq";
2627                         break;
2628                     default:
2629                         g_assert_not_reached();
2630                     }
2631 
2632                     switch (membar & TCG_MO_ALL) {
2633                     case 0:
2634                         m_op = "none";
2635                         break;
2636                     case TCG_MO_LD_LD:
2637                         m_op = "rr";
2638                         break;
2639                     case TCG_MO_LD_ST:
2640                         m_op = "rw";
2641                         break;
2642                     case TCG_MO_ST_LD:
2643                         m_op = "wr";
2644                         break;
2645                     case TCG_MO_ST_ST:
2646                         m_op = "ww";
2647                         break;
2648                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2649                         m_op = "rr+rw";
2650                         break;
2651                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2652                         m_op = "rr+wr";
2653                         break;
2654                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2655                         m_op = "rr+ww";
2656                         break;
2657                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2658                         m_op = "rw+wr";
2659                         break;
2660                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2661                         m_op = "rw+ww";
2662                         break;
2663                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2664                         m_op = "wr+ww";
2665                         break;
2666                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2667                         m_op = "rr+rw+wr";
2668                         break;
2669                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2670                         m_op = "rr+rw+ww";
2671                         break;
2672                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2673                         m_op = "rr+wr+ww";
2674                         break;
2675                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2676                         m_op = "rw+wr+ww";
2677                         break;
2678                     case TCG_MO_ALL:
2679                         m_op = "all";
2680                         break;
2681                     default:
2682                         g_assert_not_reached();
2683                     }
2684 
2685                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2686                     i++, k++;
2687                 }
2688                 break;
2689             default:
2690                 break;
2691             }
2692             for (; i < nb_cargs; i++, k++) {
2693                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2694                                   op->args[k]);
2695             }
2696         }
2697 
2698         if (have_prefs || op->life) {
2699             for (; col < 40; ++col) {
2700                 putc(' ', f);
2701             }
2702         }
2703 
2704         if (op->life) {
2705             unsigned life = op->life;
2706 
2707             if (life & (SYNC_ARG * 3)) {
2708                 ne_fprintf(f, "  sync:");
2709                 for (i = 0; i < 2; ++i) {
2710                     if (life & (SYNC_ARG << i)) {
2711                         ne_fprintf(f, " %d", i);
2712                     }
2713                 }
2714             }
2715             life /= DEAD_ARG;
2716             if (life) {
2717                 ne_fprintf(f, "  dead:");
2718                 for (i = 0; life; ++i, life >>= 1) {
2719                     if (life & 1) {
2720                         ne_fprintf(f, " %d", i);
2721                     }
2722                 }
2723             }
2724         }
2725 
2726         if (have_prefs) {
2727             for (i = 0; i < nb_oargs; ++i) {
2728                 TCGRegSet set = output_pref(op, i);
2729 
2730                 if (i == 0) {
2731                     ne_fprintf(f, "  pref=");
2732                 } else {
2733                     ne_fprintf(f, ",");
2734                 }
2735                 if (set == 0) {
2736                     ne_fprintf(f, "none");
2737                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2738                     ne_fprintf(f, "all");
2739 #ifdef CONFIG_DEBUG_TCG
2740                 } else if (tcg_regset_single(set)) {
2741                     TCGReg reg = tcg_regset_first(set);
2742                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2743 #endif
2744                 } else if (TCG_TARGET_NB_REGS <= 32) {
2745                     ne_fprintf(f, "0x%x", (uint32_t)set);
2746                 } else {
2747                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2748                 }
2749             }
2750         }
2751 
2752         putc('\n', f);
2753     }
2754 }
2755 
2756 /* Give higher priority to constraints with fewer registers. */
2757 static int get_constraint_priority(const TCGOpDef *def, int k)
2758 {
2759     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2760     int n = ctpop64(arg_ct->regs);
2761 
2762     /*
2763      * Sort constraints of a single register first, which includes output
2764      * aliases (which must exactly match the input already allocated).
2765      */
2766     if (n == 1 || arg_ct->oalias) {
2767         return INT_MAX;
2768     }
2769 
2770     /*
2771      * Sort register pairs next, the second immediately after the first.
2772      * Arbitrarily sort multiple pairs by the index of the first reg;
2773      * there shouldn't be many pairs.
2774      */
2775     switch (arg_ct->pair) {
2776     case 1:
2777     case 3:
2778         return (k + 1) * 2;
2779     case 2:
2780         return (arg_ct->pair_index + 1) * 2 - 1;
2781     }
2782 
2783     /* Finally, sort by decreasing register count. */
2784     assert(n > 1);
2785     return -n;
2786 }
2787 
2788 /* sort from highest priority to lowest */
2789 static void sort_constraints(TCGOpDef *def, int start, int n)
2790 {
2791     int i, j;
2792     TCGArgConstraint *a = def->args_ct;
2793 
2794     for (i = 0; i < n; i++) {
2795         a[start + i].sort_index = start + i;
2796     }
2797     if (n <= 1) {
2798         return;
2799     }
2800     for (i = 0; i < n - 1; i++) {
2801         for (j = i + 1; j < n; j++) {
2802             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2803             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2804             if (p1 < p2) {
2805                 int tmp = a[start + i].sort_index;
2806                 a[start + i].sort_index = a[start + j].sort_index;
2807                 a[start + j].sort_index = tmp;
2808             }
2809         }
2810     }
2811 }
2812 
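     /*
      * Translate each backend constraint string into TCGArgConstraint
      * bits, link aliased and paired arguments, and sort every opcode's
      * constraints by allocation priority.  As an illustration, a set
      * like { "r", "0", "ri" } constrains the output to any register,
      * aliases the first input to that output, and allows the second
      * input to be either a register or an immediate; the real sets are
      * provided by tcg-target-con-set.h and tcg-target-con-str.h.
      */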
2813 static void process_op_defs(TCGContext *s)
2814 {
2815     TCGOpcode op;
2816 
2817     for (op = 0; op < NB_OPS; op++) {
2818         TCGOpDef *def = &tcg_op_defs[op];
2819         const TCGTargetOpDef *tdefs;
2820         bool saw_alias_pair = false;
2821         int i, o, i2, o2, nb_args;
2822 
2823         if (def->flags & TCG_OPF_NOT_PRESENT) {
2824             continue;
2825         }
2826 
2827         nb_args = def->nb_iargs + def->nb_oargs;
2828         if (nb_args == 0) {
2829             continue;
2830         }
2831 
2832         /*
2833          * Macro magic should make it impossible, but double-check that
2834          * the array index is in range.  Since the signedness of an enum
2835          * is implementation-defined, force the result to unsigned.
2836          */
2837         unsigned con_set = tcg_target_op_def(op);
2838         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2839         tdefs = &constraint_sets[con_set];
2840 
2841         for (i = 0; i < nb_args; i++) {
2842             const char *ct_str = tdefs->args_ct_str[i];
2843             bool input_p = i >= def->nb_oargs;
2844 
2845             /* Incomplete TCGTargetOpDef entry. */
2846             tcg_debug_assert(ct_str != NULL);
2847 
2848             switch (*ct_str) {
2849             case '0' ... '9':
2850                 o = *ct_str - '0';
2851                 tcg_debug_assert(input_p);
2852                 tcg_debug_assert(o < def->nb_oargs);
2853                 tcg_debug_assert(def->args_ct[o].regs != 0);
2854                 tcg_debug_assert(!def->args_ct[o].oalias);
2855                 def->args_ct[i] = def->args_ct[o];
2856                 /* The output sets oalias.  */
2857                 def->args_ct[o].oalias = 1;
2858                 def->args_ct[o].alias_index = i;
2859                 /* The input sets ialias. */
2860                 def->args_ct[i].ialias = 1;
2861                 def->args_ct[i].alias_index = o;
2862                 if (def->args_ct[i].pair) {
2863                     saw_alias_pair = true;
2864                 }
2865                 tcg_debug_assert(ct_str[1] == '\0');
2866                 continue;
2867 
2868             case '&':
2869                 tcg_debug_assert(!input_p);
2870                 def->args_ct[i].newreg = true;
2871                 ct_str++;
2872                 break;
2873 
2874             case 'p': /* plus */
2875                 /* Allocate to the register after the previous. */
2876                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2877                 o = i - 1;
2878                 tcg_debug_assert(!def->args_ct[o].pair);
2879                 tcg_debug_assert(!def->args_ct[o].ct);
2880                 def->args_ct[i] = (TCGArgConstraint){
2881                     .pair = 2,
2882                     .pair_index = o,
2883                     .regs = def->args_ct[o].regs << 1,
2884                 };
2885                 def->args_ct[o].pair = 1;
2886                 def->args_ct[o].pair_index = i;
2887                 tcg_debug_assert(ct_str[1] == '\0');
2888                 continue;
2889 
2890             case 'm': /* minus */
2891                 /* Allocate to the register before the previous. */
2892                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2893                 o = i - 1;
2894                 tcg_debug_assert(!def->args_ct[o].pair);
2895                 tcg_debug_assert(!def->args_ct[o].ct);
2896                 def->args_ct[i] = (TCGArgConstraint){
2897                     .pair = 1,
2898                     .pair_index = o,
2899                     .regs = def->args_ct[o].regs >> 1,
2900                 };
2901                 def->args_ct[o].pair = 2;
2902                 def->args_ct[o].pair_index = i;
2903                 tcg_debug_assert(ct_str[1] == '\0');
2904                 continue;
2905             }
2906 
2907             do {
2908                 switch (*ct_str) {
2909                 case 'i':
2910                     def->args_ct[i].ct |= TCG_CT_CONST;
2911                     break;
2912 
2913                 /* Include all of the target-specific constraints. */
2914 
2915 #undef CONST
2916 #define CONST(CASE, MASK) \
2917     case CASE: def->args_ct[i].ct |= MASK; break;
2918 #define REGS(CASE, MASK) \
2919     case CASE: def->args_ct[i].regs |= MASK; break;
2920 
2921 #include "tcg-target-con-str.h"
2922 
2923 #undef REGS
2924 #undef CONST
2925                 default:
2926                 case '0' ... '9':
2927                 case '&':
2928                 case 'p':
2929                 case 'm':
2930                     /* Typo in TCGTargetOpDef constraint. */
2931                     g_assert_not_reached();
2932                 }
2933             } while (*++ct_str != '\0');
2934         }
2935 
2936         /* TCGTargetOpDef entry with too much information? */
2937         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2938 
2939         /*
2940          * Fix up output pairs that are aliased with inputs.
2941          * When we created the alias, we copied pair from the output.
2942          * There are three cases:
2943          *    (1a) Pairs of inputs alias pairs of outputs.
2944          *    (1b) One input aliases the first of a pair of outputs.
2945          *    (2)  One input aliases the second of a pair of outputs.
2946          *
2947          * Case 1a is handled by making sure that the pair_index'es are
2948          * properly updated so that they appear the same as a pair of inputs.
2949          *
2950          * Case 1b is handled by setting the pair_index of the input to
2951          * itself, simply so it doesn't point to an unrelated argument.
2952          * Since we don't encounter the "second" during the input allocation
2953          * phase, nothing happens with the second half of the input pair.
2954          *
2955          * Case 2 is handled by setting the second input to pair=3, the
2956          * first output to pair=3, and the pair_index'es to match.
2957          */
2958         if (saw_alias_pair) {
2959             for (i = def->nb_oargs; i < nb_args; i++) {
2960                 /*
2961                  * Since [0-9pm] must be alone in the constraint string,
2962                  * the only way they can both be set is if the pair comes
2963                  * from the output alias.
2964                  */
2965                 if (!def->args_ct[i].ialias) {
2966                     continue;
2967                 }
2968                 switch (def->args_ct[i].pair) {
2969                 case 0:
2970                     break;
2971                 case 1:
2972                     o = def->args_ct[i].alias_index;
2973                     o2 = def->args_ct[o].pair_index;
2974                     tcg_debug_assert(def->args_ct[o].pair == 1);
2975                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2976                     if (def->args_ct[o2].oalias) {
2977                         /* Case 1a */
2978                         i2 = def->args_ct[o2].alias_index;
2979                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2980                         def->args_ct[i2].pair_index = i;
2981                         def->args_ct[i].pair_index = i2;
2982                     } else {
2983                         /* Case 1b */
2984                         def->args_ct[i].pair_index = i;
2985                     }
2986                     break;
2987                 case 2:
2988                     o = def->args_ct[i].alias_index;
2989                     o2 = def->args_ct[o].pair_index;
2990                     tcg_debug_assert(def->args_ct[o].pair == 2);
2991                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2992                     if (def->args_ct[o2].oalias) {
2993                         /* Case 1a */
2994                         i2 = def->args_ct[o2].alias_index;
2995                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2996                         def->args_ct[i2].pair_index = i;
2997                         def->args_ct[i].pair_index = i2;
2998                     } else {
2999                         /* Case 2 */
3000                         def->args_ct[i].pair = 3;
3001                         def->args_ct[o2].pair = 3;
3002                         def->args_ct[i].pair_index = o2;
3003                         def->args_ct[o2].pair_index = i;
3004                     }
3005                     break;
3006                 default:
3007                     g_assert_not_reached();
3008                 }
3009             }
3010         }
3011 
3012         /* Sort the constraints (XXX: this is just a heuristic). */
3013         sort_constraints(def, 0, def->nb_oargs);
3014         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3015     }
3016 }
3017 
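     /*
      * Remove OP from the use list of the label referenced by its IDX'th
      * argument; the use must be present.
      */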
3018 static void remove_label_use(TCGOp *op, int idx)
3019 {
3020     TCGLabel *label = arg_label(op->args[idx]);
3021     TCGLabelUse *use;
3022 
3023     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3024         if (use->op == op) {
3025             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3026             return;
3027         }
3028     }
3029     g_assert_not_reached();
3030 }
3031 
3032 void tcg_op_remove(TCGContext *s, TCGOp *op)
3033 {
3034     switch (op->opc) {
3035     case INDEX_op_br:
3036         remove_label_use(op, 0);
3037         break;
3038     case INDEX_op_brcond_i32:
3039     case INDEX_op_brcond_i64:
3040         remove_label_use(op, 3);
3041         break;
3042     case INDEX_op_brcond2_i32:
3043         remove_label_use(op, 5);
3044         break;
3045     default:
3046         break;
3047     }
3048 
3049     QTAILQ_REMOVE(&s->ops, op, link);
3050     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3051     s->nb_ops--;
3052 }
3053 
3054 void tcg_remove_ops_after(TCGOp *op)
3055 {
3056     TCGContext *s = tcg_ctx;
3057 
3058     while (true) {
3059         TCGOp *last = tcg_last_op();
3060         if (last == op) {
3061             return;
3062         }
3063         tcg_op_remove(s, last);
3064     }
3065 }
3066 
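     /*
      * Allocate an op with room for NARGS arguments, preferring to
      * recycle an entry from free_ops with sufficient capacity.
      */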
3067 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3068 {
3069     TCGContext *s = tcg_ctx;
3070     TCGOp *op = NULL;
3071 
3072     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3073         QTAILQ_FOREACH(op, &s->free_ops, link) {
3074             if (nargs <= op->nargs) {
3075                 QTAILQ_REMOVE(&s->free_ops, op, link);
3076                 nargs = op->nargs;
3077                 goto found;
3078             }
3079         }
3080     }
3081 
3082     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3083     nargs = MAX(4, nargs);
3084     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3085 
3086  found:
3087     memset(op, 0, offsetof(TCGOp, link));
3088     op->opc = opc;
3089     op->nargs = nargs;
3090 
3091     /* Check for bitfield overflow. */
3092     tcg_debug_assert(op->nargs == nargs);
3093 
3094     s->nb_ops++;
3095     return op;
3096 }
3097 
3098 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3099 {
3100     TCGOp *op = tcg_op_alloc(opc, nargs);
3101     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3102     return op;
3103 }
3104 
3105 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3106                             TCGOpcode opc, unsigned nargs)
3107 {
3108     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3109     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3110     return new_op;
3111 }
3112 
3113 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3114                            TCGOpcode opc, unsigned nargs)
3115 {
3116     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3117     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3118     return new_op;
3119 }
3120 
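     /*
      * Retarget every branch recorded against FROM so that it refers to
      * TO, then splice FROM's use list onto TO's.
      */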
3121 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3122 {
3123     TCGLabelUse *u;
3124 
3125     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3126         TCGOp *op = u->op;
3127         switch (op->opc) {
3128         case INDEX_op_br:
3129             op->args[0] = label_arg(to);
3130             break;
3131         case INDEX_op_brcond_i32:
3132         case INDEX_op_brcond_i64:
3133             op->args[3] = label_arg(to);
3134             break;
3135         case INDEX_op_brcond2_i32:
3136             op->args[5] = label_arg(to);
3137             break;
3138         default:
3139             g_assert_not_reached();
3140         }
3141     }
3142 
3143     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3144 }
3145 
3146 /* Reachability analysis: remove unreachable code.  */
3147 static void __attribute__((noinline))
3148 reachable_code_pass(TCGContext *s)
3149 {
3150     TCGOp *op, *op_next, *op_prev;
3151     bool dead = false;
3152 
3153     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3154         bool remove = dead;
3155         TCGLabel *label;
3156 
3157         switch (op->opc) {
3158         case INDEX_op_set_label:
3159             label = arg_label(op->args[0]);
3160 
3161             /*
3162              * Note that the first op in the TB is always a load,
3163              * so there is always something before a label.
3164              */
3165             op_prev = QTAILQ_PREV(op, link);
3166 
3167             /*
3168              * If we find two sequential labels, move all branches to
3169              * reference the second label and remove the first label.
3170              * Do this before branch to next optimization, so that the
3171              * middle label is out of the way.
3172              */
3173             if (op_prev->opc == INDEX_op_set_label) {
3174                 move_label_uses(label, arg_label(op_prev->args[0]));
3175                 tcg_op_remove(s, op_prev);
3176                 op_prev = QTAILQ_PREV(op, link);
3177             }
3178 
3179             /*
3180              * Optimization can fold conditional branches to unconditional.
3181              * If we find a label which is preceded by an unconditional
3182              * branch to next, remove the branch.  We couldn't do this when
3183              * processing the branch because any dead code between the branch
3184              * and label had not yet been removed.
3185              */
3186             if (op_prev->opc == INDEX_op_br &&
3187                 label == arg_label(op_prev->args[0])) {
3188                 tcg_op_remove(s, op_prev);
3189                 /* Fall through means insns become live again.  */
3190                 dead = false;
3191             }
3192 
3193             if (QSIMPLEQ_EMPTY(&label->branches)) {
3194                 /*
3195                  * While there is an occasional backward branch, virtually
3196                  * all branches generated by the translators are forward.
3197                  * Which means that generally we will have already removed
3198                  * all references to this label, and there is
3199                  * little to be gained by iterating.
3200                  */
3201                 remove = true;
3202             } else {
3203                 /* Once we see a label, insns become live again.  */
3204                 dead = false;
3205                 remove = false;
3206             }
3207             break;
3208 
3209         case INDEX_op_br:
3210         case INDEX_op_exit_tb:
3211         case INDEX_op_goto_ptr:
3212             /* Unconditional branches; everything following is dead.  */
3213             dead = true;
3214             break;
3215 
3216         case INDEX_op_call:
3217             /* Notice noreturn helper calls, raising exceptions.  */
3218             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3219                 dead = true;
3220             }
3221             break;
3222 
3223         case INDEX_op_insn_start:
3224             /* Never remove -- we need to keep these for unwind.  */
3225             remove = false;
3226             break;
3227 
3228         default:
3229             break;
3230         }
3231 
3232         if (remove) {
3233             tcg_op_remove(s, op);
3234         }
3235     }
3236 }
3237 
3238 #define TS_DEAD  1
3239 #define TS_MEM   2
3240 
3241 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3242 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
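/*
 * A sketch of how the liveness encoding is used below: ts->state is a
 * mask of TS_DEAD (the value is not live in a register) and TS_MEM (a
 * valid copy exists in the temp's canonical memory slot).  For argument
 * index n of an op, IS_DEAD_ARG(n) says the temp dies at this op, and
 * NEED_SYNC_ARG(n) says its value must also be written back to memory.
 */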
3243 
3244 /* For liveness_pass_1, the register preferences for a given temp.  */
3245 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3246 {
3247     return ts->state_ptr;
3248 }
3249 
3250 /* For liveness_pass_1, reset the preferences for a given temp to the
3251  * maximal regset for its type.
3252  */
3253 static inline void la_reset_pref(TCGTemp *ts)
3254 {
3255     *la_temp_pref(ts)
3256         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3257 }
3258 
3259 /* liveness analysis: end of function: all temps are dead, and globals
3260    should be in memory. */
3261 static void la_func_end(TCGContext *s, int ng, int nt)
3262 {
3263     int i;
3264 
3265     for (i = 0; i < ng; ++i) {
3266         s->temps[i].state = TS_DEAD | TS_MEM;
3267         la_reset_pref(&s->temps[i]);
3268     }
3269     for (i = ng; i < nt; ++i) {
3270         s->temps[i].state = TS_DEAD;
3271         la_reset_pref(&s->temps[i]);
3272     }
3273 }
3274 
3275 /* liveness analysis: end of basic block: all temps are dead, globals
3276    and local temps should be in memory. */
3277 static void la_bb_end(TCGContext *s, int ng, int nt)
3278 {
3279     int i;
3280 
3281     for (i = 0; i < nt; ++i) {
3282         TCGTemp *ts = &s->temps[i];
3283         int state;
3284 
3285         switch (ts->kind) {
3286         case TEMP_FIXED:
3287         case TEMP_GLOBAL:
3288         case TEMP_TB:
3289             state = TS_DEAD | TS_MEM;
3290             break;
3291         case TEMP_EBB:
3292         case TEMP_CONST:
3293             state = TS_DEAD;
3294             break;
3295         default:
3296             g_assert_not_reached();
3297         }
3298         ts->state = state;
3299         la_reset_pref(ts);
3300     }
3301 }
3302 
3303 /* liveness analysis: sync globals back to memory.  */
3304 static void la_global_sync(TCGContext *s, int ng)
3305 {
3306     int i;
3307 
3308     for (i = 0; i < ng; ++i) {
3309         int state = s->temps[i].state;
3310         s->temps[i].state = state | TS_MEM;
3311         if (state == TS_DEAD) {
3312             /* If the global was previously dead, reset prefs.  */
3313             la_reset_pref(&s->temps[i]);
3314         }
3315     }
3316 }
3317 
3318 /*
3319  * liveness analysis: conditional branch: all temps are dead unless
3320  * explicitly live-across-conditional-branch, globals and local temps
3321  * should be synced.
3322  */
3323 static void la_bb_sync(TCGContext *s, int ng, int nt)
3324 {
3325     la_global_sync(s, ng);
3326 
3327     for (int i = ng; i < nt; ++i) {
3328         TCGTemp *ts = &s->temps[i];
3329         int state;
3330 
3331         switch (ts->kind) {
3332         case TEMP_TB:
3333             state = ts->state;
3334             ts->state = state | TS_MEM;
3335             if (state != TS_DEAD) {
3336                 continue;
3337             }
3338             break;
3339         case TEMP_EBB:
3340         case TEMP_CONST:
3341             continue;
3342         default:
3343             g_assert_not_reached();
3344         }
3345         la_reset_pref(&s->temps[i]);
3346     }
3347 }
3348 
3349 /* liveness analysis: sync globals back to memory and kill.  */
3350 static void la_global_kill(TCGContext *s, int ng)
3351 {
3352     int i;
3353 
3354     for (i = 0; i < ng; i++) {
3355         s->temps[i].state = TS_DEAD | TS_MEM;
3356         la_reset_pref(&s->temps[i]);
3357     }
3358 }
3359 
3360 /* liveness analysis: prefer call-saved registers for temps live across calls.  */
3361 static void la_cross_call(TCGContext *s, int nt)
3362 {
3363     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3364     int i;
3365 
3366     for (i = 0; i < nt; i++) {
3367         TCGTemp *ts = &s->temps[i];
3368         if (!(ts->state & TS_DEAD)) {
3369             TCGRegSet *pset = la_temp_pref(ts);
3370             TCGRegSet set = *pset;
3371 
3372             set &= mask;
3373             /* If the combination is not possible, restart.  */
3374             if (set == 0) {
3375                 set = tcg_target_available_regs[ts->type] & mask;
3376             }
3377             *pset = set;
3378         }
3379     }
3380 }
3381 
3382 /*
3383  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3384  * to TEMP_EBB, if possible.
3385  */
3386 static void __attribute__((noinline))
3387 liveness_pass_0(TCGContext *s)
3388 {
3389     void * const multiple_ebb = (void *)(uintptr_t)-1;
3390     int nb_temps = s->nb_temps;
3391     TCGOp *op, *ebb;
3392 
3393     for (int i = s->nb_globals; i < nb_temps; ++i) {
3394         s->temps[i].state_ptr = NULL;
3395     }
3396 
3397     /*
3398      * Represent each EBB by the op at which it begins.  In the case of
3399      * the first EBB, this is the first op, otherwise it is a label.
3400      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3401      * within a single EBB, else MULTIPLE_EBB.
3402      */
3403     ebb = QTAILQ_FIRST(&s->ops);
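    /*
     * For example: a temp whose every use lies between the same pair of
     * labels sees the same 'ebb' op each time and keeps state_ptr == ebb,
     * while a single use in another EBB flips it to MULTIPLE_EBB for good.
     */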
3404     QTAILQ_FOREACH(op, &s->ops, link) {
3405         const TCGOpDef *def;
3406         int nb_oargs, nb_iargs;
3407 
3408         switch (op->opc) {
3409         case INDEX_op_set_label:
3410             ebb = op;
3411             continue;
3412         case INDEX_op_discard:
3413             continue;
3414         case INDEX_op_call:
3415             nb_oargs = TCGOP_CALLO(op);
3416             nb_iargs = TCGOP_CALLI(op);
3417             break;
3418         default:
3419             def = &tcg_op_defs[op->opc];
3420             nb_oargs = def->nb_oargs;
3421             nb_iargs = def->nb_iargs;
3422             break;
3423         }
3424 
3425         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3426             TCGTemp *ts = arg_temp(op->args[i]);
3427 
3428             if (ts->kind != TEMP_TB) {
3429                 continue;
3430             }
3431             if (ts->state_ptr == NULL) {
3432                 ts->state_ptr = ebb;
3433             } else if (ts->state_ptr != ebb) {
3434                 ts->state_ptr = multiple_ebb;
3435             }
3436         }
3437     }
3438 
3439     /*
3440      * For TEMP_TB that turned out not to be used beyond one EBB,
3441      * reduce the liveness to TEMP_EBB.
3442      */
3443     for (int i = s->nb_globals; i < nb_temps; ++i) {
3444         TCGTemp *ts = &s->temps[i];
3445         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3446             ts->kind = TEMP_EBB;
3447         }
3448     }
3449 }
3450 
3451 /* Liveness analysis: update the opc_arg_life array to tell if a
3452    given input argument is dead. Instructions updating dead
3453    temporaries are removed. */
3454 static void __attribute__((noinline))
3455 liveness_pass_1(TCGContext *s)
3456 {
3457     int nb_globals = s->nb_globals;
3458     int nb_temps = s->nb_temps;
3459     TCGOp *op, *op_prev;
3460     TCGRegSet *prefs;
3461     int i;
3462 
3463     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3464     for (i = 0; i < nb_temps; ++i) {
3465         s->temps[i].state_ptr = prefs + i;
3466     }
3467 
3468     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3469     la_func_end(s, nb_globals, nb_temps);
3470 
3471     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3472         int nb_iargs, nb_oargs;
3473         TCGOpcode opc_new, opc_new2;
3474         bool have_opc_new2;
3475         TCGLifeData arg_life = 0;
3476         TCGTemp *ts;
3477         TCGOpcode opc = op->opc;
3478         const TCGOpDef *def = &tcg_op_defs[opc];
3479 
3480         switch (opc) {
3481         case INDEX_op_call:
3482             {
3483                 const TCGHelperInfo *info = tcg_call_info(op);
3484                 int call_flags = tcg_call_flags(op);
3485 
3486                 nb_oargs = TCGOP_CALLO(op);
3487                 nb_iargs = TCGOP_CALLI(op);
3488 
3489                 /* pure functions can be removed if their result is unused */
3490                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3491                     for (i = 0; i < nb_oargs; i++) {
3492                         ts = arg_temp(op->args[i]);
3493                         if (ts->state != TS_DEAD) {
3494                             goto do_not_remove_call;
3495                         }
3496                     }
3497                     goto do_remove;
3498                 }
3499             do_not_remove_call:
3500 
3501                 /* Output args are dead.  */
3502                 for (i = 0; i < nb_oargs; i++) {
3503                     ts = arg_temp(op->args[i]);
3504                     if (ts->state & TS_DEAD) {
3505                         arg_life |= DEAD_ARG << i;
3506                     }
3507                     if (ts->state & TS_MEM) {
3508                         arg_life |= SYNC_ARG << i;
3509                     }
3510                     ts->state = TS_DEAD;
3511                     la_reset_pref(ts);
3512                 }
3513 
3514                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3515                 memset(op->output_pref, 0, sizeof(op->output_pref));
3516 
3517                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3518                                     TCG_CALL_NO_READ_GLOBALS))) {
3519                     la_global_kill(s, nb_globals);
3520                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3521                     la_global_sync(s, nb_globals);
3522                 }
3523 
3524                 /* Record arguments that die in this helper.  */
3525                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3526                     ts = arg_temp(op->args[i]);
3527                     if (ts->state & TS_DEAD) {
3528                         arg_life |= DEAD_ARG << i;
3529                     }
3530                 }
3531 
3532                 /* For all live registers, remove call-clobbered prefs.  */
3533                 la_cross_call(s, nb_temps);
3534 
3535                 /*
3536                  * Input arguments are live for preceding opcodes.
3537                  *
3538                  * For those arguments that die, and will be allocated in
3539                  * registers, clear the register set for that arg, to be
3540                  * filled in below.  For args that will be on the stack,
3541                  * reset to any available reg.  Process arguments in reverse
3542                  * order so that if a temp is used more than once, the stack
3543                  * reset to max happens before the register reset to 0.
3544                  */
3545                 for (i = nb_iargs - 1; i >= 0; i--) {
3546                     const TCGCallArgumentLoc *loc = &info->in[i];
3547                     ts = arg_temp(op->args[nb_oargs + i]);
3548 
3549                     if (ts->state & TS_DEAD) {
3550                         switch (loc->kind) {
3551                         case TCG_CALL_ARG_NORMAL:
3552                         case TCG_CALL_ARG_EXTEND_U:
3553                         case TCG_CALL_ARG_EXTEND_S:
3554                             if (arg_slot_reg_p(loc->arg_slot)) {
3555                                 *la_temp_pref(ts) = 0;
3556                                 break;
3557                             }
3558                             /* fall through */
3559                         default:
3560                             *la_temp_pref(ts) =
3561                                 tcg_target_available_regs[ts->type];
3562                             break;
3563                         }
3564                         ts->state &= ~TS_DEAD;
3565                     }
3566                 }
3567 
3568                 /*
3569                  * For each input argument, add its input register to prefs.
3570                  * If a temp is used once, this produces a single set bit;
3571                  * if a temp is used multiple times, this produces a set.
3572                  */
3573                 for (i = 0; i < nb_iargs; i++) {
3574                     const TCGCallArgumentLoc *loc = &info->in[i];
3575                     ts = arg_temp(op->args[nb_oargs + i]);
3576 
3577                     switch (loc->kind) {
3578                     case TCG_CALL_ARG_NORMAL:
3579                     case TCG_CALL_ARG_EXTEND_U:
3580                     case TCG_CALL_ARG_EXTEND_S:
3581                         if (arg_slot_reg_p(loc->arg_slot)) {
3582                             tcg_regset_set_reg(*la_temp_pref(ts),
3583                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3584                         }
3585                         break;
3586                     default:
3587                         break;
3588                     }
3589                 }
3590             }
3591             break;
3592         case INDEX_op_insn_start:
3593             break;
3594         case INDEX_op_discard:
3595             /* mark the temporary as dead */
3596             ts = arg_temp(op->args[0]);
3597             ts->state = TS_DEAD;
3598             la_reset_pref(ts);
3599             break;
3600 
3601         case INDEX_op_add2_i32:
3602             opc_new = INDEX_op_add_i32;
3603             goto do_addsub2;
3604         case INDEX_op_sub2_i32:
3605             opc_new = INDEX_op_sub_i32;
3606             goto do_addsub2;
3607         case INDEX_op_add2_i64:
3608             opc_new = INDEX_op_add_i64;
3609             goto do_addsub2;
3610         case INDEX_op_sub2_i64:
3611             opc_new = INDEX_op_sub_i64;
3612         do_addsub2:
3613             nb_iargs = 4;
3614             nb_oargs = 2;
3615             /* Test if the high part of the operation is dead, but not
3616                the low part.  The result can be optimized to a simple
3617                add or sub.  This happens often for an x86_64 guest when the
3618                cpu mode is set to 32 bit.  */
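            /*
             * For example (a sketch): add2_i32 rl,rh,al,ah,bl,bh with RH
             * dead but RL live is rewritten below to add_i32 rl,al,bl.
             */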
3619             if (arg_temp(op->args[1])->state == TS_DEAD) {
3620                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3621                     goto do_remove;
3622                 }
3623                 /* Replace the opcode and adjust the args in place,
3624                    leaving 3 unused args at the end.  */
3625                 op->opc = opc = opc_new;
3626                 op->args[1] = op->args[2];
3627                 op->args[2] = op->args[4];
3628                 /* Fall through and mark the single-word operation live.  */
3629                 nb_iargs = 2;
3630                 nb_oargs = 1;
3631             }
3632             goto do_not_remove;
3633 
3634         case INDEX_op_mulu2_i32:
3635             opc_new = INDEX_op_mul_i32;
3636             opc_new2 = INDEX_op_muluh_i32;
3637             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3638             goto do_mul2;
3639         case INDEX_op_muls2_i32:
3640             opc_new = INDEX_op_mul_i32;
3641             opc_new2 = INDEX_op_mulsh_i32;
3642             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3643             goto do_mul2;
3644         case INDEX_op_mulu2_i64:
3645             opc_new = INDEX_op_mul_i64;
3646             opc_new2 = INDEX_op_muluh_i64;
3647             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3648             goto do_mul2;
3649         case INDEX_op_muls2_i64:
3650             opc_new = INDEX_op_mul_i64;
3651             opc_new2 = INDEX_op_mulsh_i64;
3652             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3653             goto do_mul2;
3654         do_mul2:
3655             nb_iargs = 2;
3656             nb_oargs = 2;
3657             if (arg_temp(op->args[1])->state == TS_DEAD) {
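            /*
             * For example (a sketch): mulu2_i32 lo,hi,a,b becomes
             * mul_i32 lo,a,b when only HI is dead, or muluh_i32 hi,a,b
             * when only LO is dead and the muluh opcode is available.
             */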
3658                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3659                     /* Both parts of the operation are dead.  */
3660                     goto do_remove;
3661                 }
3662                 /* The high part of the operation is dead; generate the low. */
3663                 op->opc = opc = opc_new;
3664                 op->args[1] = op->args[2];
3665                 op->args[2] = op->args[3];
3666             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3667                 /* The low part of the operation is dead; generate the high. */
3668                 op->opc = opc = opc_new2;
3669                 op->args[0] = op->args[1];
3670                 op->args[1] = op->args[2];
3671                 op->args[2] = op->args[3];
3672             } else {
3673                 goto do_not_remove;
3674             }
3675             /* Mark the single-word operation live.  */
3676             nb_oargs = 1;
3677             goto do_not_remove;
3678 
3679         default:
3680             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3681             nb_iargs = def->nb_iargs;
3682             nb_oargs = def->nb_oargs;
3683 
3684             /* Test if the operation can be removed because all
3685                its outputs are dead. We assume that nb_oargs == 0
3686                implies side effects.  */
3687             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3688                 for (i = 0; i < nb_oargs; i++) {
3689                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3690                         goto do_not_remove;
3691                     }
3692                 }
3693                 goto do_remove;
3694             }
3695             goto do_not_remove;
3696 
3697         do_remove:
3698             tcg_op_remove(s, op);
3699             break;
3700 
3701         do_not_remove:
3702             for (i = 0; i < nb_oargs; i++) {
3703                 ts = arg_temp(op->args[i]);
3704 
3705                 /* Remember the preference of the uses that followed.  */
3706                 if (i < ARRAY_SIZE(op->output_pref)) {
3707                     op->output_pref[i] = *la_temp_pref(ts);
3708                 }
3709 
3710                 /* Output args are dead.  */
3711                 if (ts->state & TS_DEAD) {
3712                     arg_life |= DEAD_ARG << i;
3713                 }
3714                 if (ts->state & TS_MEM) {
3715                     arg_life |= SYNC_ARG << i;
3716                 }
3717                 ts->state = TS_DEAD;
3718                 la_reset_pref(ts);
3719             }
3720 
3721             /* If end of basic block, update.  */
3722             if (def->flags & TCG_OPF_BB_EXIT) {
3723                 la_func_end(s, nb_globals, nb_temps);
3724             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3725                 la_bb_sync(s, nb_globals, nb_temps);
3726             } else if (def->flags & TCG_OPF_BB_END) {
3727                 la_bb_end(s, nb_globals, nb_temps);
3728             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3729                 la_global_sync(s, nb_globals);
3730                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3731                     la_cross_call(s, nb_temps);
3732                 }
3733             }
3734 
3735             /* Record arguments that die in this opcode.  */
3736             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3737                 ts = arg_temp(op->args[i]);
3738                 if (ts->state & TS_DEAD) {
3739                     arg_life |= DEAD_ARG << i;
3740                 }
3741             }
3742 
3743             /* Input arguments are live for preceding opcodes.  */
3744             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3745                 ts = arg_temp(op->args[i]);
3746                 if (ts->state & TS_DEAD) {
3747                     /* For operands that were dead, initially allow
3748                        all regs for the type.  */
3749                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3750                     ts->state &= ~TS_DEAD;
3751                 }
3752             }
3753 
3754             /* Incorporate constraints for this operand.  */
3755             switch (opc) {
3756             case INDEX_op_mov_i32:
3757             case INDEX_op_mov_i64:
3758                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3759                have proper constraints.  That said, we special-case
3760                moves to propagate preferences backward.  */
3761                 if (IS_DEAD_ARG(1)) {
3762                     *la_temp_pref(arg_temp(op->args[0]))
3763                         = *la_temp_pref(arg_temp(op->args[1]));
3764                 }
3765                 break;
3766 
3767             default:
3768                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3769                     const TCGArgConstraint *ct = &def->args_ct[i];
3770                     TCGRegSet set, *pset;
3771 
3772                     ts = arg_temp(op->args[i]);
3773                     pset = la_temp_pref(ts);
3774                     set = *pset;
3775 
3776                     set &= ct->regs;
3777                     if (ct->ialias) {
3778                         set &= output_pref(op, ct->alias_index);
3779                     }
3780                     /* If the combination is not possible, restart.  */
3781                     if (set == 0) {
3782                         set = ct->regs;
3783                     }
3784                     *pset = set;
3785                 }
3786                 break;
3787             }
3788             break;
3789         }
3790         op->life = arg_life;
3791     }
3792 }
3793 
3794 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
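/*
 * A sketch of the transformation: each global marked indirect_reg is
 * shadowed by a fresh TEMP_EBB temp.  Uses of the global are rewritten
 * to use the shadow, with an explicit ld inserted before a use whenever
 * the shadow is not yet loaded, and an explicit st inserted after the
 * last write when the value must be synced back.
 */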
3795 static bool __attribute__((noinline))
3796 liveness_pass_2(TCGContext *s)
3797 {
3798     int nb_globals = s->nb_globals;
3799     int nb_temps, i;
3800     bool changes = false;
3801     TCGOp *op, *op_next;
3802 
3803     /* Create a temporary for each indirect global.  */
3804     for (i = 0; i < nb_globals; ++i) {
3805         TCGTemp *its = &s->temps[i];
3806         if (its->indirect_reg) {
3807             TCGTemp *dts = tcg_temp_alloc(s);
3808             dts->type = its->type;
3809             dts->base_type = its->base_type;
3810             dts->temp_subindex = its->temp_subindex;
3811             dts->kind = TEMP_EBB;
3812             its->state_ptr = dts;
3813         } else {
3814             its->state_ptr = NULL;
3815         }
3816         /* All globals begin dead.  */
3817         its->state = TS_DEAD;
3818     }
3819     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3820         TCGTemp *its = &s->temps[i];
3821         its->state_ptr = NULL;
3822         its->state = TS_DEAD;
3823     }
3824 
3825     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3826         TCGOpcode opc = op->opc;
3827         const TCGOpDef *def = &tcg_op_defs[opc];
3828         TCGLifeData arg_life = op->life;
3829         int nb_iargs, nb_oargs, call_flags;
3830         TCGTemp *arg_ts, *dir_ts;
3831 
3832         if (opc == INDEX_op_call) {
3833             nb_oargs = TCGOP_CALLO(op);
3834             nb_iargs = TCGOP_CALLI(op);
3835             call_flags = tcg_call_flags(op);
3836         } else {
3837             nb_iargs = def->nb_iargs;
3838             nb_oargs = def->nb_oargs;
3839 
3840             /* Set flags similar to how calls require.  */
3841             if (def->flags & TCG_OPF_COND_BRANCH) {
3842                 /* Like reading globals: sync_globals */
3843                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3844             } else if (def->flags & TCG_OPF_BB_END) {
3845                 /* Like writing globals: save_globals */
3846                 call_flags = 0;
3847             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3848                 /* Like reading globals: sync_globals */
3849                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3850             } else {
3851                 /* No effect on globals.  */
3852                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3853                               TCG_CALL_NO_WRITE_GLOBALS);
3854             }
3855         }
3856 
3857         /* Make sure that input arguments are available.  */
3858         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3859             arg_ts = arg_temp(op->args[i]);
3860             dir_ts = arg_ts->state_ptr;
3861             if (dir_ts && arg_ts->state == TS_DEAD) {
3862                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3863                                   ? INDEX_op_ld_i32
3864                                   : INDEX_op_ld_i64);
3865                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3866 
3867                 lop->args[0] = temp_arg(dir_ts);
3868                 lop->args[1] = temp_arg(arg_ts->mem_base);
3869                 lop->args[2] = arg_ts->mem_offset;
3870 
3871                 /* Loaded, but synced with memory.  */
3872                 arg_ts->state = TS_MEM;
3873             }
3874         }
3875 
3876         /* Perform input replacement, and mark inputs that became dead.
3877            No action is required except keeping temp_state up to date
3878            so that we reload when needed.  */
3879         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3880             arg_ts = arg_temp(op->args[i]);
3881             dir_ts = arg_ts->state_ptr;
3882             if (dir_ts) {
3883                 op->args[i] = temp_arg(dir_ts);
3884                 changes = true;
3885                 if (IS_DEAD_ARG(i)) {
3886                     arg_ts->state = TS_DEAD;
3887                 }
3888             }
3889         }
3890 
3891         /* Liveness analysis should ensure that the following are
3892            all correct, for call sites and basic block end points.  */
3893         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3894             /* Nothing to do */
3895         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3896             for (i = 0; i < nb_globals; ++i) {
3897                 /* Liveness should see that globals are synced back,
3898                    that is, either TS_DEAD or TS_MEM.  */
3899                 arg_ts = &s->temps[i];
3900                 tcg_debug_assert(arg_ts->state_ptr == 0
3901                                  || arg_ts->state != 0);
3902             }
3903         } else {
3904             for (i = 0; i < nb_globals; ++i) {
3905                 /* Liveness should see that globals are saved back,
3906                    that is, TS_DEAD, waiting to be reloaded.  */
3907                 arg_ts = &s->temps[i];
3908                 tcg_debug_assert(arg_ts->state_ptr == 0
3909                                  || arg_ts->state == TS_DEAD);
3910             }
3911         }
3912 
3913         /* Outputs become available.  */
3914         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3915             arg_ts = arg_temp(op->args[0]);
3916             dir_ts = arg_ts->state_ptr;
3917             if (dir_ts) {
3918                 op->args[0] = temp_arg(dir_ts);
3919                 changes = true;
3920 
3921                 /* The output is now live and modified.  */
3922                 arg_ts->state = 0;
3923 
3924                 if (NEED_SYNC_ARG(0)) {
3925                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3926                                       ? INDEX_op_st_i32
3927                                       : INDEX_op_st_i64);
3928                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3929                     TCGTemp *out_ts = dir_ts;
3930 
3931                     if (IS_DEAD_ARG(0)) {
3932                         out_ts = arg_temp(op->args[1]);
3933                         arg_ts->state = TS_DEAD;
3934                         tcg_op_remove(s, op);
3935                     } else {
3936                         arg_ts->state = TS_MEM;
3937                     }
3938 
3939                     sop->args[0] = temp_arg(out_ts);
3940                     sop->args[1] = temp_arg(arg_ts->mem_base);
3941                     sop->args[2] = arg_ts->mem_offset;
3942                 } else {
3943                     tcg_debug_assert(!IS_DEAD_ARG(0));
3944                 }
3945             }
3946         } else {
3947             for (i = 0; i < nb_oargs; i++) {
3948                 arg_ts = arg_temp(op->args[i]);
3949                 dir_ts = arg_ts->state_ptr;
3950                 if (!dir_ts) {
3951                     continue;
3952                 }
3953                 op->args[i] = temp_arg(dir_ts);
3954                 changes = true;
3955 
3956                 /* The output is now live and modified.  */
3957                 arg_ts->state = 0;
3958 
3959                 /* Sync outputs upon their last write.  */
3960                 if (NEED_SYNC_ARG(i)) {
3961                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3962                                       ? INDEX_op_st_i32
3963                                       : INDEX_op_st_i64);
3964                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3965 
3966                     sop->args[0] = temp_arg(dir_ts);
3967                     sop->args[1] = temp_arg(arg_ts->mem_base);
3968                     sop->args[2] = arg_ts->mem_offset;
3969 
3970                     arg_ts->state = TS_MEM;
3971                 }
3972                 /* Drop outputs that are dead.  */
3973                 if (IS_DEAD_ARG(i)) {
3974                     arg_ts->state = TS_DEAD;
3975                 }
3976             }
3977         }
3978     }
3979 
3980     return changes;
3981 }
3982 
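/*
 * Reserve a slot in the TB's stack frame for @ts, aligned for its base
 * type.  For a subdivided temp (base_type != type), all sibling parts
 * receive consecutive offsets; e.g. an I128 split into two I64 halves
 * on a 64-bit host lands at offsets off and off + 8.
 */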
3983 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3984 {
3985     intptr_t off;
3986     int size, align;
3987 
3988     /* When allocating an object, look at the full type. */
3989     size = tcg_type_size(ts->base_type);
3990     switch (ts->base_type) {
3991     case TCG_TYPE_I32:
3992         align = 4;
3993         break;
3994     case TCG_TYPE_I64:
3995     case TCG_TYPE_V64:
3996         align = 8;
3997         break;
3998     case TCG_TYPE_I128:
3999     case TCG_TYPE_V128:
4000     case TCG_TYPE_V256:
4001         /*
4002          * Note that we do not require aligned storage for V256,
4003          * and that we provide alignment for I128 to match V128,
4004          * even if that's above what the host ABI requires.
4005          */
4006         align = 16;
4007         break;
4008     default:
4009         g_assert_not_reached();
4010     }
4011 
4012     /*
4013      * Assume the stack is sufficiently aligned.
4014      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4015      * and do not require 16 byte vector alignment.  This seems slightly
4016      * easier than fully parameterizing the above switch statement.
4017      */
4018     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4019     off = ROUND_UP(s->current_frame_offset, align);
4020 
4021     /* If we've exhausted the stack frame, restart with a smaller TB. */
4022     if (off + size > s->frame_end) {
4023         tcg_raise_tb_overflow(s);
4024     }
4025     s->current_frame_offset = off + size;
4026 #if defined(__sparc__)
4027     off += TCG_TARGET_STACK_BIAS;
4028 #endif
4029 
4030     /* If the object was subdivided, assign memory to all the parts. */
4031     if (ts->base_type != ts->type) {
4032         int part_size = tcg_type_size(ts->type);
4033         int part_count = size / part_size;
4034 
4035         /*
4036          * Each part is allocated sequentially in tcg_temp_new_internal.
4037          * Jump back to the first part by subtracting the current index.
4038          */
4039         ts -= ts->temp_subindex;
4040         for (int i = 0; i < part_count; ++i) {
4041             ts[i].mem_offset = off + i * part_size;
4042             ts[i].mem_base = s->frame_temp;
4043             ts[i].mem_allocated = 1;
4044         }
4045     } else {
4046         ts->mem_offset = off;
4047         ts->mem_base = s->frame_temp;
4048         ts->mem_allocated = 1;
4049     }
4050 }
4051 
4052 /* Assign @reg to @ts, and update reg_to_temp[]. */
4053 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4054 {
4055     if (ts->val_type == TEMP_VAL_REG) {
4056         TCGReg old = ts->reg;
4057         tcg_debug_assert(s->reg_to_temp[old] == ts);
4058         if (old == reg) {
4059             return;
4060         }
4061         s->reg_to_temp[old] = NULL;
4062     }
4063     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4064     s->reg_to_temp[reg] = ts;
4065     ts->val_type = TEMP_VAL_REG;
4066     ts->reg = reg;
4067 }
4068 
4069 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4070 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4071 {
4072     tcg_debug_assert(type != TEMP_VAL_REG);
4073     if (ts->val_type == TEMP_VAL_REG) {
4074         TCGReg reg = ts->reg;
4075         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4076         s->reg_to_temp[reg] = NULL;
4077     }
4078     ts->val_type = type;
4079 }
4080 
4081 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4082 
4083 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4084    mark it free; otherwise mark it dead.  */
4085 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4086 {
4087     TCGTempVal new_type;
4088 
4089     switch (ts->kind) {
4090     case TEMP_FIXED:
4091         return;
4092     case TEMP_GLOBAL:
4093     case TEMP_TB:
4094         new_type = TEMP_VAL_MEM;
4095         break;
4096     case TEMP_EBB:
4097         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4098         break;
4099     case TEMP_CONST:
4100         new_type = TEMP_VAL_CONST;
4101         break;
4102     default:
4103         g_assert_not_reached();
4104     }
4105     set_temp_val_nonreg(s, ts, new_type);
4106 }
4107 
4108 /* Mark a temporary as dead.  */
4109 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4110 {
4111     temp_free_or_dead(s, ts, 1);
4112 }
4113 
4114 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4115    register needs to be allocated to store a constant.  If 'free_or_dead'
4116    is non-zero, subsequently release the temporary; if it is positive, the
4117    temp is dead; if it is negative, the temp is free.  */
4118 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4119                       TCGRegSet preferred_regs, int free_or_dead)
4120 {
4121     if (!temp_readonly(ts) && !ts->mem_coherent) {
4122         if (!ts->mem_allocated) {
4123             temp_allocate_frame(s, ts);
4124         }
4125         switch (ts->val_type) {
4126         case TEMP_VAL_CONST:
4127             /* If we're going to free the temp immediately, then we won't
4128                require it later in a register, so attempt to store the
4129                constant to memory directly.  */
4130             if (free_or_dead
4131                 && tcg_out_sti(s, ts->type, ts->val,
4132                                ts->mem_base->reg, ts->mem_offset)) {
4133                 break;
4134             }
4135             temp_load(s, ts, tcg_target_available_regs[ts->type],
4136                       allocated_regs, preferred_regs);
4137             /* fallthrough */
4138 
4139         case TEMP_VAL_REG:
4140             tcg_out_st(s, ts->type, ts->reg,
4141                        ts->mem_base->reg, ts->mem_offset);
4142             break;
4143 
4144         case TEMP_VAL_MEM:
4145             break;
4146 
4147         case TEMP_VAL_DEAD:
4148         default:
4149             g_assert_not_reached();
4150         }
4151         ts->mem_coherent = 1;
4152     }
4153     if (free_or_dead) {
4154         temp_free_or_dead(s, ts, free_or_dead);
4155     }
4156 }
4157 
4158 /* free register 'reg' by spilling the corresponding temporary if necessary */
4159 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4160 {
4161     TCGTemp *ts = s->reg_to_temp[reg];
4162     if (ts != NULL) {
4163         temp_sync(s, ts, allocated_regs, 0, -1);
4164     }
4165 }
4166 
4167 /**
4168  * tcg_reg_alloc:
4169  * @required_regs: Set of registers in which we must allocate.
4170  * @allocated_regs: Set of registers which must be avoided.
4171  * @preferred_regs: Set of registers we should prefer.
4172  * @rev: True if we search the registers in "indirect" order.
4173  *
4174  * The allocated register must be in @required_regs & ~@allocated_regs,
4175  * but if we can put it in @preferred_regs we may save a move later.
4176  */
4177 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4178                             TCGRegSet allocated_regs,
4179                             TCGRegSet preferred_regs, bool rev)
4180 {
4181     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4182     TCGRegSet reg_ct[2];
4183     const int *order;
4184 
4185     reg_ct[1] = required_regs & ~allocated_regs;
4186     tcg_debug_assert(reg_ct[1] != 0);
4187     reg_ct[0] = reg_ct[1] & preferred_regs;
4188 
4189     /* Skip the preferred_regs option if it cannot be satisfied,
4190        or if the preference made no difference.  */
4191     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4192 
4193     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4194 
4195     /* Try free registers, preferences first.  */
4196     for (j = f; j < 2; j++) {
4197         TCGRegSet set = reg_ct[j];
4198 
4199         if (tcg_regset_single(set)) {
4200             /* One register in the set.  */
4201             TCGReg reg = tcg_regset_first(set);
4202             if (s->reg_to_temp[reg] == NULL) {
4203                 return reg;
4204             }
4205         } else {
4206             for (i = 0; i < n; i++) {
4207                 TCGReg reg = order[i];
4208                 if (s->reg_to_temp[reg] == NULL &&
4209                     tcg_regset_test_reg(set, reg)) {
4210                     return reg;
4211                 }
4212             }
4213         }
4214     }
4215 
4216     /* We must spill something.  */
4217     for (j = f; j < 2; j++) {
4218         TCGRegSet set = reg_ct[j];
4219 
4220         if (tcg_regset_single(set)) {
4221             /* One register in the set.  */
4222             TCGReg reg = tcg_regset_first(set);
4223             tcg_reg_free(s, reg, allocated_regs);
4224             return reg;
4225         } else {
4226             for (i = 0; i < n; i++) {
4227                 TCGReg reg = order[i];
4228                 if (tcg_regset_test_reg(set, reg)) {
4229                     tcg_reg_free(s, reg, allocated_regs);
4230                     return reg;
4231                 }
4232             }
4233         }
4234     }
4235 
4236     g_assert_not_reached();
4237 }
4238 
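/*
 * Like tcg_reg_alloc, but allocate the consecutive register pair
 * reg, reg+1.  Candidates are scanned three times, preferring a pair
 * with both registers free, then one spill, then two, so that the
 * number of flushes is minimized.
 */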
4239 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4240                                  TCGRegSet allocated_regs,
4241                                  TCGRegSet preferred_regs, bool rev)
4242 {
4243     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4244     TCGRegSet reg_ct[2];
4245     const int *order;
4246 
4247     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4248     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4249     tcg_debug_assert(reg_ct[1] != 0);
4250     reg_ct[0] = reg_ct[1] & preferred_regs;
4251 
4252     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4253 
4254     /*
4255      * Skip the preferred_regs option if it cannot be satisfied,
4256      * or if the preference made no difference.
4257      */
4258     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4259 
4260     /*
4261      * Minimize the number of flushes by looking for 2 free registers first,
4262      * then a single flush, then two flushes.
4263      */
4264     for (fmin = 2; fmin >= 0; fmin--) {
4265         for (j = k; j < 2; j++) {
4266             TCGRegSet set = reg_ct[j];
4267 
4268             for (i = 0; i < n; i++) {
4269                 TCGReg reg = order[i];
4270 
4271                 if (tcg_regset_test_reg(set, reg)) {
4272                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4273                     if (f >= fmin) {
4274                         tcg_reg_free(s, reg, allocated_regs);
4275                         tcg_reg_free(s, reg + 1, allocated_regs);
4276                         return reg;
4277                     }
4278                 }
4279             }
4280         }
4281     }
4282     g_assert_not_reached();
4283 }
4284 
4285 /* Make sure the temporary is in a register.  If needed, allocate the register
4286    from DESIRED while avoiding ALLOCATED.  */
4287 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4288                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4289 {
4290     TCGReg reg;
4291 
4292     switch (ts->val_type) {
4293     case TEMP_VAL_REG:
4294         return;
4295     case TEMP_VAL_CONST:
4296         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4297                             preferred_regs, ts->indirect_base);
4298         if (ts->type <= TCG_TYPE_I64) {
4299             tcg_out_movi(s, ts->type, reg, ts->val);
4300         } else {
4301             uint64_t val = ts->val;
4302             MemOp vece = MO_64;
4303 
4304             /*
4305              * Find the minimal vector element that matches the constant.
4306              * The targets will, in general, have to do this search anyway;
4307              * do it generically here.
4308              */
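            /*
             * For example, 0x6666666666666666 matches dup_const(MO_8, 0x66)
             * and can be materialized by duplicating a single byte.
             */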
4309             if (val == dup_const(MO_8, val)) {
4310                 vece = MO_8;
4311             } else if (val == dup_const(MO_16, val)) {
4312                 vece = MO_16;
4313             } else if (val == dup_const(MO_32, val)) {
4314                 vece = MO_32;
4315             }
4316 
4317             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4318         }
4319         ts->mem_coherent = 0;
4320         break;
4321     case TEMP_VAL_MEM:
4322         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4323                             preferred_regs, ts->indirect_base);
4324         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4325         ts->mem_coherent = 1;
4326         break;
4327     case TEMP_VAL_DEAD:
4328     default:
4329         g_assert_not_reached();
4330     }
4331     set_temp_val_reg(s, ts, reg);
4332 }
4333 
4334 /* Save a temporary to memory. 'allocated_regs' is used in case a
4335    temporary register needs to be allocated to store a constant.  */
4336 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4337 {
4338     /* The liveness analysis already ensures that globals are back
4339        in memory. Keep a tcg_debug_assert for safety. */
4340     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4341 }
4342 
4343 /* save globals to their canonical location and assume they can be
4344    modified by the following code. 'allocated_regs' is used in case a
4345    temporary register needs to be allocated to store a constant. */
4346 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4347 {
4348     int i, n;
4349 
4350     for (i = 0, n = s->nb_globals; i < n; i++) {
4351         temp_save(s, &s->temps[i], allocated_regs);
4352     }
4353 }
4354 
4355 /* sync globals to their canonical location and assume they can be
4356    read by the following code. 'allocated_regs' is used in case a
4357    temporary register needs to be allocated to store a constant. */
4358 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4359 {
4360     int i, n;
4361 
4362     for (i = 0, n = s->nb_globals; i < n; i++) {
4363         TCGTemp *ts = &s->temps[i];
4364         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4365                          || ts->kind == TEMP_FIXED
4366                          || ts->mem_coherent);
4367     }
4368 }
4369 
4370 /* at the end of a basic block, we assume all temporaries are dead and
4371    all globals are stored at their canonical location. */
4372 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4373 {
4374     int i;
4375 
4376     for (i = s->nb_globals; i < s->nb_temps; i++) {
4377         TCGTemp *ts = &s->temps[i];
4378 
4379         switch (ts->kind) {
4380         case TEMP_TB:
4381             temp_save(s, ts, allocated_regs);
4382             break;
4383         case TEMP_EBB:
4384             /* The liveness analysis already ensures that temps are dead.
4385                Keep a tcg_debug_assert for safety. */
4386             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4387             break;
4388         case TEMP_CONST:
4389             /* Similarly, we should have freed any allocated register. */
4390             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4391             break;
4392         default:
4393             g_assert_not_reached();
4394         }
4395     }
4396 
4397     save_globals(s, allocated_regs);
4398 }
4399 
4400 /*
4401  * At a conditional branch, we assume all temporaries are dead unless
4402  * explicitly live-across-conditional-branch; all globals and local
4403  * temps are synced to their location.
4404  */
4405 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4406 {
4407     sync_globals(s, allocated_regs);
4408 
4409     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4410         TCGTemp *ts = &s->temps[i];
4411         /*
4412          * The liveness analysis already ensures that temps are dead.
4413          * Keep tcg_debug_asserts for safety.
4414          */
4415         switch (ts->kind) {
4416         case TEMP_TB:
4417             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4418             break;
4419         case TEMP_EBB:
4420         case TEMP_CONST:
4421             break;
4422         default:
4423             g_assert_not_reached();
4424         }
4425     }
4426 }
4427 
4428 /*
4429  * Specialized code generation for INDEX_op_mov_* with a constant.
4430  */
4431 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4432                                   tcg_target_ulong val, TCGLifeData arg_life,
4433                                   TCGRegSet preferred_regs)
4434 {
4435     /* ENV should not be modified.  */
4436     tcg_debug_assert(!temp_readonly(ots));
4437 
4438     /* The movi is not explicitly generated here.  */
4439     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4440     ots->val = val;
4441     ots->mem_coherent = 0;
4442     if (NEED_SYNC_ARG(0)) {
4443         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4444     } else if (IS_DEAD_ARG(0)) {
4445         temp_dead(s, ots);
4446     }
4447 }
4448 
4449 /*
4450  * Specialized code generation for INDEX_op_mov_*.
4451  */
4452 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4453 {
4454     const TCGLifeData arg_life = op->life;
4455     TCGRegSet allocated_regs, preferred_regs;
4456     TCGTemp *ts, *ots;
4457     TCGType otype, itype;
4458     TCGReg oreg, ireg;
4459 
4460     allocated_regs = s->reserved_regs;
4461     preferred_regs = output_pref(op, 0);
4462     ots = arg_temp(op->args[0]);
4463     ts = arg_temp(op->args[1]);
4464 
4465     /* ENV should not be modified.  */
4466     tcg_debug_assert(!temp_readonly(ots));
4467 
4468     /* Note that otype != itype for no-op truncation.  */
4469     otype = ots->type;
4470     itype = ts->type;
4471 
4472     if (ts->val_type == TEMP_VAL_CONST) {
4473         /* propagate constant or generate sti */
4474         tcg_target_ulong val = ts->val;
4475         if (IS_DEAD_ARG(1)) {
4476             temp_dead(s, ts);
4477         }
4478         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4479         return;
4480     }
4481 
4482     /* If the source value is in memory we're going to be forced
4483        to have it in a register in order to perform the copy.  Copy
4484        the SOURCE value into its own register first, so that we
4485        don't have to reload SOURCE the next time it is used. */
4486     if (ts->val_type == TEMP_VAL_MEM) {
4487         temp_load(s, ts, tcg_target_available_regs[itype],
4488                   allocated_regs, preferred_regs);
4489     }
4490     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4491     ireg = ts->reg;
4492 
4493     if (IS_DEAD_ARG(0)) {
4494         /* mov to a non-saved dead register makes no sense (even with
4495            liveness analysis disabled). */
4496         tcg_debug_assert(NEED_SYNC_ARG(0));
4497         if (!ots->mem_allocated) {
4498             temp_allocate_frame(s, ots);
4499         }
4500         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4501         if (IS_DEAD_ARG(1)) {
4502             temp_dead(s, ts);
4503         }
4504         temp_dead(s, ots);
4505         return;
4506     }
4507 
4508     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4509         /*
4510          * The mov can be suppressed.  Kill input first, so that it
4511          * is unlinked from reg_to_temp, then set the output to the
4512          * reg that we saved from the input.
4513          */
4514         temp_dead(s, ts);
4515         oreg = ireg;
4516     } else {
4517         if (ots->val_type == TEMP_VAL_REG) {
4518             oreg = ots->reg;
4519         } else {
4520             /* Make sure to not spill the input register during allocation. */
4521             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4522                                  allocated_regs | ((TCGRegSet)1 << ireg),
4523                                  preferred_regs, ots->indirect_base);
4524         }
4525         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4526             /*
4527              * Cross register class move not supported.
4528              * Store the source register into the destination slot
4529              * and leave the destination temp as TEMP_VAL_MEM.
4530              */
4531             assert(!temp_readonly(ots));
4532             if (!ots->mem_allocated) {
4533                 temp_allocate_frame(s, ots);
4534             }
4535             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4536             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4537             ots->mem_coherent = 1;
4538             return;
4539         }
4540     }
4541     set_temp_val_reg(s, ots, oreg);
4542     ots->mem_coherent = 0;
4543 
4544     if (NEED_SYNC_ARG(0)) {
4545         temp_sync(s, ots, allocated_regs, 0, 0);
4546     }
4547 }
4548 
4549 /*
4550  * Specialized code generation for INDEX_op_dup_vec.
4551  */
4552 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4553 {
4554     const TCGLifeData arg_life = op->life;
4555     TCGRegSet dup_out_regs, dup_in_regs;
4556     TCGTemp *its, *ots;
4557     TCGType itype, vtype;
4558     unsigned vece;
4559     int lowpart_ofs;
4560     bool ok;
4561 
4562     ots = arg_temp(op->args[0]);
4563     its = arg_temp(op->args[1]);
4564 
4565     /* ENV should not be modified.  */
4566     tcg_debug_assert(!temp_readonly(ots));
4567 
4568     itype = its->type;
4569     vece = TCGOP_VECE(op);
4570     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4571 
4572     if (its->val_type == TEMP_VAL_CONST) {
4573         /* Propagate constant via movi -> dupi.  */
4574         tcg_target_ulong val = its->val;
4575         if (IS_DEAD_ARG(1)) {
4576             temp_dead(s, its);
4577         }
4578         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4579         return;
4580     }
4581 
4582     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4583     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4584 
4585     /* Allocate the output register now.  */
4586     if (ots->val_type != TEMP_VAL_REG) {
4587         TCGRegSet allocated_regs = s->reserved_regs;
4588         TCGReg oreg;
4589 
4590         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4591             /* Make sure to not spill the input register. */
4592             tcg_regset_set_reg(allocated_regs, its->reg);
4593         }
4594         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4595                              output_pref(op, 0), ots->indirect_base);
4596         set_temp_val_reg(s, ots, oreg);
4597     }
4598 
4599     switch (its->val_type) {
4600     case TEMP_VAL_REG:
4601         /*
4602          * The dup constraints must be broad, covering all possible VECE.
4603          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4604          * to fail, indicating that extra moves are required for that case.
4605          */
4606         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4607             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4608                 goto done;
4609             }
4610             /* Try again from memory or a vector input register.  */
4611         }
4612         if (!its->mem_coherent) {
4613             /*
4614              * The input register is not synced, and so an extra store
4615              * would be required to use memory.  Attempt an integer->vector
4616              * register move first.  We do not have a TCGRegSet for this.
4617              */
4618             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4619                 break;
4620             }
4621             /* Sync the temp back to its slot and load from there.  */
4622             temp_sync(s, its, s->reserved_regs, 0, 0);
4623         }
4624         /* fall through */
4625 
4626     case TEMP_VAL_MEM:
4627         lowpart_ofs = 0;
4628         if (HOST_BIG_ENDIAN) {
4629             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4630         }
4631         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4632                              its->mem_offset + lowpart_ofs)) {
4633             goto done;
4634         }
4635         /* Load the input into the destination vector register. */
4636         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4637         break;
4638 
4639     default:
4640         g_assert_not_reached();
4641     }
4642 
4643     /* We now have a vector input register, so dup must succeed. */
4644     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4645     tcg_debug_assert(ok);
4646 
4647  done:
4648     ots->mem_coherent = 0;
4649     if (IS_DEAD_ARG(1)) {
4650         temp_dead(s, its);
4651     }
4652     if (NEED_SYNC_ARG(0)) {
4653         temp_sync(s, ots, s->reserved_regs, 0, 0);
4654     }
4655     if (IS_DEAD_ARG(0)) {
4656         temp_dead(s, ots);
4657     }
4658 }
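/*
 * Recap of the fallback ladder in tcg_reg_alloc_dup above, for reference:
 * a constant input folds to a dupi; a register input satisfying the dup
 * constraint uses tcg_out_dup_vec; a dirty register tries an
 * integer->vector move before being synced to memory; a memory input
 * tries tcg_out_dupm_vec, and as a last resort is loaded into the
 * destination vector register and duplicated in place.
 */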
4659 
4660 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4661 {
4662     const TCGLifeData arg_life = op->life;
4663     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4664     TCGRegSet i_allocated_regs;
4665     TCGRegSet o_allocated_regs;
4666     int i, k, nb_iargs, nb_oargs;
4667     TCGReg reg;
4668     TCGArg arg;
4669     const TCGArgConstraint *arg_ct;
4670     TCGTemp *ts;
4671     TCGArg new_args[TCG_MAX_OP_ARGS];
4672     int const_args[TCG_MAX_OP_ARGS];
4673 
4674     nb_oargs = def->nb_oargs;
4675     nb_iargs = def->nb_iargs;
4676 
4677     /* copy constants */
4678     memcpy(new_args + nb_oargs + nb_iargs,
4679            op->args + nb_oargs + nb_iargs,
4680            sizeof(TCGArg) * def->nb_cargs);
4681 
4682     i_allocated_regs = s->reserved_regs;
4683     o_allocated_regs = s->reserved_regs;
4684 
4685     /* satisfy input constraints */
4686     for (k = 0; k < nb_iargs; k++) {
4687         TCGRegSet i_preferred_regs, i_required_regs;
4688         bool allocate_new_reg, copyto_new_reg;
4689         TCGTemp *ts2;
4690         int i1, i2;
4691 
4692         i = def->args_ct[nb_oargs + k].sort_index;
4693         arg = op->args[i];
4694         arg_ct = &def->args_ct[i];
4695         ts = arg_temp(arg);
4696 
4697         if (ts->val_type == TEMP_VAL_CONST
4698             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4699             /* constant is OK for instruction */
4700             const_args[i] = 1;
4701             new_args[i] = ts->val;
4702             continue;
4703         }
4704 
4705         reg = ts->reg;
4706         i_preferred_regs = 0;
4707         i_required_regs = arg_ct->regs;
4708         allocate_new_reg = false;
4709         copyto_new_reg = false;
4710 
4711         switch (arg_ct->pair) {
4712         case 0: /* not paired */
4713             if (arg_ct->ialias) {
4714                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4715 
4716                 /*
4717                  * If the input is readonly, then it cannot also be an
4718                  * output and aliased to itself.  If the input is not
4719                  * dead after the instruction, we must allocate a new
4720                  * register and move it.
4721                  */
4722                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4723                     || def->args_ct[arg_ct->alias_index].newreg) {
4724                     allocate_new_reg = true;
4725                 } else if (ts->val_type == TEMP_VAL_REG) {
4726                     /*
4727                      * Check if the current register has already been
4728                      * allocated for another input.
4729                      */
4730                     allocate_new_reg =
4731                         tcg_regset_test_reg(i_allocated_regs, reg);
4732                 }
4733             }
4734             if (!allocate_new_reg) {
4735                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4736                           i_preferred_regs);
4737                 reg = ts->reg;
4738                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4739             }
4740             if (allocate_new_reg) {
4741                 /*
4742                  * Allocate a new register matching the constraint
4743                  * and move the temporary register into it.
4744                  */
4745                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4746                           i_allocated_regs, 0);
4747                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4748                                     i_preferred_regs, ts->indirect_base);
4749                 copyto_new_reg = true;
4750             }
4751             break;
4752 
4753         case 1:
4754             /* First of an input pair; if i1 == i2, the second is an output. */
4755             i1 = i;
4756             i2 = arg_ct->pair_index;
4757             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4758 
4759             /*
4760              * It is easier to default to allocating a new pair
4761              * and to identify a few cases where it's not required.
4762              */
4763             if (arg_ct->ialias) {
4764                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4765                 if (IS_DEAD_ARG(i1) &&
4766                     IS_DEAD_ARG(i2) &&
4767                     !temp_readonly(ts) &&
4768                     ts->val_type == TEMP_VAL_REG &&
4769                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4770                     tcg_regset_test_reg(i_required_regs, reg) &&
4771                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4772                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4773                     (ts2
4774                      ? ts2->val_type == TEMP_VAL_REG &&
4775                        ts2->reg == reg + 1 &&
4776                        !temp_readonly(ts2)
4777                      : s->reg_to_temp[reg + 1] == NULL)) {
4778                     break;
4779                 }
4780             } else {
4781                 /* Without aliasing, the pair must also be an input. */
4782                 tcg_debug_assert(ts2);
4783                 if (ts->val_type == TEMP_VAL_REG &&
4784                     ts2->val_type == TEMP_VAL_REG &&
4785                     ts2->reg == reg + 1 &&
4786                     tcg_regset_test_reg(i_required_regs, reg)) {
4787                     break;
4788                 }
4789             }
4790             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4791                                      0, ts->indirect_base);
4792             goto do_pair;
4793 
4794         case 2: /* pair second */
4795             reg = new_args[arg_ct->pair_index] + 1;
4796             goto do_pair;
4797 
4798         case 3: /* ialias with second output, no first input */
4799             tcg_debug_assert(arg_ct->ialias);
4800             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4801 
4802             if (IS_DEAD_ARG(i) &&
4803                 !temp_readonly(ts) &&
4804                 ts->val_type == TEMP_VAL_REG &&
4805                 reg > 0 &&
4806                 s->reg_to_temp[reg - 1] == NULL &&
4807                 tcg_regset_test_reg(i_required_regs, reg) &&
4808                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4809                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4810                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4811                 break;
4812             }
4813             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4814                                      i_allocated_regs, 0,
4815                                      ts->indirect_base);
4816             tcg_regset_set_reg(i_allocated_regs, reg);
4817             reg += 1;
4818             goto do_pair;
4819 
4820         do_pair:
4821             /*
4822              * If an aliased input is not dead after the instruction,
4823              * we must allocate a new register and move it.
4824              */
4825             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4826                 TCGRegSet t_allocated_regs = i_allocated_regs;
4827 
4828                 /*
4829                  * Because of the alias, and the continued life, make sure
4830                  * that the temp is somewhere *other* than the reg pair,
4831                  * and we get a copy in reg.
4832                  */
4833                 tcg_regset_set_reg(t_allocated_regs, reg);
4834                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4835                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4836                     /* If ts was already in reg, copy it somewhere else. */
4837                     TCGReg nr;
4838                     bool ok;
4839 
4840                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4841                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4842                                        t_allocated_regs, 0, ts->indirect_base);
4843                     ok = tcg_out_mov(s, ts->type, nr, reg);
4844                     tcg_debug_assert(ok);
4845 
4846                     set_temp_val_reg(s, ts, nr);
4847                 } else {
4848                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4849                               t_allocated_regs, 0);
4850                     copyto_new_reg = true;
4851                 }
4852             } else {
4853                 /* Preferably allocate to reg, otherwise copy. */
4854                 i_required_regs = (TCGRegSet)1 << reg;
4855                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4856                           i_preferred_regs);
4857                 copyto_new_reg = ts->reg != reg;
4858             }
4859             break;
4860 
4861         default:
4862             g_assert_not_reached();
4863         }
4864 
4865         if (copyto_new_reg) {
4866             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4867                 /*
4868                  * Cross register class move not supported.  Sync the
4869                  * temp back to its slot and load from there.
4870                  */
4871                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4872                 tcg_out_ld(s, ts->type, reg,
4873                            ts->mem_base->reg, ts->mem_offset);
4874             }
4875         }
4876         new_args[i] = reg;
4877         const_args[i] = 0;
4878         tcg_regset_set_reg(i_allocated_regs, reg);
4879     }
4880 
4881     /* mark dead temporaries and free the associated registers */
4882     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4883         if (IS_DEAD_ARG(i)) {
4884             temp_dead(s, arg_temp(op->args[i]));
4885         }
4886     }
4887 
4888     if (def->flags & TCG_OPF_COND_BRANCH) {
4889         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4890     } else if (def->flags & TCG_OPF_BB_END) {
4891         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4892     } else {
4893         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4894             /* XXX: permit generic clobber register list ? */
4895             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4896                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4897                     tcg_reg_free(s, i, i_allocated_regs);
4898                 }
4899             }
4900         }
4901         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4902             /* sync globals if the op has side effects and might trigger
4903                an exception. */
4904             sync_globals(s, i_allocated_regs);
4905         }
4906 
4907         /* satisfy the output constraints */
4908         for (k = 0; k < nb_oargs; k++) {
4909             i = def->args_ct[k].sort_index;
4910             arg = op->args[i];
4911             arg_ct = &def->args_ct[i];
4912             ts = arg_temp(arg);
4913 
4914             /* ENV should not be modified.  */
4915             tcg_debug_assert(!temp_readonly(ts));
4916 
4917             switch (arg_ct->pair) {
4918             case 0: /* not paired */
4919                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4920                     reg = new_args[arg_ct->alias_index];
4921                 } else if (arg_ct->newreg) {
4922                     reg = tcg_reg_alloc(s, arg_ct->regs,
4923                                         i_allocated_regs | o_allocated_regs,
4924                                         output_pref(op, k), ts->indirect_base);
4925                 } else {
4926                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4927                                         output_pref(op, k), ts->indirect_base);
4928                 }
4929                 break;
4930 
4931             case 1: /* first of pair */
4932                 tcg_debug_assert(!arg_ct->newreg);
4933                 if (arg_ct->oalias) {
4934                     reg = new_args[arg_ct->alias_index];
4935                     break;
4936                 }
4937                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4938                                          output_pref(op, k), ts->indirect_base);
4939                 break;
4940 
4941             case 2: /* second of pair */
4942                 tcg_debug_assert(!arg_ct->newreg);
4943                 if (arg_ct->oalias) {
4944                     reg = new_args[arg_ct->alias_index];
4945                 } else {
4946                     reg = new_args[arg_ct->pair_index] + 1;
4947                 }
4948                 break;
4949 
4950             case 3: /* first of pair, aliasing with a second input */
4951                 tcg_debug_assert(!arg_ct->newreg);
4952                 reg = new_args[arg_ct->pair_index] - 1;
4953                 break;
4954 
4955             default:
4956                 g_assert_not_reached();
4957             }
4958             tcg_regset_set_reg(o_allocated_regs, reg);
4959             set_temp_val_reg(s, ts, reg);
4960             ts->mem_coherent = 0;
4961             new_args[i] = reg;
4962         }
4963     }
4964 
4965     /* emit instruction */
4966     switch (op->opc) {
4967     case INDEX_op_ext8s_i32:
4968         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4969         break;
4970     case INDEX_op_ext8s_i64:
4971         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4972         break;
4973     case INDEX_op_ext8u_i32:
4974     case INDEX_op_ext8u_i64:
4975         tcg_out_ext8u(s, new_args[0], new_args[1]);
4976         break;
4977     case INDEX_op_ext16s_i32:
4978         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4979         break;
4980     case INDEX_op_ext16s_i64:
4981         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4982         break;
4983     case INDEX_op_ext16u_i32:
4984     case INDEX_op_ext16u_i64:
4985         tcg_out_ext16u(s, new_args[0], new_args[1]);
4986         break;
4987     case INDEX_op_ext32s_i64:
4988         tcg_out_ext32s(s, new_args[0], new_args[1]);
4989         break;
4990     case INDEX_op_ext32u_i64:
4991         tcg_out_ext32u(s, new_args[0], new_args[1]);
4992         break;
4993     case INDEX_op_ext_i32_i64:
4994         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4995         break;
4996     case INDEX_op_extu_i32_i64:
4997         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4998         break;
4999     case INDEX_op_extrl_i64_i32:
5000         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5001         break;
5002     default:
5003         if (def->flags & TCG_OPF_VECTOR) {
5004             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5005                            new_args, const_args);
5006         } else {
5007             tcg_out_op(s, op->opc, new_args, const_args);
5008         }
5009         break;
5010     }
5011 
5012     /* move the outputs in the correct register if needed */
5013     for (i = 0; i < nb_oargs; i++) {
5014         ts = arg_temp(op->args[i]);
5015 
5016         /* ENV should not be modified.  */
5017         tcg_debug_assert(!temp_readonly(ts));
5018 
5019         if (NEED_SYNC_ARG(i)) {
5020             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5021         } else if (IS_DEAD_ARG(i)) {
5022             temp_dead(s, ts);
5023         }
5024     }
5025 }
5026 
5027 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5028 {
5029     const TCGLifeData arg_life = op->life;
5030     TCGTemp *ots, *itsl, *itsh;
5031     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5032 
5033     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5034     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5035     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5036 
5037     ots = arg_temp(op->args[0]);
5038     itsl = arg_temp(op->args[1]);
5039     itsh = arg_temp(op->args[2]);
5040 
5041     /* ENV should not be modified.  */
5042     tcg_debug_assert(!temp_readonly(ots));
5043 
5044     /* Allocate the output register now.  */
5045     if (ots->val_type != TEMP_VAL_REG) {
5046         TCGRegSet allocated_regs = s->reserved_regs;
5047         TCGRegSet dup_out_regs =
5048             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5049         TCGReg oreg;
5050 
5051         /* Make sure to not spill the input registers. */
5052         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5053             tcg_regset_set_reg(allocated_regs, itsl->reg);
5054         }
5055         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5056             tcg_regset_set_reg(allocated_regs, itsh->reg);
5057         }
5058 
5059         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5060                              output_pref(op, 0), ots->indirect_base);
5061         set_temp_val_reg(s, ots, oreg);
5062     }
5063 
5064     /* Promote dup2 of immediates to dupi_vec. */
5065     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5066         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5067         MemOp vece = MO_64;
5068 
5069         if (val == dup_const(MO_8, val)) {
5070             vece = MO_8;
5071         } else if (val == dup_const(MO_16, val)) {
5072             vece = MO_16;
5073         } else if (val == dup_const(MO_32, val)) {
5074             vece = MO_32;
5075         }
5076 
5077         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5078         goto done;
5079     }
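    /*
     * Worked example: itsl->val = 0x00010001 and itsh->val = 0x00010001
     * combine to val = 0x0001000100010001.  dup_const(MO_8, val) yields
     * 0x0101010101010101, which does not match, but dup_const(MO_16, val)
     * replicates 0x0001 and does, so the dupi is emitted with vece = MO_16.
     */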
5080 
5081     /* If the two inputs form one 64-bit value, try dupm_vec. */
5082     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5083         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5084         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5085         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5086 
5087         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5088         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5089 
5090         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5091                              its->mem_base->reg, its->mem_offset)) {
5092             goto done;
5093         }
5094     }
5095 
5096     /* Fall back to generic expansion. */
5097     return false;
5098 
5099  done:
5100     ots->mem_coherent = 0;
5101     if (IS_DEAD_ARG(1)) {
5102         temp_dead(s, itsl);
5103     }
5104     if (IS_DEAD_ARG(2)) {
5105         temp_dead(s, itsh);
5106     }
5107     if (NEED_SYNC_ARG(0)) {
5108         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5109     } else if (IS_DEAD_ARG(0)) {
5110         temp_dead(s, ots);
5111     }
5112     return true;
5113 }
5114 
5115 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5116                          TCGRegSet allocated_regs)
5117 {
5118     if (ts->val_type == TEMP_VAL_REG) {
5119         if (ts->reg != reg) {
5120             tcg_reg_free(s, reg, allocated_regs);
5121             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5122                 /*
5123                  * Cross register class move not supported.  Sync the
5124                  * temp back to its slot and load from there.
5125                  */
5126                 temp_sync(s, ts, allocated_regs, 0, 0);
5127                 tcg_out_ld(s, ts->type, reg,
5128                            ts->mem_base->reg, ts->mem_offset);
5129             }
5130         }
5131     } else {
5132         TCGRegSet arg_set = 0;
5133 
5134         tcg_reg_free(s, reg, allocated_regs);
5135         tcg_regset_set_reg(arg_set, reg);
5136         temp_load(s, ts, arg_set, allocated_regs, 0);
5137     }
5138 }
5139 
5140 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5141                          TCGRegSet allocated_regs)
5142 {
5143     /*
5144      * When the destination is on the stack, load up the temp and store.
5145      * If there are many call-saved registers, the temp might live to
5146      * see another use; otherwise it'll be discarded.
5147      */
5148     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5149     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5150                arg_slot_stk_ofs(arg_slot));
5151 }
5152 
5153 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5154                             TCGTemp *ts, TCGRegSet *allocated_regs)
5155 {
5156     if (arg_slot_reg_p(l->arg_slot)) {
5157         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5158         load_arg_reg(s, reg, ts, *allocated_regs);
5159         tcg_regset_set_reg(*allocated_regs, reg);
5160     } else {
5161         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5162     }
5163 }
5164 
5165 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5166                          intptr_t ref_off, TCGRegSet *allocated_regs)
5167 {
5168     TCGReg reg;
5169 
5170     if (arg_slot_reg_p(arg_slot)) {
5171         reg = tcg_target_call_iarg_regs[arg_slot];
5172         tcg_reg_free(s, reg, *allocated_regs);
5173         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5174         tcg_regset_set_reg(*allocated_regs, reg);
5175     } else {
5176         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5177                             *allocated_regs, 0, false);
5178         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5179         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5180                    arg_slot_stk_ofs(arg_slot));
5181     }
5182 }
5183 
5184 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5185 {
5186     const int nb_oargs = TCGOP_CALLO(op);
5187     const int nb_iargs = TCGOP_CALLI(op);
5188     const TCGLifeData arg_life = op->life;
5189     const TCGHelperInfo *info = tcg_call_info(op);
5190     TCGRegSet allocated_regs = s->reserved_regs;
5191     int i;
5192 
5193     /*
5194      * Move inputs into place in reverse order,
5195      * so that we place stacked arguments first.
5196      */
5197     for (i = nb_iargs - 1; i >= 0; --i) {
5198         const TCGCallArgumentLoc *loc = &info->in[i];
5199         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5200 
5201         switch (loc->kind) {
5202         case TCG_CALL_ARG_NORMAL:
5203         case TCG_CALL_ARG_EXTEND_U:
5204         case TCG_CALL_ARG_EXTEND_S:
5205             load_arg_normal(s, loc, ts, &allocated_regs);
5206             break;
5207         case TCG_CALL_ARG_BY_REF:
5208             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5209             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5210                          arg_slot_stk_ofs(loc->ref_slot),
5211                          &allocated_regs);
5212             break;
5213         case TCG_CALL_ARG_BY_REF_N:
5214             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5215             break;
5216         default:
5217             g_assert_not_reached();
5218         }
5219     }
5220 
5221     /* Mark dead temporaries and free the associated registers.  */
5222     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5223         if (IS_DEAD_ARG(i)) {
5224             temp_dead(s, arg_temp(op->args[i]));
5225         }
5226     }
5227 
5228     /* Clobber call registers.  */
5229     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5230         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5231             tcg_reg_free(s, i, allocated_regs);
5232         }
5233     }
5234 
5235     /*
5236      * Save globals if they might be written by the helper,
5237      * sync them if they might be read.
5238      */
5239     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5240         /* Nothing to do */
5241     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5242         sync_globals(s, allocated_regs);
5243     } else {
5244         save_globals(s, allocated_regs);
5245     }
5246 
5247     /*
5248      * If the ABI passes a pointer to the returned struct as the first
5249      * argument, load that now.  Pass a pointer to the output home slot.
5250      */
5251     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5252         TCGTemp *ts = arg_temp(op->args[0]);
5253 
5254         if (!ts->mem_allocated) {
5255             temp_allocate_frame(s, ts);
5256         }
5257         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5258     }
5259 
5260     tcg_out_call(s, tcg_call_func(op), info);
5261 
5262     /* Assign output registers and emit moves if needed.  */
5263     switch (info->out_kind) {
5264     case TCG_CALL_RET_NORMAL:
5265         for (i = 0; i < nb_oargs; i++) {
5266             TCGTemp *ts = arg_temp(op->args[i]);
5267             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5268 
5269             /* ENV should not be modified.  */
5270             tcg_debug_assert(!temp_readonly(ts));
5271 
5272             set_temp_val_reg(s, ts, reg);
5273             ts->mem_coherent = 0;
5274         }
5275         break;
5276 
5277     case TCG_CALL_RET_BY_VEC:
5278         {
5279             TCGTemp *ts = arg_temp(op->args[0]);
5280 
5281             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5282             tcg_debug_assert(ts->temp_subindex == 0);
5283             if (!ts->mem_allocated) {
5284                 temp_allocate_frame(s, ts);
5285             }
5286             tcg_out_st(s, TCG_TYPE_V128,
5287                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5288                        ts->mem_base->reg, ts->mem_offset);
5289         }
5290         /* fall through to mark all parts in memory */
5291 
5292     case TCG_CALL_RET_BY_REF:
5293         /* The callee has performed a write through the reference. */
5294         for (i = 0; i < nb_oargs; i++) {
5295             TCGTemp *ts = arg_temp(op->args[i]);
5296             ts->val_type = TEMP_VAL_MEM;
5297         }
5298         break;
5299 
5300     default:
5301         g_assert_not_reached();
5302     }
5303 
5304     /* Flush or discard output registers as needed. */
5305     for (i = 0; i < nb_oargs; i++) {
5306         TCGTemp *ts = arg_temp(op->args[i]);
5307         if (NEED_SYNC_ARG(i)) {
5308             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5309         } else if (IS_DEAD_ARG(i)) {
5310             temp_dead(s, ts);
5311         }
5312     }
5313 }
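/*
 * Illustration of the reverse-order input loading above: assume a host
 * with four argument registers and a call taking six words.  Slots 4
 * and 5 are stored to the stack first, which frees whatever registers
 * held those temps; slots 0-3 are then loaded into argument registers
 * without evicting a value that is still waiting to be placed.
 */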
5314 
5315 /**
5316  * atom_and_align_for_opc:
5317  * @s: tcg context
5318  * @opc: memory operation code
5319  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5320  * @allow_two_ops: true if we are prepared to issue two operations
5321  *
5322  * Return the alignment and atomicity to use for the inline fast path
5323  * for the given memory operation.  The alignment may be larger than
5324  * that specified in @opc, and the correct alignment will be diagnosed
5325  * by the slow path helper.
5326  *
5327  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5328  * and issue two loads or stores for subalignment.
5329  */
5330 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5331                                            MemOp host_atom, bool allow_two_ops)
5332 {
5333     MemOp align = get_alignment_bits(opc);
5334     MemOp size = opc & MO_SIZE;
5335     MemOp half = size ? size - 1 : 0;
5336     MemOp atmax;
5337     MemOp atom;
5338 
5339     /* When serialized, no further atomicity required.  */
5340     if (s->gen_tb->cflags & CF_PARALLEL) {
5341         atom = opc & MO_ATOM_MASK;
5342     } else {
5343         atom = MO_ATOM_NONE;
5344     }
5345 
5346     switch (atom) {
5347     case MO_ATOM_NONE:
5348         /* The operation requires no specific atomicity. */
5349         atmax = MO_8;
5350         break;
5351 
5352     case MO_ATOM_IFALIGN:
5353         atmax = size;
5354         break;
5355 
5356     case MO_ATOM_IFALIGN_PAIR:
5357         atmax = half;
5358         break;
5359 
5360     case MO_ATOM_WITHIN16:
5361         atmax = size;
5362         if (size == MO_128) {
5363             /* Misalignment implies !within16, and therefore no atomicity. */
5364         } else if (host_atom != MO_ATOM_WITHIN16) {
5365             /* The host does not implement within16, so require alignment. */
5366             align = MAX(align, size);
5367         }
5368         break;
5369 
5370     case MO_ATOM_WITHIN16_PAIR:
5371         atmax = size;
5372         /*
5373          * Misalignment implies !within16, and therefore half atomicity.
5374          * Any host prepared for two operations can implement this with
5375          * half alignment.
5376          */
5377         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5378             align = MAX(align, half);
5379         }
5380         break;
5381 
5382     case MO_ATOM_SUBALIGN:
5383         atmax = size;
5384         if (host_atom != MO_ATOM_SUBALIGN) {
5385             /* If unaligned but not odd, there are subobjects up to half. */
5386             if (allow_two_ops) {
5387                 align = MAX(align, half);
5388             } else {
5389                 align = MAX(align, size);
5390             }
5391         }
5392         break;
5393 
5394     default:
5395         g_assert_not_reached();
5396     }
5397 
5398     return (TCGAtomAlign){ .atom = atmax, .align = align };
5399 }
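/*
 * Example: a 4-byte load with MO_ATOM_WITHIN16 on a host providing only
 * MO_ATOM_IFALIGN has its alignment raised to MO_32, so that the single
 * host access is atomic.  The raise is skipped for MO_128 because a
 * misaligned 16-byte access necessarily crosses a 16-byte boundary,
 * and within-16 semantics then demand no atomicity at all.
 */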
5400 
5401 /*
5402  * Similarly for qemu_ld/st slow path helpers.
5403  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5404  * using only the provided backend tcg_out_* functions.
5405  */
5406 
5407 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5408 {
5409     int ofs = arg_slot_stk_ofs(slot);
5410 
5411     /*
5412      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5413      * require extension to uint64_t, adjust the address for uint32_t.
5414      */
5415     if (HOST_BIG_ENDIAN &&
5416         TCG_TARGET_REG_BITS == 64 &&
5417         type == TCG_TYPE_I32) {
5418         ofs += 4;
5419     }
5420     return ofs;
5421 }
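/*
 * Example: on a 64-bit big-endian host, a TCG_TYPE_I32 argument in an
 * 8-byte stack slot is stored at arg_slot_stk_ofs(slot) + 4, placing
 * it in the high-addressed half of the slot where a big-endian callee
 * expects to read a 32-bit value.
 */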
5422 
5423 static void tcg_out_helper_load_slots(TCGContext *s,
5424                                       unsigned nmov, TCGMovExtend *mov,
5425                                       const TCGLdstHelperParam *parm)
5426 {
5427     unsigned i;
5428     TCGReg dst3;
5429 
5430     /*
5431      * Start from the end, storing to the stack first.
5432      * This frees those registers, so we need not consider overlap.
5433      */
5434     for (i = nmov; i-- > 0; ) {
5435         unsigned slot = mov[i].dst;
5436 
5437         if (arg_slot_reg_p(slot)) {
5438             goto found_reg;
5439         }
5440 
5441         TCGReg src = mov[i].src;
5442         TCGType dst_type = mov[i].dst_type;
5443         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5444 
5445         /* The argument is going onto the stack; extend into scratch. */
5446         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5447             tcg_debug_assert(parm->ntmp != 0);
5448             mov[i].dst = src = parm->tmp[0];
5449             tcg_out_movext1(s, &mov[i]);
5450         }
5451 
5452         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5453                    tcg_out_helper_stk_ofs(dst_type, slot));
5454     }
5455     return;
5456 
5457  found_reg:
5458     /*
5459      * The remaining arguments are in registers.
5460      * Convert slot numbers to argument registers.
5461      */
5462     nmov = i + 1;
5463     for (i = 0; i < nmov; ++i) {
5464         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5465     }
5466 
5467     switch (nmov) {
5468     case 4:
5469         /* The backend must have provided enough temps for the worst case. */
5470         tcg_debug_assert(parm->ntmp >= 2);
5471 
5472         dst3 = mov[3].dst;
5473         for (unsigned j = 0; j < 3; ++j) {
5474             if (dst3 == mov[j].src) {
5475                 /*
5476                  * Conflict. Copy the source to a temporary, perform the
5477                  * remaining moves, then the extension from our scratch
5478                  * on the way out.
5479                  */
5480                 TCGReg scratch = parm->tmp[1];
5481 
5482                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5483                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5484                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5485                 return;
5486             }
5487         }
5488 
5489         /* No conflicts: perform this move and continue. */
5490         tcg_out_movext1(s, &mov[3]);
5491         /* fall through */
5492 
5493     case 3:
5494         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5495                         parm->ntmp ? parm->tmp[0] : -1);
5496         break;
5497     case 2:
5498         tcg_out_movext2(s, mov, mov + 1,
5499                         parm->ntmp ? parm->tmp[0] : -1);
5500         break;
5501     case 1:
5502         tcg_out_movext1(s, mov);
5503         break;
5504     default:
5505         g_assert_not_reached();
5506     }
5507 }
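/*
 * Conflict example for the 4-move case above, with hypothetical
 * argument registers {r0, r1, r2, r3}: if mov[3].dst == r3 and
 * mov[1].src == r3, emitting mov[3] first would clobber mov[1]'s
 * source.  Instead mov[3].src is parked in parm->tmp[1], the first
 * three moves are resolved by tcg_out_movext3, and the final extension
 * is performed from the scratch register.
 */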
5508 
5509 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5510                                     TCGType type, tcg_target_long imm,
5511                                     const TCGLdstHelperParam *parm)
5512 {
5513     if (arg_slot_reg_p(slot)) {
5514         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5515     } else {
5516         int ofs = tcg_out_helper_stk_ofs(type, slot);
5517         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5518             tcg_debug_assert(parm->ntmp != 0);
5519             tcg_out_movi(s, type, parm->tmp[0], imm);
5520             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5521         }
5522     }
5523 }
5524 
5525 static void tcg_out_helper_load_common_args(TCGContext *s,
5526                                             const TCGLabelQemuLdst *ldst,
5527                                             const TCGLdstHelperParam *parm,
5528                                             const TCGHelperInfo *info,
5529                                             unsigned next_arg)
5530 {
5531     TCGMovExtend ptr_mov = {
5532         .dst_type = TCG_TYPE_PTR,
5533         .src_type = TCG_TYPE_PTR,
5534         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5535     };
5536     const TCGCallArgumentLoc *loc = &info->in[0];
5537     TCGType type;
5538     unsigned slot;
5539     tcg_target_ulong imm;
5540 
5541     /*
5542      * Handle env, which is always first.
5543      */
5544     ptr_mov.dst = loc->arg_slot;
5545     ptr_mov.src = TCG_AREG0;
5546     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5547 
5548     /*
5549      * Handle oi.
5550      */
5551     imm = ldst->oi;
5552     loc = &info->in[next_arg];
5553     type = TCG_TYPE_I32;
5554     switch (loc->kind) {
5555     case TCG_CALL_ARG_NORMAL:
5556         break;
5557     case TCG_CALL_ARG_EXTEND_U:
5558     case TCG_CALL_ARG_EXTEND_S:
5559         /* No extension required for MemOpIdx. */
5560         tcg_debug_assert(imm <= INT32_MAX);
5561         type = TCG_TYPE_REG;
5562         break;
5563     default:
5564         g_assert_not_reached();
5565     }
5566     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5567     next_arg++;
5568 
5569     /*
5570      * Handle ra.
5571      */
5572     loc = &info->in[next_arg];
5573     slot = loc->arg_slot;
5574     if (parm->ra_gen) {
5575         int arg_reg = -1;
5576         TCGReg ra_reg;
5577 
5578         if (arg_slot_reg_p(slot)) {
5579             arg_reg = tcg_target_call_iarg_regs[slot];
5580         }
5581         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5582 
5583         ptr_mov.dst = slot;
5584         ptr_mov.src = ra_reg;
5585         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5586     } else {
5587         imm = (uintptr_t)ldst->raddr;
5588         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5589     }
5590 }
5591 
5592 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5593                                        const TCGCallArgumentLoc *loc,
5594                                        TCGType dst_type, TCGType src_type,
5595                                        TCGReg lo, TCGReg hi)
5596 {
5597     MemOp reg_mo;
5598 
5599     if (dst_type <= TCG_TYPE_REG) {
5600         MemOp src_ext;
5601 
5602         switch (loc->kind) {
5603         case TCG_CALL_ARG_NORMAL:
5604             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5605             break;
5606         case TCG_CALL_ARG_EXTEND_U:
5607             dst_type = TCG_TYPE_REG;
5608             src_ext = MO_UL;
5609             break;
5610         case TCG_CALL_ARG_EXTEND_S:
5611             dst_type = TCG_TYPE_REG;
5612             src_ext = MO_SL;
5613             break;
5614         default:
5615             g_assert_not_reached();
5616         }
5617 
5618         mov[0].dst = loc->arg_slot;
5619         mov[0].dst_type = dst_type;
5620         mov[0].src = lo;
5621         mov[0].src_type = src_type;
5622         mov[0].src_ext = src_ext;
5623         return 1;
5624     }
5625 
5626     if (TCG_TARGET_REG_BITS == 32) {
5627         assert(dst_type == TCG_TYPE_I64);
5628         reg_mo = MO_32;
5629     } else {
5630         assert(dst_type == TCG_TYPE_I128);
5631         reg_mo = MO_64;
5632     }
5633 
5634     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5635     mov[0].src = lo;
5636     mov[0].dst_type = TCG_TYPE_REG;
5637     mov[0].src_type = TCG_TYPE_REG;
5638     mov[0].src_ext = reg_mo;
5639 
5640     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5641     mov[1].src = hi;
5642     mov[1].dst_type = TCG_TYPE_REG;
5643     mov[1].src_type = TCG_TYPE_REG;
5644     mov[1].src_ext = reg_mo;
5645 
5646     return 2;
5647 }
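/*
 * Example: passing a TCG_TYPE_I64 value on a 32-bit little-endian host
 * produces two TCG_TYPE_REG moves, lo into loc[0].arg_slot and hi into
 * loc[1].arg_slot; on a big-endian host the slot assignment is swapped,
 * matching the in-memory word order of the 64-bit argument.
 */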
5648 
5649 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5650                                    const TCGLdstHelperParam *parm)
5651 {
5652     const TCGHelperInfo *info;
5653     const TCGCallArgumentLoc *loc;
5654     TCGMovExtend mov[2];
5655     unsigned next_arg, nmov;
5656     MemOp mop = get_memop(ldst->oi);
5657 
5658     switch (mop & MO_SIZE) {
5659     case MO_8:
5660     case MO_16:
5661     case MO_32:
5662         info = &info_helper_ld32_mmu;
5663         break;
5664     case MO_64:
5665         info = &info_helper_ld64_mmu;
5666         break;
5667     case MO_128:
5668         info = &info_helper_ld128_mmu;
5669         break;
5670     default:
5671         g_assert_not_reached();
5672     }
5673 
5674     /* Defer env argument. */
5675     next_arg = 1;
5676 
5677     loc = &info->in[next_arg];
5678     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5679         /*
5680          * 32-bit host with 32-bit guest: zero-extend the guest address
5681          * to 64-bits for the helper by storing the low part, then
5682          * load a zero for the high part.
5683          */
5684         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5685                                TCG_TYPE_I32, TCG_TYPE_I32,
5686                                ldst->addrlo_reg, -1);
5687         tcg_out_helper_load_slots(s, 1, mov, parm);
5688 
5689         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5690                                 TCG_TYPE_I32, 0, parm);
5691         next_arg += 2;
5692     } else {
5693         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5694                                       ldst->addrlo_reg, ldst->addrhi_reg);
5695         tcg_out_helper_load_slots(s, nmov, mov, parm);
5696         next_arg += nmov;
5697     }
5698 
5699     switch (info->out_kind) {
5700     case TCG_CALL_RET_NORMAL:
5701     case TCG_CALL_RET_BY_VEC:
5702         break;
5703     case TCG_CALL_RET_BY_REF:
5704         /*
5705          * The return reference is in the first argument slot.
5706          * We need memory in which to return: re-use the top of stack.
5707          */
5708         {
5709             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5710 
5711             if (arg_slot_reg_p(0)) {
5712                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5713                                  TCG_REG_CALL_STACK, ofs_slot0);
5714             } else {
5715                 tcg_debug_assert(parm->ntmp != 0);
5716                 tcg_out_addi_ptr(s, parm->tmp[0],
5717                                  TCG_REG_CALL_STACK, ofs_slot0);
5718                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5719                            TCG_REG_CALL_STACK, ofs_slot0);
5720             }
5721         }
5722         break;
5723     default:
5724         g_assert_not_reached();
5725     }
5726 
5727     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5728 }
5729 
5730 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5731                                   bool load_sign,
5732                                   const TCGLdstHelperParam *parm)
5733 {
5734     MemOp mop = get_memop(ldst->oi);
5735     TCGMovExtend mov[2];
5736     int ofs_slot0;
5737 
5738     switch (ldst->type) {
5739     case TCG_TYPE_I64:
5740         if (TCG_TARGET_REG_BITS == 32) {
5741             break;
5742         }
5743         /* fall through */
5744 
5745     case TCG_TYPE_I32:
5746         mov[0].dst = ldst->datalo_reg;
5747         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5748         mov[0].dst_type = ldst->type;
5749         mov[0].src_type = TCG_TYPE_REG;
5750 
5751         /*
5752          * If load_sign, then we allowed the helper to perform the
5753          * appropriate sign extension to tcg_target_ulong, and all
5754          * we need now is a plain move.
5755          *
5756          * If they do not, then we expect the relevant extension
5757          * instruction to be no more expensive than a move, and
5758          * we thus save the icache etc by only using one of two
5759          * helper functions.
5760          */
5761         if (load_sign || !(mop & MO_SIGN)) {
5762             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5763                 mov[0].src_ext = MO_32;
5764             } else {
5765                 mov[0].src_ext = MO_64;
5766             }
5767         } else {
5768             mov[0].src_ext = mop & MO_SSIZE;
5769         }
5770         tcg_out_movext1(s, mov);
5771         return;
5772 
5773     case TCG_TYPE_I128:
5774         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5775         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5776         switch (TCG_TARGET_CALL_RET_I128) {
5777         case TCG_CALL_RET_NORMAL:
5778             break;
5779         case TCG_CALL_RET_BY_VEC:
5780             tcg_out_st(s, TCG_TYPE_V128,
5781                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5782                        TCG_REG_CALL_STACK, ofs_slot0);
5783             /* fall through */
5784         case TCG_CALL_RET_BY_REF:
5785             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5786                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5787             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5788                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5789             return;
5790         default:
5791             g_assert_not_reached();
5792         }
5793         break;
5794 
5795     default:
5796         g_assert_not_reached();
5797     }
5798 
5799     mov[0].dst = ldst->datalo_reg;
5800     mov[0].src =
5801         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5802     mov[0].dst_type = TCG_TYPE_REG;
5803     mov[0].src_type = TCG_TYPE_REG;
5804     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5805 
5806     mov[1].dst = ldst->datahi_reg;
5807     mov[1].src =
5808         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5809     mov[1].dst_type = TCG_TYPE_REG;
5810     mov[1].src_type = TCG_TYPE_REG;
5811     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5812 
5813     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5814 }
5815 
5816 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5817                                    const TCGLdstHelperParam *parm)
5818 {
5819     const TCGHelperInfo *info;
5820     const TCGCallArgumentLoc *loc;
5821     TCGMovExtend mov[4];
5822     TCGType data_type;
5823     unsigned next_arg, nmov, n;
5824     MemOp mop = get_memop(ldst->oi);
5825 
5826     switch (mop & MO_SIZE) {
5827     case MO_8:
5828     case MO_16:
5829     case MO_32:
5830         info = &info_helper_st32_mmu;
5831         data_type = TCG_TYPE_I32;
5832         break;
5833     case MO_64:
5834         info = &info_helper_st64_mmu;
5835         data_type = TCG_TYPE_I64;
5836         break;
5837     case MO_128:
5838         info = &info_helper_st128_mmu;
5839         data_type = TCG_TYPE_I128;
5840         break;
5841     default:
5842         g_assert_not_reached();
5843     }
5844 
5845     /* Defer env argument. */
5846     next_arg = 1;
5847     nmov = 0;
5848 
5849     /* Handle addr argument. */
5850     loc = &info->in[next_arg];
5851     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5852         /*
5853          * 32-bit host with 32-bit guest: zero-extend the guest address
5854          * to 64-bits for the helper by storing the low part.  Later,
5855          * after we have processed the register inputs, we will load a
5856          * zero for the high part.
5857          */
5858         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5859                                TCG_TYPE_I32, TCG_TYPE_I32,
5860                                ldst->addrlo_reg, -1);
5861         next_arg += 2;
5862         nmov += 1;
5863     } else {
5864         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5865                                    ldst->addrlo_reg, ldst->addrhi_reg);
5866         next_arg += n;
5867         nmov += n;
5868     }
5869 
5870     /* Handle data argument. */
5871     loc = &info->in[next_arg];
5872     switch (loc->kind) {
5873     case TCG_CALL_ARG_NORMAL:
5874     case TCG_CALL_ARG_EXTEND_U:
5875     case TCG_CALL_ARG_EXTEND_S:
5876         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5877                                    ldst->datalo_reg, ldst->datahi_reg);
5878         next_arg += n;
5879         nmov += n;
5880         tcg_out_helper_load_slots(s, nmov, mov, parm);
5881         break;
5882 
5883     case TCG_CALL_ARG_BY_REF:
5884         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5885         tcg_debug_assert(data_type == TCG_TYPE_I128);
5886         tcg_out_st(s, TCG_TYPE_I64,
5887                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5888                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5889         tcg_out_st(s, TCG_TYPE_I64,
5890                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5891                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5892 
5893         tcg_out_helper_load_slots(s, nmov, mov, parm);
5894 
5895         if (arg_slot_reg_p(loc->arg_slot)) {
5896             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5897                              TCG_REG_CALL_STACK,
5898                              arg_slot_stk_ofs(loc->ref_slot));
5899         } else {
5900             tcg_debug_assert(parm->ntmp != 0);
5901             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5902                              arg_slot_stk_ofs(loc->ref_slot));
5903             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5904                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5905         }
5906         next_arg += 2;
5907         break;
5908 
5909     default:
5910         g_assert_not_reached();
5911     }
5912 
5913     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5914         /* Zero extend the address by loading a zero for the high part. */
5915         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5916         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5917     }
5918 
5919     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5920 }
5921 
5922 void tcg_dump_op_count(GString *buf)
5923 {
5924     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5925 }
5926 
5927 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5928 {
5929     int i, start_words, num_insns;
5930     TCGOp *op;
5931 
5932     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5933                  && qemu_log_in_addr_range(pc_start))) {
5934         FILE *logfile = qemu_log_trylock();
5935         if (logfile) {
5936             fprintf(logfile, "OP:\n");
5937             tcg_dump_ops(s, logfile, false);
5938             fprintf(logfile, "\n");
5939             qemu_log_unlock(logfile);
5940         }
5941     }
5942 
5943 #ifdef CONFIG_DEBUG_TCG
5944     /* Ensure all labels referenced have been emitted.  */
5945     {
5946         TCGLabel *l;
5947         bool error = false;
5948 
5949         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5950             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5951                 qemu_log_mask(CPU_LOG_TB_OP,
5952                               "$L%d referenced but not present.\n", l->id);
5953                 error = true;
5954             }
5955         }
5956         assert(!error);
5957     }
5958 #endif
5959 
5960     tcg_optimize(s);
5961 
5962     reachable_code_pass(s);
5963     liveness_pass_0(s);
5964     liveness_pass_1(s);
5965 
5966     if (s->nb_indirects > 0) {
5967         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5968                      && qemu_log_in_addr_range(pc_start))) {
5969             FILE *logfile = qemu_log_trylock();
5970             if (logfile) {
5971                 fprintf(logfile, "OP before indirect lowering:\n");
5972                 tcg_dump_ops(s, logfile, false);
5973                 fprintf(logfile, "\n");
5974                 qemu_log_unlock(logfile);
5975             }
5976         }
5977 
5978         /* Replace indirect temps with direct temps.  */
5979         if (liveness_pass_2(s)) {
5980             /* If changes were made, re-run liveness.  */
5981             liveness_pass_1(s);
5982         }
5983     }
5984 
5985     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5986                  && qemu_log_in_addr_range(pc_start))) {
5987         FILE *logfile = qemu_log_trylock();
5988         if (logfile) {
5989             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5990             tcg_dump_ops(s, logfile, true);
5991             fprintf(logfile, "\n");
5992             qemu_log_unlock(logfile);
5993         }
5994     }
5995 
5996     /* Initialize goto_tb jump offsets. */
5997     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5998     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5999     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6000     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6001 
6002     tcg_reg_alloc_start(s);
6003 
6004     /*
6005      * Reset the buffer pointers when restarting after overflow.
6006      * TODO: Move this into translate-all.c with the rest of the
6007      * buffer management.  Having only this done here is confusing.
6008      */
6009     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6010     s->code_ptr = s->code_buf;
6011 
6012 #ifdef TCG_TARGET_NEED_LDST_LABELS
6013     QSIMPLEQ_INIT(&s->ldst_labels);
6014 #endif
6015 #ifdef TCG_TARGET_NEED_POOL_LABELS
6016     s->pool_labels = NULL;
6017 #endif
6018 
6019     start_words = s->insn_start_words;
6020     s->gen_insn_data =
6021         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6022 
6023     tcg_out_tb_start(s);
6024 
6025     num_insns = -1;
6026     QTAILQ_FOREACH(op, &s->ops, link) {
6027         TCGOpcode opc = op->opc;
6028 
6029         switch (opc) {
6030         case INDEX_op_mov_i32:
6031         case INDEX_op_mov_i64:
6032         case INDEX_op_mov_vec:
6033             tcg_reg_alloc_mov(s, op);
6034             break;
6035         case INDEX_op_dup_vec:
6036             tcg_reg_alloc_dup(s, op);
6037             break;
6038         case INDEX_op_insn_start:
6039             if (num_insns >= 0) {
6040                 size_t off = tcg_current_code_size(s);
6041                 s->gen_insn_end_off[num_insns] = off;
6042                 /* Assert that we do not overflow our stored offset.  */
6043                 assert(s->gen_insn_end_off[num_insns] == off);
6044             }
6045             num_insns++;
6046             for (i = 0; i < start_words; ++i) {
6047                 s->gen_insn_data[num_insns * start_words + i] =
6048                     tcg_get_insn_start_param(op, i);
6049             }
6050             break;
6051         case INDEX_op_discard:
6052             temp_dead(s, arg_temp(op->args[0]));
6053             break;
6054         case INDEX_op_set_label:
6055             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6056             tcg_out_label(s, arg_label(op->args[0]));
6057             break;
6058         case INDEX_op_call:
6059             tcg_reg_alloc_call(s, op);
6060             break;
6061         case INDEX_op_exit_tb:
6062             tcg_out_exit_tb(s, op->args[0]);
6063             break;
6064         case INDEX_op_goto_tb:
6065             tcg_out_goto_tb(s, op->args[0]);
6066             break;
6067         case INDEX_op_dup2_vec:
6068             if (tcg_reg_alloc_dup2(s, op)) {
6069                 break;
6070             }
6071             /* fall through */
6072         default:
6073             /* Sanity check that we've not introduced any unhandled opcodes. */
6074             tcg_debug_assert(tcg_op_supported(opc));
6075             /* Note: in order to speed up the code, it would be much
6076                faster to have specialized register allocator functions for
6077                some common argument patterns */
6078             tcg_reg_alloc_op(s, op);
6079             break;
6080         }
6081         /* Test for (pending) buffer overflow.  The assumption is that any
6082            one operation beginning below the high water mark cannot overrun
6083            the buffer completely.  Thus we can test for overflow after
6084            generating code without having to check during generation.  */
6085         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6086             return -1;
6087         }
6088         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6089         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6090             return -2;
6091         }
6092     }
6093     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6094     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6095 
6096     /* Generate TB finalization at the end of block */
6097 #ifdef TCG_TARGET_NEED_LDST_LABELS
6098     i = tcg_out_ldst_finalize(s);
6099     if (i < 0) {
6100         return i;
6101     }
6102 #endif
6103 #ifdef TCG_TARGET_NEED_POOL_LABELS
6104     i = tcg_out_pool_finalize(s);
6105     if (i < 0) {
6106         return i;
6107     }
6108 #endif
6109     if (!tcg_resolve_relocs(s)) {
6110         return -2;
6111     }
6112 
6113 #ifndef CONFIG_TCG_INTERPRETER
6114     /* Flush the icache: code is written via the rw view but runs from rx.  */
6115     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6116                         (uintptr_t)s->code_buf,
6117                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6118 #endif
6119 
6120     return tcg_current_code_size(s);
6121 }
6122 
6123 void tcg_dump_info(GString *buf)
6124 {
6125     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6126 }
6127 
6128 #ifdef ELF_HOST_MACHINE
6129 /* In order to use this feature, the backend needs to do three things:
6130 
6131    (1) Define ELF_HOST_MACHINE, which both supplies the value placed
6132        in the ELF image's e_machine field and signals support for the feature.
6133 
6134    (2) Define tcg_register_jit.  This should create a buffer containing
6135        the contents of a .debug_frame section that describes the post-
6136        prologue unwind info for the tcg machine.
6137 
6138    (3) Call tcg_register_jit_int with the constructed .debug_frame.
6139        A sketch of a hypothetical backend implementation follows.  */
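
/*
 * Illustration only: a minimal sketch of steps (2) and (3), modeled on
 * what the per-host tcg-target.c.inc files do.  The DebugFrame layout
 * and contents are placeholders, not real unwind info for any host:
 *
 *     static const DebugFrame debug_frame = {
 *         ... a CIE/FDE pair describing the prologue's frame layout ...
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */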
6140 
6141 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6142 typedef enum {
6143     JIT_NOACTION = 0,
6144     JIT_REGISTER_FN,
6145     JIT_UNREGISTER_FN
6146 } jit_actions_t;
6147 
6148 struct jit_code_entry {
6149     struct jit_code_entry *next_entry;
6150     struct jit_code_entry *prev_entry;
6151     const void *symfile_addr;
6152     uint64_t symfile_size;
6153 };
6154 
6155 struct jit_descriptor {
6156     uint32_t version;
6157     uint32_t action_flag;
6158     struct jit_code_entry *relevant_entry;
6159     struct jit_code_entry *first_entry;
6160 };
6161 
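/* GDB sets a breakpoint on this function.  The noinline attribute and the
   empty asm statement keep it from being inlined or optimized away, so
   the breakpoint is reliably hit.  */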
6162 void __jit_debug_register_code(void) __attribute__((noinline));
6163 void __jit_debug_register_code(void)
6164 {
6165     asm("");
6166 }
6167 
6168 /* Must statically initialize the version, because GDB may check
6169    the version before we can set it.  */
6170 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6171 
6172 /* End GDB interface.  */
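
/* Registration, done in tcg_register_jit_int below, links an entry into
   the list, sets action_flag to JIT_REGISTER_FN, points relevant_entry
   at the new entry, and calls __jit_debug_register_code().  Unregistering
   would use JIT_UNREGISTER_FN in the same way, but QEMU never needs to:
   the single entry covers code_gen_buffer, which lives for the life of
   the process.  */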
6173 
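/* Return the offset of STR within the string table STRTAB.  The caller
   must guarantee that STR is present, as the search loop has no other
   exit.  E.g. with the table built below, find_string(img->str, ".text")
   yields 1: per ELF convention the table begins with a NUL byte.  */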
6174 static int find_string(const char *strtab, const char *str)
6175 {
6176     const char *p = strtab + 1;
6177 
6178     while (1) {
6179         if (strcmp(p, str) == 0) {
6180             return p - strtab;
6181         }
6182         p += strlen(p) + 1;
6183     }
6184 }
6185 
6186 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6187                                  const void *debug_frame,
6188                                  size_t debug_frame_size)
6189 {
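    /* A hand-rolled .debug_info section: one compile-unit DIE and one
       subprogram DIE, laid out to match the abbrev table in da[] below.  */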
6190     struct __attribute__((packed)) DebugInfo {
6191         uint32_t  len;
6192         uint16_t  version;
6193         uint32_t  abbrev;
6194         uint8_t   ptr_size;
6195         uint8_t   cu_die;
6196         uint16_t  cu_lang;
6197         uintptr_t cu_low_pc;
6198         uintptr_t cu_high_pc;
6199         uint8_t   fn_die;
6200         char      fn_name[16];
6201         uintptr_t fn_low_pc;
6202         uintptr_t fn_high_pc;
6203         uint8_t   cu_eoc;
6204     };
6205 
6206     struct ElfImage {
6207         ElfW(Ehdr) ehdr;
6208         ElfW(Phdr) phdr;
6209         ElfW(Shdr) shdr[7];
6210         ElfW(Sym)  sym[2];
6211         struct DebugInfo di;
6212         uint8_t    da[24];
6213         char       str[80];
6214     };
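
    /* The backend's .debug_frame data is appended immediately after this
       image (note the g_malloc of img_size below), which is why
       shdr[4].sh_offset is sizeof(struct ElfImage).  */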
6215 
6216     struct ElfImage *img;
6217 
6218     static const struct ElfImage img_template = {
6219         .ehdr = {
6220             .e_ident[EI_MAG0] = ELFMAG0,
6221             .e_ident[EI_MAG1] = ELFMAG1,
6222             .e_ident[EI_MAG2] = ELFMAG2,
6223             .e_ident[EI_MAG3] = ELFMAG3,
6224             .e_ident[EI_CLASS] = ELF_CLASS,
6225             .e_ident[EI_DATA] = ELF_DATA,
6226             .e_ident[EI_VERSION] = EV_CURRENT,
6227             .e_type = ET_EXEC,
6228             .e_machine = ELF_HOST_MACHINE,
6229             .e_version = EV_CURRENT,
6230             .e_phoff = offsetof(struct ElfImage, phdr),
6231             .e_shoff = offsetof(struct ElfImage, shdr),
6232             .e_ehsize = sizeof(ElfW(Ehdr)),
6233             .e_phentsize = sizeof(ElfW(Phdr)),
6234             .e_phnum = 1,
6235             .e_shentsize = sizeof(ElfW(Shdr)),
6236             .e_shnum = ARRAY_SIZE(img->shdr),
6237             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6238 #ifdef ELF_HOST_FLAGS
6239             .e_flags = ELF_HOST_FLAGS,
6240 #endif
6241 #ifdef ELF_OSABI
6242             .e_ident[EI_OSABI] = ELF_OSABI,
6243 #endif
6244         },
6245         .phdr = {
6246             .p_type = PT_LOAD,
6247             .p_flags = PF_X,
6248         },
6249         .shdr = {
6250             [0] = { .sh_type = SHT_NULL },
6251             /* Trick: the contents of code_gen_buffer are not present in
6252                this fake ELF file; the buffer was allocated elsewhere.  So
6253                mark .text as SHT_NOBITS (like .bss) so that readers will
6254                not look for contents.  We can record any address.  */
6255             [1] = { /* .text */
6256                 .sh_type = SHT_NOBITS,
6257                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6258             },
6259             [2] = { /* .debug_info */
6260                 .sh_type = SHT_PROGBITS,
6261                 .sh_offset = offsetof(struct ElfImage, di),
6262                 .sh_size = sizeof(struct DebugInfo),
6263             },
6264             [3] = { /* .debug_abbrev */
6265                 .sh_type = SHT_PROGBITS,
6266                 .sh_offset = offsetof(struct ElfImage, da),
6267                 .sh_size = sizeof(img->da),
6268             },
6269             [4] = { /* .debug_frame */
6270                 .sh_type = SHT_PROGBITS,
6271                 .sh_offset = sizeof(struct ElfImage),
6272             },
6273             [5] = { /* .symtab */
6274                 .sh_type = SHT_SYMTAB,
6275                 .sh_offset = offsetof(struct ElfImage, sym),
6276                 .sh_size = sizeof(img->sym),
6277                 .sh_info = 1,
6278                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6279                 .sh_entsize = sizeof(ElfW(Sym)),
6280             },
6281             [6] = { /* .strtab */
6282                 .sh_type = SHT_STRTAB,
6283                 .sh_offset = offsetof(struct ElfImage, str),
6284                 .sh_size = sizeof(img->str),
6285             }
6286         },
6287         .sym = {
6288             [1] = { /* code_gen_buffer */
6289                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6290                 .st_shndx = 1,
6291             }
6292         },
6293         .di = {
6294             .len = sizeof(struct DebugInfo) - 4,
6295             .version = 2,
6296             .ptr_size = sizeof(void *),
6297             .cu_die = 1,
6298             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6299             .fn_die = 2,
6300             .fn_name = "code_gen_buffer"
6301         },
6302         .da = {
6303             1,          /* abbrev number (the cu) */
6304             0x11, 1,    /* DW_TAG_compile_unit, has children */
6305             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6306             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6307             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6308             0, 0,       /* end of abbrev */
6309             2,          /* abbrev number (the fn) */
6310             0x2e, 0,    /* DW_TAG_subprogram, no children */
6311             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6312             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6313             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6314             0, 0,       /* end of abbrev */
6315             0           /* no more abbrev */
6316         },
6317         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6318                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6319     };
6320 
6321     /* We only need a single jit entry; statically allocate it.  */
6322     static struct jit_code_entry one_entry;
6323 
6324     uintptr_t buf = (uintptr_t)buf_ptr;
6325     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6326     DebugFrameHeader *dfh;
6327 
6328     img = g_malloc(img_size);
6329     *img = img_template;
6330 
6331     img->phdr.p_vaddr = buf;
6332     img->phdr.p_paddr = buf;
6333     img->phdr.p_memsz = buf_size;
6334 
6335     img->shdr[1].sh_name = find_string(img->str, ".text");
6336     img->shdr[1].sh_addr = buf;
6337     img->shdr[1].sh_size = buf_size;
6338 
6339     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6340     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6341 
6342     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6343     img->shdr[4].sh_size = debug_frame_size;
6344 
6345     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6346     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6347 
6348     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6349     img->sym[1].st_value = buf;
6350     img->sym[1].st_size = buf_size;
6351 
6352     img->di.cu_low_pc = buf;
6353     img->di.cu_high_pc = buf + buf_size;
6354     img->di.fn_low_pc = buf;
6355     img->di.fn_high_pc = buf + buf_size;
6356 
6357     dfh = (DebugFrameHeader *)(img + 1);
6358     memcpy(dfh, debug_frame, debug_frame_size);
6359     dfh->fde.func_start = buf;
6360     dfh->fde.func_len = buf_size;
6361 
6362 #ifdef DEBUG_JIT
6363     /* Enable this block to debug the creation of the ELF image file.
6364        The dumped file can be inspected with readelf, objdump, etc.  */
6365     {
6366         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6367         FILE *f = fopen(jit, "w+b");
6368         if (f) {
6369             if (fwrite(img, img_size, 1, f) != 1) {
6370                 /* Empty: the check only silences fwrite's unused-result warning.  */
6371             }
6372             fclose(f);
6373         }
6374     }
6375 #endif
6376 
6377     one_entry.symfile_addr = img;
6378     one_entry.symfile_size = img_size;
6379 
6380     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6381     __jit_debug_descriptor.relevant_entry = &one_entry;
6382     __jit_debug_descriptor.first_entry = &one_entry;
6383     __jit_debug_register_code();
6384 }
6385 #else
6386 /* No support for the feature.  Provide the entry point expected by exec.c,
6387    and implement the internal function we declared earlier.  */
6388 
6389 static void tcg_register_jit_int(const void *buf, size_t size,
6390                                  const void *debug_frame,
6391                                  size_t debug_frame_size)
6392 {
6393 }
6394 
6395 void tcg_register_jit(const void *buf, size_t buf_size)
6396 {
6397 }
6398 #endif /* ELF_HOST_MACHINE */
6399 
6400 #if !TCG_TARGET_MAYBE_vec
6401 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6402 {
6403     g_assert_not_reached();
6404 }
6405 #endif
6406