xref: /openbmc/qemu/tcg/tcg.c (revision e0c72452)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
/*
 * Pick the ELF class from the host pointer width and the ELF data
 * encoding from the host byte order.  Both macros are defined ahead
 * of the "elf.h" include that follows.
 */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "accel/tcg/perf.h"
59 #ifdef CONFIG_USER_ONLY
60 #include "exec/user/guest-base.h"
61 #endif
62 
63 /* Forward declarations for functions declared in tcg-target.c.inc and
64    used here. */
65 static void tcg_target_init(TCGContext *s);
66 static void tcg_target_qemu_prologue(TCGContext *s);
67 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
68                         intptr_t value, intptr_t addend);
69 
/*
 * The CIE and FDE header definitions will be common to all hosts.
 *
 * CIE header: @len is forced to pointer-size alignment by the attribute;
 * the remaining members follow in declaration order.
 * NOTE(review): field meanings presumably follow the DWARF .debug_frame
 * CIE layout -- confirm against the DWARF specification.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
80 
/*
 * FDE header, packed.  @len is aligned to pointer size; @func_start and
 * @func_len are pointer-sized integers.
 * NOTE(review): presumably the start address and byte length of the
 * described code range -- confirm against the users of this struct.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
87 
/* A CIE immediately followed by an FDE header, packed into one object. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
92 
/*
 * Description of one qemu_ld/qemu_st operation: the memory operation,
 * the registers holding address and data, the return address and up to
 * two code locations to patch.  Records are chained through @next.
 * NOTE(review): presumably one record per out-of-line slow path emitted
 * by the backend -- confirm in tcg-target.c.inc.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
105 
106 static void tcg_register_jit_int(const void *buf, size_t size,
107                                  const void *debug_frame,
108                                  size_t debug_frame_size)
109     __attribute__((unused));
110 
111 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
112 static void tcg_out_tb_start(TCGContext *s);
113 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
114                        intptr_t arg2);
115 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
116 static void tcg_out_movi(TCGContext *s, TCGType type,
117                          TCGReg ret, tcg_target_long arg);
118 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
128 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
129 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
130 static void tcg_out_goto_tb(TCGContext *s, int which);
131 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
132                        const TCGArg args[TCG_MAX_OP_ARGS],
133                        const int const_args[TCG_MAX_OP_ARGS]);
134 #if TCG_TARGET_MAYBE_vec
135 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
136                             TCGReg dst, TCGReg src);
137 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
138                              TCGReg dst, TCGReg base, intptr_t offset);
139 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
140                              TCGReg dst, int64_t arg);
141 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
142                            unsigned vecl, unsigned vece,
143                            const TCGArg args[TCG_MAX_OP_ARGS],
144                            const int const_args[TCG_MAX_OP_ARGS]);
145 #else
/*
 * Stand-ins for the vector emitters, compiled when TCG_TARGET_MAYBE_vec
 * is zero.  Each one aborts through g_assert_not_reached(); none of them
 * returns.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
168 #endif
169 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
170                        intptr_t arg2);
171 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
172                         TCGReg base, intptr_t ofs);
173 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
174                          const TCGHelperInfo *info);
175 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
176 static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
177 #ifdef TCG_TARGET_NEED_LDST_LABELS
178 static int tcg_out_ldst_finalize(TCGContext *s);
179 #endif
180 
/*
 * Parameter block passed to tcg_out_ld_helper_args(),
 * tcg_out_ld_helper_ret() and tcg_out_st_helper_args().
 * NOTE(review): @ra_gen presumably materialises the return address into
 * a register, and @tmp[] presumably lists @ntmp scratch registers --
 * confirm against the helper-argument code further down the file.
 */
typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;
186 
187 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
188                                    const TCGLdstHelperParam *p)
189     __attribute__((unused));
190 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
191                                   bool load_sign, const TCGLdstHelperParam *p)
192     __attribute__((unused));
193 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
194                                    const TCGLdstHelperParam *p)
195     __attribute__((unused));
196 
/*
 * Load helper entry points, indexed by the size+sign bits of a MemOp.
 * The MO_SL and MO_128 entries exist only when TCG_TARGET_REG_BITS is 64;
 * any index not listed is left as a null pointer.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
209 
/*
 * Store helper entry points, indexed by the size bits of a MemOp.
 * The MO_128 entry exists only when TCG_TARGET_REG_BITS is 64; any
 * index not listed is left as a null pointer.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
219 
/* Result pair returned by atom_and_align_for_opc(). */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
224 
225 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
226                                            MemOp host_atom, bool allow_two_ops)
227     __attribute__((unused));
228 
/*
 * The initial context.  tcg_register_thread() either points tcg_ctx
 * straight at it (user-mode) or copies it into a freshly allocated
 * per-thread context (system-mode).
 */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context in use; set by tcg_register_thread(). */
__thread TCGContext *tcg_ctx;

/*
 * Registered contexts.  In system-mode tcg_register_thread() claims the
 * slot numbered by an atomic increment of tcg_cur_ctxs and asserts that
 * the slot is below tcg_max_ctxs.
 */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

/* Only defined when the TCG interpreter is not being built. */
#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/*
 * File-local register sets.
 * NOTE(review): presumably filled in by the backend's tcg_target_init()
 * -- confirm in tcg-target.c.inc.
 */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
245 
246 #if TCG_TARGET_INSN_UNIT_SIZE == 1
247 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
248 {
249     *s->code_ptr++ = v;
250 }
251 
252 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
253                                                       uint8_t v)
254 {
255     *p = v;
256 }
257 #endif
258 
259 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
260 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
261 {
262     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
263         *s->code_ptr++ = v;
264     } else {
265         tcg_insn_unit *p = s->code_ptr;
266         memcpy(p, &v, sizeof(v));
267         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
268     }
269 }
270 
271 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
272                                                        uint16_t v)
273 {
274     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
275         *p = v;
276     } else {
277         memcpy(p, &v, sizeof(v));
278     }
279 }
280 #endif
281 
282 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
283 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
284 {
285     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
286         *s->code_ptr++ = v;
287     } else {
288         tcg_insn_unit *p = s->code_ptr;
289         memcpy(p, &v, sizeof(v));
290         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
291     }
292 }
293 
294 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
295                                                        uint32_t v)
296 {
297     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
298         *p = v;
299     } else {
300         memcpy(p, &v, sizeof(v));
301     }
302 }
303 #endif
304 
305 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
306 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
307 {
308     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
309         *s->code_ptr++ = v;
310     } else {
311         tcg_insn_unit *p = s->code_ptr;
312         memcpy(p, &v, sizeof(v));
313         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
314     }
315 }
316 
317 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
318                                                        uint64_t v)
319 {
320     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
321         *p = v;
322     } else {
323         memcpy(p, &v, sizeof(v));
324     }
325 }
326 #endif
327 
328 /* label relocation processing */
329 
330 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
331                           TCGLabel *l, intptr_t addend)
332 {
333     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
334 
335     r->type = type;
336     r->ptr = code_ptr;
337     r->addend = addend;
338     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
339 }
340 
341 static void tcg_out_label(TCGContext *s, TCGLabel *l)
342 {
343     tcg_debug_assert(!l->has_value);
344     l->has_value = 1;
345     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
346 }
347 
348 TCGLabel *gen_new_label(void)
349 {
350     TCGContext *s = tcg_ctx;
351     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
352 
353     memset(l, 0, sizeof(TCGLabel));
354     l->id = s->nb_labels++;
355     QSIMPLEQ_INIT(&l->branches);
356     QSIMPLEQ_INIT(&l->relocs);
357 
358     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
359 
360     return l;
361 }
362 
363 static bool tcg_resolve_relocs(TCGContext *s)
364 {
365     TCGLabel *l;
366 
367     QSIMPLEQ_FOREACH(l, &s->labels, next) {
368         TCGRelocation *r;
369         uintptr_t value = l->u.value;
370 
371         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
372             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
373                 return false;
374             }
375         }
376     }
377     return true;
378 }
379 
380 static void set_jmp_reset_offset(TCGContext *s, int which)
381 {
382     /*
383      * We will check for overflow at the end of the opcode loop in
384      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
385      */
386     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
387 }
388 
389 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
390 {
391     /*
392      * We will check for overflow at the end of the opcode loop in
393      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
394      */
395     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
396 }
397 
398 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
399 {
400     /*
401      * Return the read-execute version of the pointer, for the benefit
402      * of any pc-relative addressing mode.
403      */
404     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
405 }
406 
407 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
408 static int tlb_mask_table_ofs(TCGContext *s, int which)
409 {
410     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
411             sizeof(CPUNegativeOffsetState));
412 }
413 #endif
414 
/*
 * Signal overflow, starting over with fewer guest insns.
 *
 * Does not return: control transfers to the sigsetjmp point saved in
 * s->jmp_trans, which observes the value -2.
 */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
421 
/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination register, or argument slot (see above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type for destination */
    TCGType src_type;   /* integral type for source */
    MemOp src_ext;      /* extension to apply to source */
} TCGMovExtend;
438 
439 /**
440  * tcg_out_movext -- move and extend
441  * @s: tcg context
442  * @dst_type: integral type for destination
443  * @dst: destination register
444  * @src_type: integral type for source
445  * @src_ext: extension to apply to source
446  * @src: source register
447  *
448  * Move or extend @src into @dst, depending on @src_ext and the types.
449  */
450 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
451                            TCGType src_type, MemOp src_ext, TCGReg src)
452 {
453     switch (src_ext) {
454     case MO_UB:
455         tcg_out_ext8u(s, dst, src);
456         break;
457     case MO_SB:
458         tcg_out_ext8s(s, dst_type, dst, src);
459         break;
460     case MO_UW:
461         tcg_out_ext16u(s, dst, src);
462         break;
463     case MO_SW:
464         tcg_out_ext16s(s, dst_type, dst, src);
465         break;
466     case MO_UL:
467     case MO_SL:
468         if (dst_type == TCG_TYPE_I32) {
469             if (src_type == TCG_TYPE_I32) {
470                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
471             } else {
472                 tcg_out_extrl_i64_i32(s, dst, src);
473             }
474         } else if (src_type == TCG_TYPE_I32) {
475             if (src_ext & MO_SIGN) {
476                 tcg_out_exts_i32_i64(s, dst, src);
477             } else {
478                 tcg_out_extu_i32_i64(s, dst, src);
479             }
480         } else {
481             if (src_ext & MO_SIGN) {
482                 tcg_out_ext32s(s, dst, src);
483             } else {
484                 tcg_out_ext32u(s, dst, src);
485             }
486         }
487         break;
488     case MO_UQ:
489         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
490         if (dst_type == TCG_TYPE_I32) {
491             tcg_out_extrl_i64_i32(s, dst, src);
492         } else {
493             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
494         }
495         break;
496     default:
497         g_assert_not_reached();
498     }
499 }
500 
/* Minor variations on a theme, using a structure. */
/*
 * As tcg_out_movext with every argument taken from @i, except that the
 * source register is @src rather than i->src.
 */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
507 
/* As tcg_out_movext, with every argument, including the source, from @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
512 
513 /**
514  * tcg_out_movext2 -- move and extend two pair
515  * @s: tcg context
516  * @i1: first move description
517  * @i2: second move description
518  * @scratch: temporary register, or -1 for none
519  *
520  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
521  * between the sources and destinations.
522  */
523 
524 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
525                             const TCGMovExtend *i2, int scratch)
526 {
527     TCGReg src1 = i1->src;
528     TCGReg src2 = i2->src;
529 
530     if (i1->dst != src2) {
531         tcg_out_movext1(s, i1);
532         tcg_out_movext1(s, i2);
533         return;
534     }
535     if (i2->dst == src1) {
536         TCGType src1_type = i1->src_type;
537         TCGType src2_type = i2->src_type;
538 
539         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
540             /* The data is now in the correct registers, now extend. */
541             src1 = i2->src;
542             src2 = i1->src;
543         } else {
544             tcg_debug_assert(scratch >= 0);
545             tcg_out_mov(s, src1_type, scratch, src1);
546             src1 = scratch;
547         }
548     }
549     tcg_out_movext1_new_src(s, i2, src2);
550     tcg_out_movext1_new_src(s, i1, src1);
551 }
552 
553 /**
554  * tcg_out_movext3 -- move and extend three pair
555  * @s: tcg context
556  * @i1: first move description
557  * @i2: second move description
558  * @i3: third move description
559  * @scratch: temporary register, or -1 for none
560  *
561  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
562  * between the sources and destinations.
563  */
564 
565 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
566                             const TCGMovExtend *i2, const TCGMovExtend *i3,
567                             int scratch)
568 {
569     TCGReg src1 = i1->src;
570     TCGReg src2 = i2->src;
571     TCGReg src3 = i3->src;
572 
573     if (i1->dst != src2 && i1->dst != src3) {
574         tcg_out_movext1(s, i1);
575         tcg_out_movext2(s, i2, i3, scratch);
576         return;
577     }
578     if (i2->dst != src1 && i2->dst != src3) {
579         tcg_out_movext1(s, i2);
580         tcg_out_movext2(s, i1, i3, scratch);
581         return;
582     }
583     if (i3->dst != src1 && i3->dst != src2) {
584         tcg_out_movext1(s, i3);
585         tcg_out_movext2(s, i1, i2, scratch);
586         return;
587     }
588 
589     /*
590      * There is a cycle.  Since there are only 3 nodes, the cycle is
591      * either "clockwise" or "anti-clockwise", and can be solved with
592      * a single scratch or two xchg.
593      */
594     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
595         /* "Clockwise" */
596         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
597             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
598             /* The data is now in the correct registers, now extend. */
599             tcg_out_movext1_new_src(s, i1, i1->dst);
600             tcg_out_movext1_new_src(s, i2, i2->dst);
601             tcg_out_movext1_new_src(s, i3, i3->dst);
602         } else {
603             tcg_debug_assert(scratch >= 0);
604             tcg_out_mov(s, i1->src_type, scratch, src1);
605             tcg_out_movext1(s, i3);
606             tcg_out_movext1(s, i2);
607             tcg_out_movext1_new_src(s, i1, scratch);
608         }
609     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
610         /* "Anti-clockwise" */
611         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
612             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
613             /* The data is now in the correct registers, now extend. */
614             tcg_out_movext1_new_src(s, i1, i1->dst);
615             tcg_out_movext1_new_src(s, i2, i2->dst);
616             tcg_out_movext1_new_src(s, i3, i3->dst);
617         } else {
618             tcg_debug_assert(scratch >= 0);
619             tcg_out_mov(s, i1->src_type, scratch, src1);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1(s, i3);
622             tcg_out_movext1_new_src(s, i1, scratch);
623         }
624     } else {
625         g_assert_not_reached();
626     }
627 }
628 
/*
 * Token-pasting helpers: glue prefix P onto one to six arguments, with
 * an underscore between consecutive arguments.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
635 
/*
 * Define an enumeration for the various combinations.
 *
 * In this pass every C_* macro expands to an enumerator name followed
 * by a comma, so including "tcg-target-con-set.h" inside the enum body
 * produces one enumerator per constraint set listed in that header.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Supplied by the backend; returns the constraint-set index for an opcode. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
661 
662 #undef C_O0_I1
663 #undef C_O0_I2
664 #undef C_O0_I3
665 #undef C_O0_I4
666 #undef C_O1_I1
667 #undef C_O1_I2
668 #undef C_O1_I3
669 #undef C_O1_I4
670 #undef C_N1_I2
671 #undef C_O2_I1
672 #undef C_O2_I2
673 #undef C_O2_I3
674 #undef C_O2_I4
675 #undef C_N1_O1_I4
676 
/*
 * Put all of the constraint sets into an array, indexed by the enum.
 *
 * In this pass every C_* macro expands to a TCGTargetOpDef initializer
 * whose args_ct_str holds the stringified arguments, outputs first.
 * The C_N1_* forms prepend "&" to the first output's string.
 */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
700 
701 
702 #undef C_O0_I1
703 #undef C_O0_I2
704 #undef C_O0_I3
705 #undef C_O0_I4
706 #undef C_O1_I1
707 #undef C_O1_I2
708 #undef C_O1_I3
709 #undef C_O1_I4
710 #undef C_N1_I2
711 #undef C_O2_I1
712 #undef C_O2_I2
713 #undef C_O2_I3
714 #undef C_O2_I4
715 #undef C_N1_O1_I4
716 
/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 *
 * Same names as in the enumeration pass, but without the trailing
 * comma, so that a C_* invocation can be used as an expression in the
 * backend source included below.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
736 
737 #include "tcg-target.c.inc"
738 
#ifndef CONFIG_TCG_INTERPRETER
/*
 * Validate CPUTLBDescFast placement.
 *
 * Build-time check: the offset of tlb.f[0] relative to the end of
 * CPUNegativeOffsetState, taken as a signed int, must not be below the
 * backend's MIN_TLB_MASK_TABLE_OFS.
 */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif
745 
746 static void alloc_tcg_plugin_context(TCGContext *s)
747 {
748 #ifdef CONFIG_PLUGIN
749     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
750     s->plugin_tb->insns =
751         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
752 #endif
753 }
754 
755 /*
756  * All TCG threads except the parent (i.e. the one that called tcg_context_init
757  * and registered the target's TCG globals) must register with this function
758  * before initiating translation.
759  *
760  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
761  * of tcg_region_init() for the reasoning behind this.
762  *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
770  */
771 #ifdef CONFIG_USER_ONLY
/* User-mode variant: the calling thread uses the initial context directly. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
776 #else
777 void tcg_register_thread(void)
778 {
779     TCGContext *s = g_malloc(sizeof(*s));
780     unsigned int i, n;
781 
782     *s = tcg_init_ctx;
783 
784     /* Relink mem_base.  */
785     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
786         if (tcg_init_ctx.temps[i].mem_base) {
787             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
788             tcg_debug_assert(b >= 0 && b < n);
789             s->temps[i].mem_base = &s->temps[b];
790         }
791     }
792 
793     /* Claim an entry in tcg_ctxs */
794     n = qatomic_fetch_inc(&tcg_cur_ctxs);
795     g_assert(n < tcg_max_ctxs);
796     qatomic_set(&tcg_ctxs[n], s);
797 
798     if (n > 0) {
799         alloc_tcg_plugin_context(s);
800         tcg_region_initial_alloc(s);
801     }
802 
803     tcg_ctx = s;
804 }
805 #endif /* !CONFIG_USER_ONLY */
806 
807 /* pool based memory allocation */
808 void *tcg_malloc_internal(TCGContext *s, int size)
809 {
810     TCGPool *p;
811     int pool_size;
812 
813     if (size > TCG_POOL_CHUNK_SIZE) {
814         /* big malloc: insert a new pool (XXX: could optimize) */
815         p = g_malloc(sizeof(TCGPool) + size);
816         p->size = size;
817         p->next = s->pool_first_large;
818         s->pool_first_large = p;
819         return p->data;
820     } else {
821         p = s->pool_current;
822         if (!p) {
823             p = s->pool_first;
824             if (!p)
825                 goto new_pool;
826         } else {
827             if (!p->next) {
828             new_pool:
829                 pool_size = TCG_POOL_CHUNK_SIZE;
830                 p = g_malloc(sizeof(TCGPool) + pool_size);
831                 p->size = pool_size;
832                 p->next = NULL;
833                 if (s->pool_current) {
834                     s->pool_current->next = p;
835                 } else {
836                     s->pool_first = p;
837                 }
838             } else {
839                 p = p->next;
840             }
841         }
842     }
843     s->pool_current = p;
844     s->pool_cur = p->data + size;
845     s->pool_end = p->data + p->size;
846     return p->data;
847 }
848 
849 void tcg_pool_reset(TCGContext *s)
850 {
851     TCGPool *p, *t;
852     for (p = s->pool_first_large; p; p = t) {
853         t = p->next;
854         g_free(p);
855     }
856     s->pool_first_large = NULL;
857     s->pool_cur = s->pool_end = NULL;
858     s->pool_current = NULL;
859 }
860 
861 /*
862  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
863  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
864  * We only use these for layout in tcg_out_ld_helper_ret and
865  * tcg_out_st_helper_args, and share them between several of
866  * the helpers, with the end result that it's easier to build manually.
867  */
868 
/*
 * Typecode used below for a tcg_target_ulong return value:
 * i32 when the host register width is 32 bits, i64 otherwise.
 */
#if TCG_TARGET_REG_BITS != 32
# define dh_typecode_ttl  dh_typecode_i64
#else
# define dh_typecode_ttl  dh_typecode_i32
#endif
874 
875 static TCGHelperInfo info_helper_ld32_mmu = {
876     .flags = TCG_CALL_NO_WG,
877     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
878               | dh_typemask(env, 1)
879               | dh_typemask(i64, 2)  /* uint64_t addr */
880               | dh_typemask(i32, 3)  /* unsigned oi */
881               | dh_typemask(ptr, 4)  /* uintptr_t ra */
882 };
883 
884 static TCGHelperInfo info_helper_ld64_mmu = {
885     .flags = TCG_CALL_NO_WG,
886     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
887               | dh_typemask(env, 1)
888               | dh_typemask(i64, 2)  /* uint64_t addr */
889               | dh_typemask(i32, 3)  /* unsigned oi */
890               | dh_typemask(ptr, 4)  /* uintptr_t ra */
891 };
892 
893 static TCGHelperInfo info_helper_ld128_mmu = {
894     .flags = TCG_CALL_NO_WG,
895     .typemask = dh_typemask(i128, 0) /* return Int128 */
896               | dh_typemask(env, 1)
897               | dh_typemask(i64, 2)  /* uint64_t addr */
898               | dh_typemask(i32, 3)  /* unsigned oi */
899               | dh_typemask(ptr, 4)  /* uintptr_t ra */
900 };
901 
902 static TCGHelperInfo info_helper_st32_mmu = {
903     .flags = TCG_CALL_NO_WG,
904     .typemask = dh_typemask(void, 0)
905               | dh_typemask(env, 1)
906               | dh_typemask(i64, 2)  /* uint64_t addr */
907               | dh_typemask(i32, 3)  /* uint32_t data */
908               | dh_typemask(i32, 4)  /* unsigned oi */
909               | dh_typemask(ptr, 5)  /* uintptr_t ra */
910 };
911 
912 static TCGHelperInfo info_helper_st64_mmu = {
913     .flags = TCG_CALL_NO_WG,
914     .typemask = dh_typemask(void, 0)
915               | dh_typemask(env, 1)
916               | dh_typemask(i64, 2)  /* uint64_t addr */
917               | dh_typemask(i64, 3)  /* uint64_t data */
918               | dh_typemask(i32, 4)  /* unsigned oi */
919               | dh_typemask(ptr, 5)  /* uintptr_t ra */
920 };
921 
922 static TCGHelperInfo info_helper_st128_mmu = {
923     .flags = TCG_CALL_NO_WG,
924     .typemask = dh_typemask(void, 0)
925               | dh_typemask(env, 1)
926               | dh_typemask(i64, 2)  /* uint64_t addr */
927               | dh_typemask(i128, 3) /* Int128 data */
928               | dh_typemask(i32, 4)  /* unsigned oi */
929               | dh_typemask(ptr, 5)  /* uintptr_t ra */
930 };
931 
932 #ifdef CONFIG_TCG_INTERPRETER
933 static ffi_type *typecode_to_ffi(int argmask)
934 {
935     /*
936      * libffi does not support __int128_t, so we have forced Int128
937      * to use the structure definition instead of the builtin type.
938      */
939     static ffi_type *ffi_type_i128_elements[3] = {
940         &ffi_type_uint64,
941         &ffi_type_uint64,
942         NULL
943     };
944     static ffi_type ffi_type_i128 = {
945         .size = 16,
946         .alignment = __alignof__(Int128),
947         .type = FFI_TYPE_STRUCT,
948         .elements = ffi_type_i128_elements,
949     };
950 
951     switch (argmask) {
952     case dh_typecode_void:
953         return &ffi_type_void;
954     case dh_typecode_i32:
955         return &ffi_type_uint32;
956     case dh_typecode_s32:
957         return &ffi_type_sint32;
958     case dh_typecode_i64:
959         return &ffi_type_uint64;
960     case dh_typecode_s64:
961         return &ffi_type_sint64;
962     case dh_typecode_ptr:
963         return &ffi_type_pointer;
964     case dh_typecode_i128:
965         return &ffi_type_i128;
966     }
967     g_assert_not_reached();
968 }
969 
970 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
971 {
972     unsigned typemask = info->typemask;
973     struct {
974         ffi_cif cif;
975         ffi_type *args[];
976     } *ca;
977     ffi_status status;
978     int nargs;
979 
980     /* Ignoring the return type, find the last non-zero field. */
981     nargs = 32 - clz32(typemask >> 3);
982     nargs = DIV_ROUND_UP(nargs, 3);
983     assert(nargs <= MAX_CALL_IARGS);
984 
985     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
986     ca->cif.rtype = typecode_to_ffi(typemask & 7);
987     ca->cif.nargs = nargs;
988 
989     if (nargs != 0) {
990         ca->cif.arg_types = ca->args;
991         for (int j = 0; j < nargs; ++j) {
992             int typecode = extract32(typemask, (j + 1) * 3, 3);
993             ca->args[j] = typecode_to_ffi(typecode);
994         }
995     }
996 
997     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
998                           ca->cif.rtype, ca->cif.arg_types);
999     assert(status == FFI_OK);
1000 
1001     return &ca->cif;
1002 }
1003 
1004 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1005 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1006 #else
1007 #define HELPER_INFO_INIT(I)      (&(I)->init)
1008 #define HELPER_INFO_INIT_VAL(I)  1
1009 #endif /* CONFIG_TCG_INTERPRETER */
1010 
1011 static inline bool arg_slot_reg_p(unsigned arg_slot)
1012 {
1013     /*
1014      * Split the sizeof away from the comparison to avoid Werror from
1015      * "unsigned < 0 is always false", when iarg_regs is empty.
1016      */
1017     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1018     return arg_slot < nreg;
1019 }
1020 
1021 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1022 {
1023     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1024     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1025 
1026     tcg_debug_assert(stk_slot < max);
1027     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1028 }
1029 
/* Running cursors advanced by the layout_arg_*() helpers. */
typedef struct TCGCumulativeArgs {
    /* Position in tcg_gen_callN args[]. */
    int arg_idx;
    /* Next free entry in TCGHelperInfo in[]. */
    int info_in_idx;
    /* Next argument slot, counting registers then stack. */
    int arg_slot;
    /* Stack slots handed out for by-reference copies. */
    int ref_slot;
} TCGCumulativeArgs;
1036 
1037 static void layout_arg_even(TCGCumulativeArgs *cum)
1038 {
1039     cum->arg_slot += cum->arg_slot & 1;
1040 }
1041 
1042 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1043                          TCGCallArgumentKind kind)
1044 {
1045     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1046 
1047     *loc = (TCGCallArgumentLoc){
1048         .kind = kind,
1049         .arg_idx = cum->arg_idx,
1050         .arg_slot = cum->arg_slot,
1051     };
1052     cum->info_in_idx++;
1053     cum->arg_slot++;
1054 }
1055 
1056 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1057                                 TCGHelperInfo *info, int n)
1058 {
1059     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1060 
1061     for (int i = 0; i < n; ++i) {
1062         /* Layout all using the same arg_idx, adjusting the subindex. */
1063         loc[i] = (TCGCallArgumentLoc){
1064             .kind = TCG_CALL_ARG_NORMAL,
1065             .arg_idx = cum->arg_idx,
1066             .tmp_subindex = i,
1067             .arg_slot = cum->arg_slot + i,
1068         };
1069     }
1070     cum->info_in_idx += n;
1071     cum->arg_slot += n;
1072 }
1073 
1074 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1075 {
1076     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1077     int n = 128 / TCG_TARGET_REG_BITS;
1078 
1079     /* The first subindex carries the pointer. */
1080     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1081 
1082     /*
1083      * The callee is allowed to clobber memory associated with
1084      * structure pass by-reference.  Therefore we must make copies.
1085      * Allocate space from "ref_slot", which will be adjusted to
1086      * follow the parameters on the stack.
1087      */
1088     loc[0].ref_slot = cum->ref_slot;
1089 
1090     /*
1091      * Subsequent words also go into the reference slot, but
1092      * do not accumulate into the regular arguments.
1093      */
1094     for (int i = 1; i < n; ++i) {
1095         loc[i] = (TCGCallArgumentLoc){
1096             .kind = TCG_CALL_ARG_BY_REF_N,
1097             .arg_idx = cum->arg_idx,
1098             .tmp_subindex = i,
1099             .ref_slot = cum->ref_slot + i,
1100         };
1101     }
1102     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1103     cum->ref_slot += n;
1104 }
1105 
1106 static void init_call_layout(TCGHelperInfo *info)
1107 {
1108     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1109     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1110     unsigned typemask = info->typemask;
1111     unsigned typecode;
1112     TCGCumulativeArgs cum = { };
1113 
1114     /*
1115      * Parse and place any function return value.
1116      */
1117     typecode = typemask & 7;
1118     switch (typecode) {
1119     case dh_typecode_void:
1120         info->nr_out = 0;
1121         break;
1122     case dh_typecode_i32:
1123     case dh_typecode_s32:
1124     case dh_typecode_ptr:
1125         info->nr_out = 1;
1126         info->out_kind = TCG_CALL_RET_NORMAL;
1127         break;
1128     case dh_typecode_i64:
1129     case dh_typecode_s64:
1130         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1131         info->out_kind = TCG_CALL_RET_NORMAL;
1132         /* Query the last register now to trigger any assert early. */
1133         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1134         break;
1135     case dh_typecode_i128:
1136         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1137         info->out_kind = TCG_TARGET_CALL_RET_I128;
1138         switch (TCG_TARGET_CALL_RET_I128) {
1139         case TCG_CALL_RET_NORMAL:
1140             /* Query the last register now to trigger any assert early. */
1141             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1142             break;
1143         case TCG_CALL_RET_BY_VEC:
1144             /* Query the single register now to trigger any assert early. */
1145             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1146             break;
1147         case TCG_CALL_RET_BY_REF:
1148             /*
1149              * Allocate the first argument to the output.
1150              * We don't need to store this anywhere, just make it
1151              * unavailable for use in the input loop below.
1152              */
1153             cum.arg_slot = 1;
1154             break;
1155         default:
1156             qemu_build_not_reached();
1157         }
1158         break;
1159     default:
1160         g_assert_not_reached();
1161     }
1162 
1163     /*
1164      * Parse and place function arguments.
1165      */
1166     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1167         TCGCallArgumentKind kind;
1168         TCGType type;
1169 
1170         typecode = typemask & 7;
1171         switch (typecode) {
1172         case dh_typecode_i32:
1173         case dh_typecode_s32:
1174             type = TCG_TYPE_I32;
1175             break;
1176         case dh_typecode_i64:
1177         case dh_typecode_s64:
1178             type = TCG_TYPE_I64;
1179             break;
1180         case dh_typecode_ptr:
1181             type = TCG_TYPE_PTR;
1182             break;
1183         case dh_typecode_i128:
1184             type = TCG_TYPE_I128;
1185             break;
1186         default:
1187             g_assert_not_reached();
1188         }
1189 
1190         switch (type) {
1191         case TCG_TYPE_I32:
1192             switch (TCG_TARGET_CALL_ARG_I32) {
1193             case TCG_CALL_ARG_EVEN:
1194                 layout_arg_even(&cum);
1195                 /* fall through */
1196             case TCG_CALL_ARG_NORMAL:
1197                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1198                 break;
1199             case TCG_CALL_ARG_EXTEND:
1200                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1201                 layout_arg_1(&cum, info, kind);
1202                 break;
1203             default:
1204                 qemu_build_not_reached();
1205             }
1206             break;
1207 
1208         case TCG_TYPE_I64:
1209             switch (TCG_TARGET_CALL_ARG_I64) {
1210             case TCG_CALL_ARG_EVEN:
1211                 layout_arg_even(&cum);
1212                 /* fall through */
1213             case TCG_CALL_ARG_NORMAL:
1214                 if (TCG_TARGET_REG_BITS == 32) {
1215                     layout_arg_normal_n(&cum, info, 2);
1216                 } else {
1217                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1218                 }
1219                 break;
1220             default:
1221                 qemu_build_not_reached();
1222             }
1223             break;
1224 
1225         case TCG_TYPE_I128:
1226             switch (TCG_TARGET_CALL_ARG_I128) {
1227             case TCG_CALL_ARG_EVEN:
1228                 layout_arg_even(&cum);
1229                 /* fall through */
1230             case TCG_CALL_ARG_NORMAL:
1231                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1232                 break;
1233             case TCG_CALL_ARG_BY_REF:
1234                 layout_arg_by_ref(&cum, info);
1235                 break;
1236             default:
1237                 qemu_build_not_reached();
1238             }
1239             break;
1240 
1241         default:
1242             g_assert_not_reached();
1243         }
1244     }
1245     info->nr_in = cum.info_in_idx;
1246 
1247     /* Validate that we didn't overrun the input array. */
1248     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1249     /* Validate the backend has enough argument space. */
1250     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1251 
1252     /*
1253      * Relocate the "ref_slot" area to the end of the parameters.
1254      * Minimizing this stack offset helps code size for x86,
1255      * which has a signed 8-bit offset encoding.
1256      */
1257     if (cum.ref_slot != 0) {
1258         int ref_base = 0;
1259 
1260         if (cum.arg_slot > max_reg_slots) {
1261             int align = __alignof(Int128) / sizeof(tcg_target_long);
1262 
1263             ref_base = cum.arg_slot - max_reg_slots;
1264             if (align > 1) {
1265                 ref_base = ROUND_UP(ref_base, align);
1266             }
1267         }
1268         assert(ref_base + cum.ref_slot <= max_stk_slots);
1269         ref_base += max_reg_slots;
1270 
1271         if (ref_base != 0) {
1272             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1273                 TCGCallArgumentLoc *loc = &info->in[i];
1274                 switch (loc->kind) {
1275                 case TCG_CALL_ARG_BY_REF:
1276                 case TCG_CALL_ARG_BY_REF_N:
1277                     loc->ref_slot += ref_base;
1278                     break;
1279                 default:
1280                     break;
1281                 }
1282             }
1283         }
1284     }
1285 }
1286 
1287 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1288 static void process_op_defs(TCGContext *s);
1289 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1290                                             TCGReg reg, const char *name);
1291 
1292 static void tcg_context_init(unsigned max_cpus)
1293 {
1294     TCGContext *s = &tcg_init_ctx;
1295     int op, total_args, n, i;
1296     TCGOpDef *def;
1297     TCGArgConstraint *args_ct;
1298     TCGTemp *ts;
1299 
1300     memset(s, 0, sizeof(*s));
1301     s->nb_globals = 0;
1302 
1303     /* Count total number of arguments and allocate the corresponding
1304        space */
1305     total_args = 0;
1306     for(op = 0; op < NB_OPS; op++) {
1307         def = &tcg_op_defs[op];
1308         n = def->nb_iargs + def->nb_oargs;
1309         total_args += n;
1310     }
1311 
1312     args_ct = g_new0(TCGArgConstraint, total_args);
1313 
1314     for(op = 0; op < NB_OPS; op++) {
1315         def = &tcg_op_defs[op];
1316         def->args_ct = args_ct;
1317         n = def->nb_iargs + def->nb_oargs;
1318         args_ct += n;
1319     }
1320 
1321     init_call_layout(&info_helper_ld32_mmu);
1322     init_call_layout(&info_helper_ld64_mmu);
1323     init_call_layout(&info_helper_ld128_mmu);
1324     init_call_layout(&info_helper_st32_mmu);
1325     init_call_layout(&info_helper_st64_mmu);
1326     init_call_layout(&info_helper_st128_mmu);
1327 
1328     tcg_target_init(s);
1329     process_op_defs(s);
1330 
1331     /* Reverse the order of the saved registers, assuming they're all at
1332        the start of tcg_target_reg_alloc_order.  */
1333     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1334         int r = tcg_target_reg_alloc_order[n];
1335         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1336             break;
1337         }
1338     }
1339     for (i = 0; i < n; ++i) {
1340         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1341     }
1342     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1343         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1344     }
1345 
1346     alloc_tcg_plugin_context(s);
1347 
1348     tcg_ctx = s;
1349     /*
1350      * In user-mode we simply share the init context among threads, since we
1351      * use a single region. See the documentation tcg_region_init() for the
1352      * reasoning behind this.
1353      * In system-mode we will have at most max_cpus TCG threads.
1354      */
1355 #ifdef CONFIG_USER_ONLY
1356     tcg_ctxs = &tcg_ctx;
1357     tcg_cur_ctxs = 1;
1358     tcg_max_ctxs = 1;
1359 #else
1360     tcg_max_ctxs = max_cpus;
1361     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1362 #endif
1363 
1364     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1365     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1366     tcg_env = temp_tcgv_ptr(ts);
1367 }
1368 
/*
 * Public start-up entry point: initialise the TCG context for @max_cpus,
 * then call tcg_region_init() with @tb_size, @splitwx and @max_cpus.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    /* The context must exist before regions are set up. */
    tcg_context_init(max_cpus);

    tcg_region_init(tb_size, splitwx, max_cpus);
}
1374 
1375 /*
1376  * Allocate TBs right before their corresponding translated code, making
1377  * sure that TBs and code are on different cache lines.
1378  */
1379 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1380 {
1381     uintptr_t align = qemu_icache_linesize;
1382     TranslationBlock *tb;
1383     void *next;
1384 
1385  retry:
1386     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1387     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1388 
1389     if (unlikely(next > s->code_gen_highwater)) {
1390         if (tcg_region_alloc(s)) {
1391             return NULL;
1392         }
1393         goto retry;
1394     }
1395     qatomic_set(&s->code_gen_ptr, next);
1396     s->data_gen_ptr = NULL;
1397     return tb;
1398 }
1399 
1400 void tcg_prologue_init(void)
1401 {
1402     TCGContext *s = tcg_ctx;
1403     size_t prologue_size;
1404 
1405     s->code_ptr = s->code_gen_ptr;
1406     s->code_buf = s->code_gen_ptr;
1407     s->data_gen_ptr = NULL;
1408 
1409 #ifndef CONFIG_TCG_INTERPRETER
1410     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1411 #endif
1412 
1413 #ifdef TCG_TARGET_NEED_POOL_LABELS
1414     s->pool_labels = NULL;
1415 #endif
1416 
1417     qemu_thread_jit_write();
1418     /* Generate the prologue.  */
1419     tcg_target_qemu_prologue(s);
1420 
1421 #ifdef TCG_TARGET_NEED_POOL_LABELS
1422     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1423     {
1424         int result = tcg_out_pool_finalize(s);
1425         tcg_debug_assert(result == 0);
1426     }
1427 #endif
1428 
1429     prologue_size = tcg_current_code_size(s);
1430     perf_report_prologue(s->code_gen_ptr, prologue_size);
1431 
1432 #ifndef CONFIG_TCG_INTERPRETER
1433     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1434                         (uintptr_t)s->code_buf, prologue_size);
1435 #endif
1436 
1437     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1438         FILE *logfile = qemu_log_trylock();
1439         if (logfile) {
1440             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1441             if (s->data_gen_ptr) {
1442                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1443                 size_t data_size = prologue_size - code_size;
1444                 size_t i;
1445 
1446                 disas(logfile, s->code_gen_ptr, code_size);
1447 
1448                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1449                     if (sizeof(tcg_target_ulong) == 8) {
1450                         fprintf(logfile,
1451                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1452                                 (uintptr_t)s->data_gen_ptr + i,
1453                                 *(uint64_t *)(s->data_gen_ptr + i));
1454                     } else {
1455                         fprintf(logfile,
1456                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1457                                 (uintptr_t)s->data_gen_ptr + i,
1458                                 *(uint32_t *)(s->data_gen_ptr + i));
1459                     }
1460                 }
1461             } else {
1462                 disas(logfile, s->code_gen_ptr, prologue_size);
1463             }
1464             fprintf(logfile, "\n");
1465             qemu_log_unlock(logfile);
1466         }
1467     }
1468 
1469 #ifndef CONFIG_TCG_INTERPRETER
1470     /*
1471      * Assert that goto_ptr is implemented completely, setting an epilogue.
1472      * For tci, we use NULL as the signal to return from the interpreter,
1473      * so skip this check.
1474      */
1475     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1476 #endif
1477 
1478     tcg_region_prologue_set(s);
1479 }
1480 
1481 void tcg_func_start(TCGContext *s)
1482 {
1483     tcg_pool_reset(s);
1484     s->nb_temps = s->nb_globals;
1485 
1486     /* No temps have been previously allocated for size or locality.  */
1487     memset(s->free_temps, 0, sizeof(s->free_temps));
1488 
1489     /* No constant temps have been previously allocated. */
1490     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1491         if (s->const_table[i]) {
1492             g_hash_table_remove_all(s->const_table[i]);
1493         }
1494     }
1495 
1496     s->nb_ops = 0;
1497     s->nb_labels = 0;
1498     s->current_frame_offset = s->frame_start;
1499 
1500 #ifdef CONFIG_DEBUG_TCG
1501     s->goto_tb_issue_mask = 0;
1502 #endif
1503 
1504     QTAILQ_INIT(&s->ops);
1505     QTAILQ_INIT(&s->free_ops);
1506     QSIMPLEQ_INIT(&s->labels);
1507 
1508     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1509                      s->addr_type == TCG_TYPE_I64);
1510 
1511     tcg_debug_assert(s->insn_start_words > 0);
1512 }
1513 
1514 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1515 {
1516     int n = s->nb_temps++;
1517 
1518     if (n >= TCG_MAX_TEMPS) {
1519         tcg_raise_tb_overflow(s);
1520     }
1521     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1522 }
1523 
1524 static TCGTemp *tcg_global_alloc(TCGContext *s)
1525 {
1526     TCGTemp *ts;
1527 
1528     tcg_debug_assert(s->nb_globals == s->nb_temps);
1529     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1530     s->nb_globals++;
1531     ts = tcg_temp_alloc(s);
1532     ts->kind = TEMP_GLOBAL;
1533 
1534     return ts;
1535 }
1536 
1537 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1538                                             TCGReg reg, const char *name)
1539 {
1540     TCGTemp *ts;
1541 
1542     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1543 
1544     ts = tcg_global_alloc(s);
1545     ts->base_type = type;
1546     ts->type = type;
1547     ts->kind = TEMP_FIXED;
1548     ts->reg = reg;
1549     ts->name = name;
1550     tcg_regset_set_reg(s->reserved_regs, reg);
1551 
1552     return ts;
1553 }
1554 
1555 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1556 {
1557     s->frame_start = start;
1558     s->frame_end = start + size;
1559     s->frame_temp
1560         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1561 }
1562 
1563 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1564                                      intptr_t offset, const char *name)
1565 {
1566     TCGContext *s = tcg_ctx;
1567     TCGTemp *base_ts = tcgv_ptr_temp(base);
1568     TCGTemp *ts = tcg_global_alloc(s);
1569     int indirect_reg = 0;
1570 
1571     switch (base_ts->kind) {
1572     case TEMP_FIXED:
1573         break;
1574     case TEMP_GLOBAL:
1575         /* We do not support double-indirect registers.  */
1576         tcg_debug_assert(!base_ts->indirect_reg);
1577         base_ts->indirect_base = 1;
1578         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1579                             ? 2 : 1);
1580         indirect_reg = 1;
1581         break;
1582     default:
1583         g_assert_not_reached();
1584     }
1585 
1586     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1587         TCGTemp *ts2 = tcg_global_alloc(s);
1588         char buf[64];
1589 
1590         ts->base_type = TCG_TYPE_I64;
1591         ts->type = TCG_TYPE_I32;
1592         ts->indirect_reg = indirect_reg;
1593         ts->mem_allocated = 1;
1594         ts->mem_base = base_ts;
1595         ts->mem_offset = offset;
1596         pstrcpy(buf, sizeof(buf), name);
1597         pstrcat(buf, sizeof(buf), "_0");
1598         ts->name = strdup(buf);
1599 
1600         tcg_debug_assert(ts2 == ts + 1);
1601         ts2->base_type = TCG_TYPE_I64;
1602         ts2->type = TCG_TYPE_I32;
1603         ts2->indirect_reg = indirect_reg;
1604         ts2->mem_allocated = 1;
1605         ts2->mem_base = base_ts;
1606         ts2->mem_offset = offset + 4;
1607         ts2->temp_subindex = 1;
1608         pstrcpy(buf, sizeof(buf), name);
1609         pstrcat(buf, sizeof(buf), "_1");
1610         ts2->name = strdup(buf);
1611     } else {
1612         ts->base_type = type;
1613         ts->type = type;
1614         ts->indirect_reg = indirect_reg;
1615         ts->mem_allocated = 1;
1616         ts->mem_base = base_ts;
1617         ts->mem_offset = offset;
1618         ts->name = name;
1619     }
1620     return ts;
1621 }
1622 
1623 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1624 {
1625     TCGContext *s = tcg_ctx;
1626     TCGTemp *ts;
1627     int n;
1628 
1629     if (kind == TEMP_EBB) {
1630         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1631 
1632         if (idx < TCG_MAX_TEMPS) {
1633             /* There is already an available temp with the right type.  */
1634             clear_bit(idx, s->free_temps[type].l);
1635 
1636             ts = &s->temps[idx];
1637             ts->temp_allocated = 1;
1638             tcg_debug_assert(ts->base_type == type);
1639             tcg_debug_assert(ts->kind == kind);
1640             return ts;
1641         }
1642     } else {
1643         tcg_debug_assert(kind == TEMP_TB);
1644     }
1645 
1646     switch (type) {
1647     case TCG_TYPE_I32:
1648     case TCG_TYPE_V64:
1649     case TCG_TYPE_V128:
1650     case TCG_TYPE_V256:
1651         n = 1;
1652         break;
1653     case TCG_TYPE_I64:
1654         n = 64 / TCG_TARGET_REG_BITS;
1655         break;
1656     case TCG_TYPE_I128:
1657         n = 128 / TCG_TARGET_REG_BITS;
1658         break;
1659     default:
1660         g_assert_not_reached();
1661     }
1662 
1663     ts = tcg_temp_alloc(s);
1664     ts->base_type = type;
1665     ts->temp_allocated = 1;
1666     ts->kind = kind;
1667 
1668     if (n == 1) {
1669         ts->type = type;
1670     } else {
1671         ts->type = TCG_TYPE_REG;
1672 
1673         for (int i = 1; i < n; ++i) {
1674             TCGTemp *ts2 = tcg_temp_alloc(s);
1675 
1676             tcg_debug_assert(ts2 == ts + i);
1677             ts2->base_type = type;
1678             ts2->type = TCG_TYPE_REG;
1679             ts2->temp_allocated = 1;
1680             ts2->temp_subindex = i;
1681             ts2->kind = kind;
1682         }
1683     }
1684     return ts;
1685 }
1686 
1687 TCGv_vec tcg_temp_new_vec(TCGType type)
1688 {
1689     TCGTemp *t;
1690 
1691 #ifdef CONFIG_DEBUG_TCG
1692     switch (type) {
1693     case TCG_TYPE_V64:
1694         assert(TCG_TARGET_HAS_v64);
1695         break;
1696     case TCG_TYPE_V128:
1697         assert(TCG_TARGET_HAS_v128);
1698         break;
1699     case TCG_TYPE_V256:
1700         assert(TCG_TARGET_HAS_v256);
1701         break;
1702     default:
1703         g_assert_not_reached();
1704     }
1705 #endif
1706 
1707     t = tcg_temp_new_internal(type, TEMP_EBB);
1708     return temp_tcgv_vec(t);
1709 }
1710 
1711 /* Create a new temp of the same type as an existing temp.  */
1712 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1713 {
1714     TCGTemp *t = tcgv_vec_temp(match);
1715 
1716     tcg_debug_assert(t->temp_allocated != 0);
1717 
1718     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1719     return temp_tcgv_vec(t);
1720 }
1721 
1722 void tcg_temp_free_internal(TCGTemp *ts)
1723 {
1724     TCGContext *s = tcg_ctx;
1725 
1726     switch (ts->kind) {
1727     case TEMP_CONST:
1728     case TEMP_TB:
1729         /* Silently ignore free. */
1730         break;
1731     case TEMP_EBB:
1732         tcg_debug_assert(ts->temp_allocated != 0);
1733         ts->temp_allocated = 0;
1734         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1735         break;
1736     default:
1737         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1738         g_assert_not_reached();
1739     }
1740 }
1741 
1742 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1743 {
1744     TCGContext *s = tcg_ctx;
1745     GHashTable *h = s->const_table[type];
1746     TCGTemp *ts;
1747 
1748     if (h == NULL) {
1749         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1750         s->const_table[type] = h;
1751     }
1752 
1753     ts = g_hash_table_lookup(h, &val);
1754     if (ts == NULL) {
1755         int64_t *val_ptr;
1756 
1757         ts = tcg_temp_alloc(s);
1758 
1759         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1760             TCGTemp *ts2 = tcg_temp_alloc(s);
1761 
1762             tcg_debug_assert(ts2 == ts + 1);
1763 
1764             ts->base_type = TCG_TYPE_I64;
1765             ts->type = TCG_TYPE_I32;
1766             ts->kind = TEMP_CONST;
1767             ts->temp_allocated = 1;
1768 
1769             ts2->base_type = TCG_TYPE_I64;
1770             ts2->type = TCG_TYPE_I32;
1771             ts2->kind = TEMP_CONST;
1772             ts2->temp_allocated = 1;
1773             ts2->temp_subindex = 1;
1774 
1775             /*
1776              * Retain the full value of the 64-bit constant in the low
1777              * part, so that the hash table works.  Actual uses will
1778              * truncate the value to the low part.
1779              */
1780             ts[HOST_BIG_ENDIAN].val = val;
1781             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1782             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1783         } else {
1784             ts->base_type = type;
1785             ts->type = type;
1786             ts->kind = TEMP_CONST;
1787             ts->temp_allocated = 1;
1788             ts->val = val;
1789             val_ptr = &ts->val;
1790         }
1791         g_hash_table_insert(h, val_ptr, ts);
1792     }
1793 
1794     return ts;
1795 }
1796 
1797 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1798 {
1799     val = dup_const(vece, val);
1800     return temp_tcgv_vec(tcg_constant_internal(type, val));
1801 }
1802 
1803 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1804 {
1805     TCGTemp *t = tcgv_vec_temp(match);
1806 
1807     tcg_debug_assert(t->temp_allocated != 0);
1808     return tcg_constant_vec(t->base_type, vece, val);
1809 }
1810 
1811 #ifdef CONFIG_DEBUG_TCG
1812 size_t temp_idx(TCGTemp *ts)
1813 {
1814     ptrdiff_t n = ts - tcg_ctx->temps;
1815     assert(n >= 0 && n < tcg_ctx->nb_temps);
1816     return n;
1817 }
1818 
1819 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1820 {
1821     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1822 
1823     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1824     assert(o % sizeof(TCGTemp) == 0);
1825 
1826     return (void *)tcg_ctx + (uintptr_t)v;
1827 }
1828 #endif /* CONFIG_DEBUG_TCG */
1829 
1830 /* Return true if OP may appear in the opcode stream.
1831    Test the runtime variable that controls each opcode.  */
1832 bool tcg_op_supported(TCGOpcode op)
1833 {
1834     const bool have_vec
1835         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1836 
1837     switch (op) {
1838     case INDEX_op_discard:
1839     case INDEX_op_set_label:
1840     case INDEX_op_call:
1841     case INDEX_op_br:
1842     case INDEX_op_mb:
1843     case INDEX_op_insn_start:
1844     case INDEX_op_exit_tb:
1845     case INDEX_op_goto_tb:
1846     case INDEX_op_goto_ptr:
1847     case INDEX_op_qemu_ld_a32_i32:
1848     case INDEX_op_qemu_ld_a64_i32:
1849     case INDEX_op_qemu_st_a32_i32:
1850     case INDEX_op_qemu_st_a64_i32:
1851     case INDEX_op_qemu_ld_a32_i64:
1852     case INDEX_op_qemu_ld_a64_i64:
1853     case INDEX_op_qemu_st_a32_i64:
1854     case INDEX_op_qemu_st_a64_i64:
1855         return true;
1856 
1857     case INDEX_op_qemu_st8_a32_i32:
1858     case INDEX_op_qemu_st8_a64_i32:
1859         return TCG_TARGET_HAS_qemu_st8_i32;
1860 
1861     case INDEX_op_qemu_ld_a32_i128:
1862     case INDEX_op_qemu_ld_a64_i128:
1863     case INDEX_op_qemu_st_a32_i128:
1864     case INDEX_op_qemu_st_a64_i128:
1865         return TCG_TARGET_HAS_qemu_ldst_i128;
1866 
1867     case INDEX_op_mov_i32:
1868     case INDEX_op_setcond_i32:
1869     case INDEX_op_brcond_i32:
1870     case INDEX_op_ld8u_i32:
1871     case INDEX_op_ld8s_i32:
1872     case INDEX_op_ld16u_i32:
1873     case INDEX_op_ld16s_i32:
1874     case INDEX_op_ld_i32:
1875     case INDEX_op_st8_i32:
1876     case INDEX_op_st16_i32:
1877     case INDEX_op_st_i32:
1878     case INDEX_op_add_i32:
1879     case INDEX_op_sub_i32:
1880     case INDEX_op_mul_i32:
1881     case INDEX_op_and_i32:
1882     case INDEX_op_or_i32:
1883     case INDEX_op_xor_i32:
1884     case INDEX_op_shl_i32:
1885     case INDEX_op_shr_i32:
1886     case INDEX_op_sar_i32:
1887         return true;
1888 
1889     case INDEX_op_negsetcond_i32:
1890         return TCG_TARGET_HAS_negsetcond_i32;
1891     case INDEX_op_movcond_i32:
1892         return TCG_TARGET_HAS_movcond_i32;
1893     case INDEX_op_div_i32:
1894     case INDEX_op_divu_i32:
1895         return TCG_TARGET_HAS_div_i32;
1896     case INDEX_op_rem_i32:
1897     case INDEX_op_remu_i32:
1898         return TCG_TARGET_HAS_rem_i32;
1899     case INDEX_op_div2_i32:
1900     case INDEX_op_divu2_i32:
1901         return TCG_TARGET_HAS_div2_i32;
1902     case INDEX_op_rotl_i32:
1903     case INDEX_op_rotr_i32:
1904         return TCG_TARGET_HAS_rot_i32;
1905     case INDEX_op_deposit_i32:
1906         return TCG_TARGET_HAS_deposit_i32;
1907     case INDEX_op_extract_i32:
1908         return TCG_TARGET_HAS_extract_i32;
1909     case INDEX_op_sextract_i32:
1910         return TCG_TARGET_HAS_sextract_i32;
1911     case INDEX_op_extract2_i32:
1912         return TCG_TARGET_HAS_extract2_i32;
1913     case INDEX_op_add2_i32:
1914         return TCG_TARGET_HAS_add2_i32;
1915     case INDEX_op_sub2_i32:
1916         return TCG_TARGET_HAS_sub2_i32;
1917     case INDEX_op_mulu2_i32:
1918         return TCG_TARGET_HAS_mulu2_i32;
1919     case INDEX_op_muls2_i32:
1920         return TCG_TARGET_HAS_muls2_i32;
1921     case INDEX_op_muluh_i32:
1922         return TCG_TARGET_HAS_muluh_i32;
1923     case INDEX_op_mulsh_i32:
1924         return TCG_TARGET_HAS_mulsh_i32;
1925     case INDEX_op_ext8s_i32:
1926         return TCG_TARGET_HAS_ext8s_i32;
1927     case INDEX_op_ext16s_i32:
1928         return TCG_TARGET_HAS_ext16s_i32;
1929     case INDEX_op_ext8u_i32:
1930         return TCG_TARGET_HAS_ext8u_i32;
1931     case INDEX_op_ext16u_i32:
1932         return TCG_TARGET_HAS_ext16u_i32;
1933     case INDEX_op_bswap16_i32:
1934         return TCG_TARGET_HAS_bswap16_i32;
1935     case INDEX_op_bswap32_i32:
1936         return TCG_TARGET_HAS_bswap32_i32;
1937     case INDEX_op_not_i32:
1938         return TCG_TARGET_HAS_not_i32;
1939     case INDEX_op_neg_i32:
1940         return TCG_TARGET_HAS_neg_i32;
1941     case INDEX_op_andc_i32:
1942         return TCG_TARGET_HAS_andc_i32;
1943     case INDEX_op_orc_i32:
1944         return TCG_TARGET_HAS_orc_i32;
1945     case INDEX_op_eqv_i32:
1946         return TCG_TARGET_HAS_eqv_i32;
1947     case INDEX_op_nand_i32:
1948         return TCG_TARGET_HAS_nand_i32;
1949     case INDEX_op_nor_i32:
1950         return TCG_TARGET_HAS_nor_i32;
1951     case INDEX_op_clz_i32:
1952         return TCG_TARGET_HAS_clz_i32;
1953     case INDEX_op_ctz_i32:
1954         return TCG_TARGET_HAS_ctz_i32;
1955     case INDEX_op_ctpop_i32:
1956         return TCG_TARGET_HAS_ctpop_i32;
1957 
1958     case INDEX_op_brcond2_i32:
1959     case INDEX_op_setcond2_i32:
1960         return TCG_TARGET_REG_BITS == 32;
1961 
1962     case INDEX_op_mov_i64:
1963     case INDEX_op_setcond_i64:
1964     case INDEX_op_brcond_i64:
1965     case INDEX_op_ld8u_i64:
1966     case INDEX_op_ld8s_i64:
1967     case INDEX_op_ld16u_i64:
1968     case INDEX_op_ld16s_i64:
1969     case INDEX_op_ld32u_i64:
1970     case INDEX_op_ld32s_i64:
1971     case INDEX_op_ld_i64:
1972     case INDEX_op_st8_i64:
1973     case INDEX_op_st16_i64:
1974     case INDEX_op_st32_i64:
1975     case INDEX_op_st_i64:
1976     case INDEX_op_add_i64:
1977     case INDEX_op_sub_i64:
1978     case INDEX_op_mul_i64:
1979     case INDEX_op_and_i64:
1980     case INDEX_op_or_i64:
1981     case INDEX_op_xor_i64:
1982     case INDEX_op_shl_i64:
1983     case INDEX_op_shr_i64:
1984     case INDEX_op_sar_i64:
1985     case INDEX_op_ext_i32_i64:
1986     case INDEX_op_extu_i32_i64:
1987         return TCG_TARGET_REG_BITS == 64;
1988 
1989     case INDEX_op_negsetcond_i64:
1990         return TCG_TARGET_HAS_negsetcond_i64;
1991     case INDEX_op_movcond_i64:
1992         return TCG_TARGET_HAS_movcond_i64;
1993     case INDEX_op_div_i64:
1994     case INDEX_op_divu_i64:
1995         return TCG_TARGET_HAS_div_i64;
1996     case INDEX_op_rem_i64:
1997     case INDEX_op_remu_i64:
1998         return TCG_TARGET_HAS_rem_i64;
1999     case INDEX_op_div2_i64:
2000     case INDEX_op_divu2_i64:
2001         return TCG_TARGET_HAS_div2_i64;
2002     case INDEX_op_rotl_i64:
2003     case INDEX_op_rotr_i64:
2004         return TCG_TARGET_HAS_rot_i64;
2005     case INDEX_op_deposit_i64:
2006         return TCG_TARGET_HAS_deposit_i64;
2007     case INDEX_op_extract_i64:
2008         return TCG_TARGET_HAS_extract_i64;
2009     case INDEX_op_sextract_i64:
2010         return TCG_TARGET_HAS_sextract_i64;
2011     case INDEX_op_extract2_i64:
2012         return TCG_TARGET_HAS_extract2_i64;
2013     case INDEX_op_extrl_i64_i32:
2014     case INDEX_op_extrh_i64_i32:
2015         return TCG_TARGET_HAS_extr_i64_i32;
2016     case INDEX_op_ext8s_i64:
2017         return TCG_TARGET_HAS_ext8s_i64;
2018     case INDEX_op_ext16s_i64:
2019         return TCG_TARGET_HAS_ext16s_i64;
2020     case INDEX_op_ext32s_i64:
2021         return TCG_TARGET_HAS_ext32s_i64;
2022     case INDEX_op_ext8u_i64:
2023         return TCG_TARGET_HAS_ext8u_i64;
2024     case INDEX_op_ext16u_i64:
2025         return TCG_TARGET_HAS_ext16u_i64;
2026     case INDEX_op_ext32u_i64:
2027         return TCG_TARGET_HAS_ext32u_i64;
2028     case INDEX_op_bswap16_i64:
2029         return TCG_TARGET_HAS_bswap16_i64;
2030     case INDEX_op_bswap32_i64:
2031         return TCG_TARGET_HAS_bswap32_i64;
2032     case INDEX_op_bswap64_i64:
2033         return TCG_TARGET_HAS_bswap64_i64;
2034     case INDEX_op_not_i64:
2035         return TCG_TARGET_HAS_not_i64;
2036     case INDEX_op_neg_i64:
2037         return TCG_TARGET_HAS_neg_i64;
2038     case INDEX_op_andc_i64:
2039         return TCG_TARGET_HAS_andc_i64;
2040     case INDEX_op_orc_i64:
2041         return TCG_TARGET_HAS_orc_i64;
2042     case INDEX_op_eqv_i64:
2043         return TCG_TARGET_HAS_eqv_i64;
2044     case INDEX_op_nand_i64:
2045         return TCG_TARGET_HAS_nand_i64;
2046     case INDEX_op_nor_i64:
2047         return TCG_TARGET_HAS_nor_i64;
2048     case INDEX_op_clz_i64:
2049         return TCG_TARGET_HAS_clz_i64;
2050     case INDEX_op_ctz_i64:
2051         return TCG_TARGET_HAS_ctz_i64;
2052     case INDEX_op_ctpop_i64:
2053         return TCG_TARGET_HAS_ctpop_i64;
2054     case INDEX_op_add2_i64:
2055         return TCG_TARGET_HAS_add2_i64;
2056     case INDEX_op_sub2_i64:
2057         return TCG_TARGET_HAS_sub2_i64;
2058     case INDEX_op_mulu2_i64:
2059         return TCG_TARGET_HAS_mulu2_i64;
2060     case INDEX_op_muls2_i64:
2061         return TCG_TARGET_HAS_muls2_i64;
2062     case INDEX_op_muluh_i64:
2063         return TCG_TARGET_HAS_muluh_i64;
2064     case INDEX_op_mulsh_i64:
2065         return TCG_TARGET_HAS_mulsh_i64;
2066 
2067     case INDEX_op_mov_vec:
2068     case INDEX_op_dup_vec:
2069     case INDEX_op_dupm_vec:
2070     case INDEX_op_ld_vec:
2071     case INDEX_op_st_vec:
2072     case INDEX_op_add_vec:
2073     case INDEX_op_sub_vec:
2074     case INDEX_op_and_vec:
2075     case INDEX_op_or_vec:
2076     case INDEX_op_xor_vec:
2077     case INDEX_op_cmp_vec:
2078         return have_vec;
2079     case INDEX_op_dup2_vec:
2080         return have_vec && TCG_TARGET_REG_BITS == 32;
2081     case INDEX_op_not_vec:
2082         return have_vec && TCG_TARGET_HAS_not_vec;
2083     case INDEX_op_neg_vec:
2084         return have_vec && TCG_TARGET_HAS_neg_vec;
2085     case INDEX_op_abs_vec:
2086         return have_vec && TCG_TARGET_HAS_abs_vec;
2087     case INDEX_op_andc_vec:
2088         return have_vec && TCG_TARGET_HAS_andc_vec;
2089     case INDEX_op_orc_vec:
2090         return have_vec && TCG_TARGET_HAS_orc_vec;
2091     case INDEX_op_nand_vec:
2092         return have_vec && TCG_TARGET_HAS_nand_vec;
2093     case INDEX_op_nor_vec:
2094         return have_vec && TCG_TARGET_HAS_nor_vec;
2095     case INDEX_op_eqv_vec:
2096         return have_vec && TCG_TARGET_HAS_eqv_vec;
2097     case INDEX_op_mul_vec:
2098         return have_vec && TCG_TARGET_HAS_mul_vec;
2099     case INDEX_op_shli_vec:
2100     case INDEX_op_shri_vec:
2101     case INDEX_op_sari_vec:
2102         return have_vec && TCG_TARGET_HAS_shi_vec;
2103     case INDEX_op_shls_vec:
2104     case INDEX_op_shrs_vec:
2105     case INDEX_op_sars_vec:
2106         return have_vec && TCG_TARGET_HAS_shs_vec;
2107     case INDEX_op_shlv_vec:
2108     case INDEX_op_shrv_vec:
2109     case INDEX_op_sarv_vec:
2110         return have_vec && TCG_TARGET_HAS_shv_vec;
2111     case INDEX_op_rotli_vec:
2112         return have_vec && TCG_TARGET_HAS_roti_vec;
2113     case INDEX_op_rotls_vec:
2114         return have_vec && TCG_TARGET_HAS_rots_vec;
2115     case INDEX_op_rotlv_vec:
2116     case INDEX_op_rotrv_vec:
2117         return have_vec && TCG_TARGET_HAS_rotv_vec;
2118     case INDEX_op_ssadd_vec:
2119     case INDEX_op_usadd_vec:
2120     case INDEX_op_sssub_vec:
2121     case INDEX_op_ussub_vec:
2122         return have_vec && TCG_TARGET_HAS_sat_vec;
2123     case INDEX_op_smin_vec:
2124     case INDEX_op_umin_vec:
2125     case INDEX_op_smax_vec:
2126     case INDEX_op_umax_vec:
2127         return have_vec && TCG_TARGET_HAS_minmax_vec;
2128     case INDEX_op_bitsel_vec:
2129         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2130     case INDEX_op_cmpsel_vec:
2131         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2132 
2133     default:
2134         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2135         return true;
2136     }
2137 }
2138 
2139 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2140 
2141 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2142 {
2143     TCGv_i64 extend_free[MAX_CALL_IARGS];
2144     int n_extend = 0;
2145     TCGOp *op;
2146     int i, n, pi = 0, total_args;
2147 
2148     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2149         init_call_layout(info);
2150         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2151     }
2152 
2153     total_args = info->nr_out + info->nr_in + 2;
2154     op = tcg_op_alloc(INDEX_op_call, total_args);
2155 
2156 #ifdef CONFIG_PLUGIN
2157     /* Flag helpers that may affect guest state */
2158     if (tcg_ctx->plugin_insn &&
2159         !(info->flags & TCG_CALL_PLUGIN) &&
2160         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2161         tcg_ctx->plugin_insn->calls_helpers = true;
2162     }
2163 #endif
2164 
2165     TCGOP_CALLO(op) = n = info->nr_out;
2166     switch (n) {
2167     case 0:
2168         tcg_debug_assert(ret == NULL);
2169         break;
2170     case 1:
2171         tcg_debug_assert(ret != NULL);
2172         op->args[pi++] = temp_arg(ret);
2173         break;
2174     case 2:
2175     case 4:
2176         tcg_debug_assert(ret != NULL);
2177         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2178         tcg_debug_assert(ret->temp_subindex == 0);
2179         for (i = 0; i < n; ++i) {
2180             op->args[pi++] = temp_arg(ret + i);
2181         }
2182         break;
2183     default:
2184         g_assert_not_reached();
2185     }
2186 
2187     TCGOP_CALLI(op) = n = info->nr_in;
2188     for (i = 0; i < n; i++) {
2189         const TCGCallArgumentLoc *loc = &info->in[i];
2190         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2191 
2192         switch (loc->kind) {
2193         case TCG_CALL_ARG_NORMAL:
2194         case TCG_CALL_ARG_BY_REF:
2195         case TCG_CALL_ARG_BY_REF_N:
2196             op->args[pi++] = temp_arg(ts);
2197             break;
2198 
2199         case TCG_CALL_ARG_EXTEND_U:
2200         case TCG_CALL_ARG_EXTEND_S:
2201             {
2202                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2203                 TCGv_i32 orig = temp_tcgv_i32(ts);
2204 
2205                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2206                     tcg_gen_ext_i32_i64(temp, orig);
2207                 } else {
2208                     tcg_gen_extu_i32_i64(temp, orig);
2209                 }
2210                 op->args[pi++] = tcgv_i64_arg(temp);
2211                 extend_free[n_extend++] = temp;
2212             }
2213             break;
2214 
2215         default:
2216             g_assert_not_reached();
2217         }
2218     }
2219     op->args[pi++] = (uintptr_t)info->func;
2220     op->args[pi++] = (uintptr_t)info;
2221     tcg_debug_assert(pi == total_args);
2222 
2223     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2224 
2225     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2226     for (i = 0; i < n_extend; ++i) {
2227         tcg_temp_free_i64(extend_free[i]);
2228     }
2229 }
2230 
2231 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2232 {
2233     tcg_gen_callN(info, ret, NULL);
2234 }
2235 
2236 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2237 {
2238     tcg_gen_callN(info, ret, &t1);
2239 }
2240 
2241 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2242 {
2243     TCGTemp *args[2] = { t1, t2 };
2244     tcg_gen_callN(info, ret, args);
2245 }
2246 
2247 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2248                    TCGTemp *t2, TCGTemp *t3)
2249 {
2250     TCGTemp *args[3] = { t1, t2, t3 };
2251     tcg_gen_callN(info, ret, args);
2252 }
2253 
2254 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2255                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2256 {
2257     TCGTemp *args[4] = { t1, t2, t3, t4 };
2258     tcg_gen_callN(info, ret, args);
2259 }
2260 
2261 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2262                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2263 {
2264     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2265     tcg_gen_callN(info, ret, args);
2266 }
2267 
2268 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2269                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2270 {
2271     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2272     tcg_gen_callN(info, ret, args);
2273 }
2274 
2275 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2276                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2277                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2278 {
2279     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2280     tcg_gen_callN(info, ret, args);
2281 }
2282 
2283 static void tcg_reg_alloc_start(TCGContext *s)
2284 {
2285     int i, n;
2286 
2287     for (i = 0, n = s->nb_temps; i < n; i++) {
2288         TCGTemp *ts = &s->temps[i];
2289         TCGTempVal val = TEMP_VAL_MEM;
2290 
2291         switch (ts->kind) {
2292         case TEMP_CONST:
2293             val = TEMP_VAL_CONST;
2294             break;
2295         case TEMP_FIXED:
2296             val = TEMP_VAL_REG;
2297             break;
2298         case TEMP_GLOBAL:
2299             break;
2300         case TEMP_EBB:
2301             val = TEMP_VAL_DEAD;
2302             /* fall through */
2303         case TEMP_TB:
2304             ts->mem_allocated = 0;
2305             break;
2306         default:
2307             g_assert_not_reached();
2308         }
2309         ts->val_type = val;
2310     }
2311 
2312     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2313 }
2314 
2315 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2316                                  TCGTemp *ts)
2317 {
2318     int idx = temp_idx(ts);
2319 
2320     switch (ts->kind) {
2321     case TEMP_FIXED:
2322     case TEMP_GLOBAL:
2323         pstrcpy(buf, buf_size, ts->name);
2324         break;
2325     case TEMP_TB:
2326         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2327         break;
2328     case TEMP_EBB:
2329         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2330         break;
2331     case TEMP_CONST:
2332         switch (ts->type) {
2333         case TCG_TYPE_I32:
2334             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2335             break;
2336 #if TCG_TARGET_REG_BITS > 32
2337         case TCG_TYPE_I64:
2338             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2339             break;
2340 #endif
2341         case TCG_TYPE_V64:
2342         case TCG_TYPE_V128:
2343         case TCG_TYPE_V256:
2344             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2345                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2346             break;
2347         default:
2348             g_assert_not_reached();
2349         }
2350         break;
2351     }
2352     return buf;
2353 }
2354 
2355 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2356                              int buf_size, TCGArg arg)
2357 {
2358     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2359 }
2360 
2361 static const char * const cond_name[] =
2362 {
2363     [TCG_COND_NEVER] = "never",
2364     [TCG_COND_ALWAYS] = "always",
2365     [TCG_COND_EQ] = "eq",
2366     [TCG_COND_NE] = "ne",
2367     [TCG_COND_LT] = "lt",
2368     [TCG_COND_GE] = "ge",
2369     [TCG_COND_LE] = "le",
2370     [TCG_COND_GT] = "gt",
2371     [TCG_COND_LTU] = "ltu",
2372     [TCG_COND_GEU] = "geu",
2373     [TCG_COND_LEU] = "leu",
2374     [TCG_COND_GTU] = "gtu"
2375 };
2376 
2377 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2378 {
2379     [MO_UB]   = "ub",
2380     [MO_SB]   = "sb",
2381     [MO_LEUW] = "leuw",
2382     [MO_LESW] = "lesw",
2383     [MO_LEUL] = "leul",
2384     [MO_LESL] = "lesl",
2385     [MO_LEUQ] = "leq",
2386     [MO_BEUW] = "beuw",
2387     [MO_BESW] = "besw",
2388     [MO_BEUL] = "beul",
2389     [MO_BESL] = "besl",
2390     [MO_BEUQ] = "beq",
2391     [MO_128 + MO_BE] = "beo",
2392     [MO_128 + MO_LE] = "leo",
2393 };
2394 
2395 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2396     [MO_UNALN >> MO_ASHIFT]    = "un+",
2397     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2398     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2399     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2400     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2401     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2402     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2403     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2404 };
2405 
2406 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2407     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2408     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2409     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2410     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2411     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2412     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2413 };
2414 
2415 static const char bswap_flag_name[][6] = {
2416     [TCG_BSWAP_IZ] = "iz",
2417     [TCG_BSWAP_OZ] = "oz",
2418     [TCG_BSWAP_OS] = "os",
2419     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2420     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2421 };
2422 
2423 static inline bool tcg_regset_single(TCGRegSet d)
2424 {
2425     return (d & (d - 1)) == 0;
2426 }
2427 
2428 static inline TCGReg tcg_regset_first(TCGRegSet d)
2429 {
2430     if (TCG_TARGET_NB_REGS <= 32) {
2431         return ctz32(d);
2432     } else {
2433         return ctz64(d);
2434     }
2435 }
2436 
/*
 * fprintf() wrapper with no error return: evaluates to the number of
 * characters written, or to 0 when fprintf() reports a failure.
 */
#define ne_fprintf(...) \
    ({ int printed_ = fprintf(__VA_ARGS__); printed_ < 0 ? 0 : printed_; })
2440 
2441 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2442 {
2443     char buf[128];
2444     TCGOp *op;
2445 
2446     QTAILQ_FOREACH(op, &s->ops, link) {
2447         int i, k, nb_oargs, nb_iargs, nb_cargs;
2448         const TCGOpDef *def;
2449         TCGOpcode c;
2450         int col = 0;
2451 
2452         c = op->opc;
2453         def = &tcg_op_defs[c];
2454 
2455         if (c == INDEX_op_insn_start) {
2456             nb_oargs = 0;
2457             col += ne_fprintf(f, "\n ----");
2458 
2459             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2460                 col += ne_fprintf(f, " %016" PRIx64,
2461                                   tcg_get_insn_start_param(op, i));
2462             }
2463         } else if (c == INDEX_op_call) {
2464             const TCGHelperInfo *info = tcg_call_info(op);
2465             void *func = tcg_call_func(op);
2466 
2467             /* variable number of arguments */
2468             nb_oargs = TCGOP_CALLO(op);
2469             nb_iargs = TCGOP_CALLI(op);
2470             nb_cargs = def->nb_cargs;
2471 
2472             col += ne_fprintf(f, " %s ", def->name);
2473 
2474             /*
2475              * Print the function name from TCGHelperInfo, if available.
2476              * Note that plugins have a template function for the info,
2477              * but the actual function pointer comes from the plugin.
2478              */
2479             if (func == info->func) {
2480                 col += ne_fprintf(f, "%s", info->name);
2481             } else {
2482                 col += ne_fprintf(f, "plugin(%p)", func);
2483             }
2484 
2485             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2486             for (i = 0; i < nb_oargs; i++) {
2487                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2488                                                             op->args[i]));
2489             }
2490             for (i = 0; i < nb_iargs; i++) {
2491                 TCGArg arg = op->args[nb_oargs + i];
2492                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2493                 col += ne_fprintf(f, ",%s", t);
2494             }
2495         } else {
2496             col += ne_fprintf(f, " %s ", def->name);
2497 
2498             nb_oargs = def->nb_oargs;
2499             nb_iargs = def->nb_iargs;
2500             nb_cargs = def->nb_cargs;
2501 
2502             if (def->flags & TCG_OPF_VECTOR) {
2503                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2504                                   8 << TCGOP_VECE(op));
2505             }
2506 
2507             k = 0;
2508             for (i = 0; i < nb_oargs; i++) {
2509                 const char *sep =  k ? "," : "";
2510                 col += ne_fprintf(f, "%s%s", sep,
2511                                   tcg_get_arg_str(s, buf, sizeof(buf),
2512                                                   op->args[k++]));
2513             }
2514             for (i = 0; i < nb_iargs; i++) {
2515                 const char *sep =  k ? "," : "";
2516                 col += ne_fprintf(f, "%s%s", sep,
2517                                   tcg_get_arg_str(s, buf, sizeof(buf),
2518                                                   op->args[k++]));
2519             }
2520             switch (c) {
2521             case INDEX_op_brcond_i32:
2522             case INDEX_op_setcond_i32:
2523             case INDEX_op_negsetcond_i32:
2524             case INDEX_op_movcond_i32:
2525             case INDEX_op_brcond2_i32:
2526             case INDEX_op_setcond2_i32:
2527             case INDEX_op_brcond_i64:
2528             case INDEX_op_setcond_i64:
2529             case INDEX_op_negsetcond_i64:
2530             case INDEX_op_movcond_i64:
2531             case INDEX_op_cmp_vec:
2532             case INDEX_op_cmpsel_vec:
2533                 if (op->args[k] < ARRAY_SIZE(cond_name)
2534                     && cond_name[op->args[k]]) {
2535                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2536                 } else {
2537                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2538                 }
2539                 i = 1;
2540                 break;
2541             case INDEX_op_qemu_ld_a32_i32:
2542             case INDEX_op_qemu_ld_a64_i32:
2543             case INDEX_op_qemu_st_a32_i32:
2544             case INDEX_op_qemu_st_a64_i32:
2545             case INDEX_op_qemu_st8_a32_i32:
2546             case INDEX_op_qemu_st8_a64_i32:
2547             case INDEX_op_qemu_ld_a32_i64:
2548             case INDEX_op_qemu_ld_a64_i64:
2549             case INDEX_op_qemu_st_a32_i64:
2550             case INDEX_op_qemu_st_a64_i64:
2551             case INDEX_op_qemu_ld_a32_i128:
2552             case INDEX_op_qemu_ld_a64_i128:
2553             case INDEX_op_qemu_st_a32_i128:
2554             case INDEX_op_qemu_st_a64_i128:
2555                 {
2556                     const char *s_al, *s_op, *s_at;
2557                     MemOpIdx oi = op->args[k++];
2558                     MemOp mop = get_memop(oi);
2559                     unsigned ix = get_mmuidx(oi);
2560 
2561                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2562                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2563                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2564                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2565 
2566                     /* If all fields are accounted for, print symbolically. */
2567                     if (!mop && s_al && s_op && s_at) {
2568                         col += ne_fprintf(f, ",%s%s%s,%u",
2569                                           s_at, s_al, s_op, ix);
2570                     } else {
2571                         mop = get_memop(oi);
2572                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2573                     }
2574                     i = 1;
2575                 }
2576                 break;
2577             case INDEX_op_bswap16_i32:
2578             case INDEX_op_bswap16_i64:
2579             case INDEX_op_bswap32_i32:
2580             case INDEX_op_bswap32_i64:
2581             case INDEX_op_bswap64_i64:
2582                 {
2583                     TCGArg flags = op->args[k];
2584                     const char *name = NULL;
2585 
2586                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2587                         name = bswap_flag_name[flags];
2588                     }
2589                     if (name) {
2590                         col += ne_fprintf(f, ",%s", name);
2591                     } else {
2592                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2593                     }
2594                     i = k = 1;
2595                 }
2596                 break;
2597             default:
2598                 i = 0;
2599                 break;
2600             }
2601             switch (c) {
2602             case INDEX_op_set_label:
2603             case INDEX_op_br:
2604             case INDEX_op_brcond_i32:
2605             case INDEX_op_brcond_i64:
2606             case INDEX_op_brcond2_i32:
2607                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2608                                   arg_label(op->args[k])->id);
2609                 i++, k++;
2610                 break;
2611             case INDEX_op_mb:
2612                 {
2613                     TCGBar membar = op->args[k];
2614                     const char *b_op, *m_op;
2615 
2616                     switch (membar & TCG_BAR_SC) {
2617                     case 0:
2618                         b_op = "none";
2619                         break;
2620                     case TCG_BAR_LDAQ:
2621                         b_op = "acq";
2622                         break;
2623                     case TCG_BAR_STRL:
2624                         b_op = "rel";
2625                         break;
2626                     case TCG_BAR_SC:
2627                         b_op = "seq";
2628                         break;
2629                     default:
2630                         g_assert_not_reached();
2631                     }
2632 
2633                     switch (membar & TCG_MO_ALL) {
2634                     case 0:
2635                         m_op = "none";
2636                         break;
2637                     case TCG_MO_LD_LD:
2638                         m_op = "rr";
2639                         break;
2640                     case TCG_MO_LD_ST:
2641                         m_op = "rw";
2642                         break;
2643                     case TCG_MO_ST_LD:
2644                         m_op = "wr";
2645                         break;
2646                     case TCG_MO_ST_ST:
2647                         m_op = "ww";
2648                         break;
2649                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2650                         m_op = "rr+rw";
2651                         break;
2652                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2653                         m_op = "rr+wr";
2654                         break;
2655                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2656                         m_op = "rr+ww";
2657                         break;
2658                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2659                         m_op = "rw+wr";
2660                         break;
2661                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2662                         m_op = "rw+ww";
2663                         break;
2664                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2665                         m_op = "wr+ww";
2666                         break;
2667                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2668                         m_op = "rr+rw+wr";
2669                         break;
2670                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2671                         m_op = "rr+rw+ww";
2672                         break;
2673                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2674                         m_op = "rr+wr+ww";
2675                         break;
2676                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2677                         m_op = "rw+wr+ww";
2678                         break;
2679                     case TCG_MO_ALL:
2680                         m_op = "all";
2681                         break;
2682                     default:
2683                         g_assert_not_reached();
2684                     }
2685 
2686                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2687                     i++, k++;
2688                 }
2689                 break;
2690             default:
2691                 break;
2692             }
2693             for (; i < nb_cargs; i++, k++) {
2694                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2695                                   op->args[k]);
2696             }
2697         }
2698 
2699         if (have_prefs || op->life) {
2700             for (; col < 40; ++col) {
2701                 putc(' ', f);
2702             }
2703         }
2704 
2705         if (op->life) {
2706             unsigned life = op->life;
2707 
2708             if (life & (SYNC_ARG * 3)) {
2709                 ne_fprintf(f, "  sync:");
2710                 for (i = 0; i < 2; ++i) {
2711                     if (life & (SYNC_ARG << i)) {
2712                         ne_fprintf(f, " %d", i);
2713                     }
2714                 }
2715             }
2716             life /= DEAD_ARG;
2717             if (life) {
2718                 ne_fprintf(f, "  dead:");
2719                 for (i = 0; life; ++i, life >>= 1) {
2720                     if (life & 1) {
2721                         ne_fprintf(f, " %d", i);
2722                     }
2723                 }
2724             }
2725         }
2726 
2727         if (have_prefs) {
2728             for (i = 0; i < nb_oargs; ++i) {
2729                 TCGRegSet set = output_pref(op, i);
2730 
2731                 if (i == 0) {
2732                     ne_fprintf(f, "  pref=");
2733                 } else {
2734                     ne_fprintf(f, ",");
2735                 }
2736                 if (set == 0) {
2737                     ne_fprintf(f, "none");
2738                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2739                     ne_fprintf(f, "all");
2740 #ifdef CONFIG_DEBUG_TCG
2741                 } else if (tcg_regset_single(set)) {
2742                     TCGReg reg = tcg_regset_first(set);
2743                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2744 #endif
2745                 } else if (TCG_TARGET_NB_REGS <= 32) {
2746                     ne_fprintf(f, "0x%x", (uint32_t)set);
2747                 } else {
2748                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2749                 }
2750             }
2751         }
2752 
2753         putc('\n', f);
2754     }
2755 }
2756 
2757 /* we give more priority to constraints with less registers */
2758 static int get_constraint_priority(const TCGOpDef *def, int k)
2759 {
2760     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2761     int n = ctpop64(arg_ct->regs);
2762 
2763     /*
2764      * Sort constraints of a single register first, which includes output
2765      * aliases (which must exactly match the input already allocated).
2766      */
2767     if (n == 1 || arg_ct->oalias) {
2768         return INT_MAX;
2769     }
2770 
2771     /*
2772      * Sort register pairs next, first then second immediately after.
2773      * Arbitrarily sort multiple pairs by the index of the first reg;
2774      * there shouldn't be many pairs.
2775      */
2776     switch (arg_ct->pair) {
2777     case 1:
2778     case 3:
2779         return (k + 1) * 2;
2780     case 2:
2781         return (arg_ct->pair_index + 1) * 2 - 1;
2782     }
2783 
2784     /* Finally, sort by decreasing register count. */
2785     assert(n > 1);
2786     return -n;
2787 }
2788 
2789 /* sort from highest priority to lowest */
2790 static void sort_constraints(TCGOpDef *def, int start, int n)
2791 {
2792     int i, j;
2793     TCGArgConstraint *a = def->args_ct;
2794 
2795     for (i = 0; i < n; i++) {
2796         a[start + i].sort_index = start + i;
2797     }
2798     if (n <= 1) {
2799         return;
2800     }
2801     for (i = 0; i < n - 1; i++) {
2802         for (j = i + 1; j < n; j++) {
2803             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2804             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2805             if (p1 < p2) {
2806                 int tmp = a[start + i].sort_index;
2807                 a[start + i].sort_index = a[start + j].sort_index;
2808                 a[start + j].sort_index = tmp;
2809             }
2810         }
2811     }
2812 }
2813 
/*
 * Populate tcg_op_defs[].args_ct for every opcode from the target's
 * constraint strings.
 *
 * For each opcode that is present and has at least one input or output
 * argument, look up its constraint set via tcg_target_op_def(), parse the
 * per-argument constraint strings into TCGArgConstraint entries, fix up
 * register pairs that are shared through output aliases, and finally
 * compute the sort order of the output and input constraints.
 *
 * @s is not referenced by the body of this function.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        /* Opcodes flagged as not present have no constraints to parse. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        /* Nothing to do for opcodes without input or output arguments. */
        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* Arguments are indexed outputs first, then inputs. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /*
             * The first character may be one of the special markers
             * [0-9], '&', 'p' or 'm'.  All but '&' must be the whole
             * string and finish the argument via "continue".
             */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output O: start from a copy of it. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                /* Remember that the pair fix-up pass below is required. */
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /*
                 * Only valid on outputs: flag the constraint as newreg,
                 * then parse the rest of the string as usual.
                 */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                /* This argument is the second of the pair (pair = 2)... */
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                /* ... and the previous argument becomes the first. */
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                /* This argument is the first of the pair (pair = 1)... */
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                /* ... and the previous argument becomes the second. */
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /*
             * Accumulate the remaining constraint letters, one character
             * at a time, into the ct and regs fields of this argument.
             */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    /* Input I aliases output O, the first of a pair. */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    /* Input I aliases output O, the second of a pair. */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
3018 
3019 static void remove_label_use(TCGOp *op, int idx)
3020 {
3021     TCGLabel *label = arg_label(op->args[idx]);
3022     TCGLabelUse *use;
3023 
3024     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3025         if (use->op == op) {
3026             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3027             return;
3028         }
3029     }
3030     g_assert_not_reached();
3031 }
3032 
3033 void tcg_op_remove(TCGContext *s, TCGOp *op)
3034 {
3035     switch (op->opc) {
3036     case INDEX_op_br:
3037         remove_label_use(op, 0);
3038         break;
3039     case INDEX_op_brcond_i32:
3040     case INDEX_op_brcond_i64:
3041         remove_label_use(op, 3);
3042         break;
3043     case INDEX_op_brcond2_i32:
3044         remove_label_use(op, 5);
3045         break;
3046     default:
3047         break;
3048     }
3049 
3050     QTAILQ_REMOVE(&s->ops, op, link);
3051     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3052     s->nb_ops--;
3053 }
3054 
3055 void tcg_remove_ops_after(TCGOp *op)
3056 {
3057     TCGContext *s = tcg_ctx;
3058 
3059     while (true) {
3060         TCGOp *last = tcg_last_op();
3061         if (last == op) {
3062             return;
3063         }
3064         tcg_op_remove(s, last);
3065     }
3066 }
3067 
3068 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3069 {
3070     TCGContext *s = tcg_ctx;
3071     TCGOp *op = NULL;
3072 
3073     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3074         QTAILQ_FOREACH(op, &s->free_ops, link) {
3075             if (nargs <= op->nargs) {
3076                 QTAILQ_REMOVE(&s->free_ops, op, link);
3077                 nargs = op->nargs;
3078                 goto found;
3079             }
3080         }
3081     }
3082 
3083     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3084     nargs = MAX(4, nargs);
3085     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3086 
3087  found:
3088     memset(op, 0, offsetof(TCGOp, link));
3089     op->opc = opc;
3090     op->nargs = nargs;
3091 
3092     /* Check for bitfield overflow. */
3093     tcg_debug_assert(op->nargs == nargs);
3094 
3095     s->nb_ops++;
3096     return op;
3097 }
3098 
3099 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3100 {
3101     TCGOp *op = tcg_op_alloc(opc, nargs);
3102     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3103     return op;
3104 }
3105 
3106 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3107                             TCGOpcode opc, unsigned nargs)
3108 {
3109     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3110     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3111     return new_op;
3112 }
3113 
3114 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3115                            TCGOpcode opc, unsigned nargs)
3116 {
3117     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3118     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3119     return new_op;
3120 }
3121 
3122 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3123 {
3124     TCGLabelUse *u;
3125 
3126     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3127         TCGOp *op = u->op;
3128         switch (op->opc) {
3129         case INDEX_op_br:
3130             op->args[0] = label_arg(to);
3131             break;
3132         case INDEX_op_brcond_i32:
3133         case INDEX_op_brcond_i64:
3134             op->args[3] = label_arg(to);
3135             break;
3136         case INDEX_op_brcond2_i32:
3137             op->args[5] = label_arg(to);
3138             break;
3139         default:
3140             g_assert_not_reached();
3141         }
3142     }
3143 
3144     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3145 }
3146 
3147 /* Reachable analysis : remove unreachable code.  */
3148 static void __attribute__((noinline))
3149 reachable_code_pass(TCGContext *s)
3150 {
3151     TCGOp *op, *op_next, *op_prev;
3152     bool dead = false;
3153 
3154     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3155         bool remove = dead;
3156         TCGLabel *label;
3157 
3158         switch (op->opc) {
3159         case INDEX_op_set_label:
3160             label = arg_label(op->args[0]);
3161 
3162             /*
3163              * Note that the first op in the TB is always a load,
3164              * so there is always something before a label.
3165              */
3166             op_prev = QTAILQ_PREV(op, link);
3167 
3168             /*
3169              * If we find two sequential labels, move all branches to
3170              * reference the second label and remove the first label.
3171              * Do this before branch to next optimization, so that the
3172              * middle label is out of the way.
3173              */
3174             if (op_prev->opc == INDEX_op_set_label) {
3175                 move_label_uses(label, arg_label(op_prev->args[0]));
3176                 tcg_op_remove(s, op_prev);
3177                 op_prev = QTAILQ_PREV(op, link);
3178             }
3179 
3180             /*
3181              * Optimization can fold conditional branches to unconditional.
3182              * If we find a label which is preceded by an unconditional
3183              * branch to next, remove the branch.  We couldn't do this when
3184              * processing the branch because any dead code between the branch
3185              * and label had not yet been removed.
3186              */
3187             if (op_prev->opc == INDEX_op_br &&
3188                 label == arg_label(op_prev->args[0])) {
3189                 tcg_op_remove(s, op_prev);
3190                 /* Fall through means insns become live again.  */
3191                 dead = false;
3192             }
3193 
3194             if (QSIMPLEQ_EMPTY(&label->branches)) {
3195                 /*
3196                  * While there is an occasional backward branch, virtually
3197                  * all branches generated by the translators are forward.
3198                  * Which means that generally we will have already removed
3199                  * all references to the label that will be, and there is
3200                  * little to be gained by iterating.
3201                  */
3202                 remove = true;
3203             } else {
3204                 /* Once we see a label, insns become live again.  */
3205                 dead = false;
3206                 remove = false;
3207             }
3208             break;
3209 
3210         case INDEX_op_br:
3211         case INDEX_op_exit_tb:
3212         case INDEX_op_goto_ptr:
3213             /* Unconditional branches; everything following is dead.  */
3214             dead = true;
3215             break;
3216 
3217         case INDEX_op_call:
3218             /* Notice noreturn helper calls, raising exceptions.  */
3219             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3220                 dead = true;
3221             }
3222             break;
3223 
3224         case INDEX_op_insn_start:
3225             /* Never remove -- we need to keep these for unwind.  */
3226             remove = false;
3227             break;
3228 
3229         default:
3230             break;
3231         }
3232 
3233         if (remove) {
3234             tcg_op_remove(s, op);
3235         }
3236     }
3237 }
3238 
/*
 * Flag bits kept in TCGTemp.state by the liveness passes below:
 * TS_DEAD marks a temp as dead, TS_MEM marks a temp that should be
 * in memory.  The two may be combined.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / sync bit for argument N.  Both macros read a variable
 * named arg_life that must be in scope where they are expanded.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3244 
3245 /* For liveness_pass_1, the register preferences for a given temp.  */
3246 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3247 {
3248     return ts->state_ptr;
3249 }
3250 
3251 /* For liveness_pass_1, reset the preferences for a given temp to the
3252  * maximal regset for its type.
3253  */
3254 static inline void la_reset_pref(TCGTemp *ts)
3255 {
3256     *la_temp_pref(ts)
3257         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3258 }
3259 
3260 /* liveness analysis: end of function: all temps are dead, and globals
3261    should be in memory. */
3262 static void la_func_end(TCGContext *s, int ng, int nt)
3263 {
3264     int i;
3265 
3266     for (i = 0; i < ng; ++i) {
3267         s->temps[i].state = TS_DEAD | TS_MEM;
3268         la_reset_pref(&s->temps[i]);
3269     }
3270     for (i = ng; i < nt; ++i) {
3271         s->temps[i].state = TS_DEAD;
3272         la_reset_pref(&s->temps[i]);
3273     }
3274 }
3275 
3276 /* liveness analysis: end of basic block: all temps are dead, globals
3277    and local temps should be in memory. */
3278 static void la_bb_end(TCGContext *s, int ng, int nt)
3279 {
3280     int i;
3281 
3282     for (i = 0; i < nt; ++i) {
3283         TCGTemp *ts = &s->temps[i];
3284         int state;
3285 
3286         switch (ts->kind) {
3287         case TEMP_FIXED:
3288         case TEMP_GLOBAL:
3289         case TEMP_TB:
3290             state = TS_DEAD | TS_MEM;
3291             break;
3292         case TEMP_EBB:
3293         case TEMP_CONST:
3294             state = TS_DEAD;
3295             break;
3296         default:
3297             g_assert_not_reached();
3298         }
3299         ts->state = state;
3300         la_reset_pref(ts);
3301     }
3302 }
3303 
3304 /* liveness analysis: sync globals back to memory.  */
3305 static void la_global_sync(TCGContext *s, int ng)
3306 {
3307     int i;
3308 
3309     for (i = 0; i < ng; ++i) {
3310         int state = s->temps[i].state;
3311         s->temps[i].state = state | TS_MEM;
3312         if (state == TS_DEAD) {
3313             /* If the global was previously dead, reset prefs.  */
3314             la_reset_pref(&s->temps[i]);
3315         }
3316     }
3317 }
3318 
3319 /*
3320  * liveness analysis: conditional branch: all temps are dead unless
3321  * explicitly live-across-conditional-branch, globals and local temps
3322  * should be synced.
3323  */
3324 static void la_bb_sync(TCGContext *s, int ng, int nt)
3325 {
3326     la_global_sync(s, ng);
3327 
3328     for (int i = ng; i < nt; ++i) {
3329         TCGTemp *ts = &s->temps[i];
3330         int state;
3331 
3332         switch (ts->kind) {
3333         case TEMP_TB:
3334             state = ts->state;
3335             ts->state = state | TS_MEM;
3336             if (state != TS_DEAD) {
3337                 continue;
3338             }
3339             break;
3340         case TEMP_EBB:
3341         case TEMP_CONST:
3342             continue;
3343         default:
3344             g_assert_not_reached();
3345         }
3346         la_reset_pref(&s->temps[i]);
3347     }
3348 }
3349 
3350 /* liveness analysis: sync globals back to memory and kill.  */
3351 static void la_global_kill(TCGContext *s, int ng)
3352 {
3353     int i;
3354 
3355     for (i = 0; i < ng; i++) {
3356         s->temps[i].state = TS_DEAD | TS_MEM;
3357         la_reset_pref(&s->temps[i]);
3358     }
3359 }
3360 
3361 /* liveness analysis: note live globals crossing calls.  */
3362 static void la_cross_call(TCGContext *s, int nt)
3363 {
3364     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3365     int i;
3366 
3367     for (i = 0; i < nt; i++) {
3368         TCGTemp *ts = &s->temps[i];
3369         if (!(ts->state & TS_DEAD)) {
3370             TCGRegSet *pset = la_temp_pref(ts);
3371             TCGRegSet set = *pset;
3372 
3373             set &= mask;
3374             /* If the combination is not possible, restart.  */
3375             if (set == 0) {
3376                 set = tcg_target_available_regs[ts->type] & mask;
3377             }
3378             *pset = set;
3379         }
3380     }
3381 }
3382 
3383 /*
3384  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3385  * to TEMP_EBB, if possible.
3386  */
3387 static void __attribute__((noinline))
3388 liveness_pass_0(TCGContext *s)
3389 {
3390     void * const multiple_ebb = (void *)(uintptr_t)-1;
3391     int nb_temps = s->nb_temps;
3392     TCGOp *op, *ebb;
3393 
3394     for (int i = s->nb_globals; i < nb_temps; ++i) {
3395         s->temps[i].state_ptr = NULL;
3396     }
3397 
3398     /*
3399      * Represent each EBB by the op at which it begins.  In the case of
3400      * the first EBB, this is the first op, otherwise it is a label.
3401      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3402      * within a single EBB, else MULTIPLE_EBB.
3403      */
3404     ebb = QTAILQ_FIRST(&s->ops);
3405     QTAILQ_FOREACH(op, &s->ops, link) {
3406         const TCGOpDef *def;
3407         int nb_oargs, nb_iargs;
3408 
3409         switch (op->opc) {
3410         case INDEX_op_set_label:
3411             ebb = op;
3412             continue;
3413         case INDEX_op_discard:
3414             continue;
3415         case INDEX_op_call:
3416             nb_oargs = TCGOP_CALLO(op);
3417             nb_iargs = TCGOP_CALLI(op);
3418             break;
3419         default:
3420             def = &tcg_op_defs[op->opc];
3421             nb_oargs = def->nb_oargs;
3422             nb_iargs = def->nb_iargs;
3423             break;
3424         }
3425 
3426         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3427             TCGTemp *ts = arg_temp(op->args[i]);
3428 
3429             if (ts->kind != TEMP_TB) {
3430                 continue;
3431             }
3432             if (ts->state_ptr == NULL) {
3433                 ts->state_ptr = ebb;
3434             } else if (ts->state_ptr != ebb) {
3435                 ts->state_ptr = multiple_ebb;
3436             }
3437         }
3438     }
3439 
3440     /*
3441      * For TEMP_TB that turned out not to be used beyond one EBB,
3442      * reduce the liveness to TEMP_EBB.
3443      */
3444     for (int i = s->nb_globals; i < nb_temps; ++i) {
3445         TCGTemp *ts = &s->temps[i];
3446         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3447             ts->kind = TEMP_EBB;
3448         }
3449     }
3450 }
3451 
3452 /* Liveness analysis : update the opc_arg_life array to tell if a
3453    given input arguments is dead. Instructions updating dead
3454    temporaries are removed. */
3455 static void __attribute__((noinline))
3456 liveness_pass_1(TCGContext *s)
3457 {
3458     int nb_globals = s->nb_globals;
3459     int nb_temps = s->nb_temps;
3460     TCGOp *op, *op_prev;
3461     TCGRegSet *prefs;
3462     int i;
3463 
3464     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3465     for (i = 0; i < nb_temps; ++i) {
3466         s->temps[i].state_ptr = prefs + i;
3467     }
3468 
3469     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3470     la_func_end(s, nb_globals, nb_temps);
3471 
3472     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3473         int nb_iargs, nb_oargs;
3474         TCGOpcode opc_new, opc_new2;
3475         bool have_opc_new2;
3476         TCGLifeData arg_life = 0;
3477         TCGTemp *ts;
3478         TCGOpcode opc = op->opc;
3479         const TCGOpDef *def = &tcg_op_defs[opc];
3480 
3481         switch (opc) {
3482         case INDEX_op_call:
3483             {
3484                 const TCGHelperInfo *info = tcg_call_info(op);
3485                 int call_flags = tcg_call_flags(op);
3486 
3487                 nb_oargs = TCGOP_CALLO(op);
3488                 nb_iargs = TCGOP_CALLI(op);
3489 
3490                 /* pure functions can be removed if their result is unused */
3491                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3492                     for (i = 0; i < nb_oargs; i++) {
3493                         ts = arg_temp(op->args[i]);
3494                         if (ts->state != TS_DEAD) {
3495                             goto do_not_remove_call;
3496                         }
3497                     }
3498                     goto do_remove;
3499                 }
3500             do_not_remove_call:
3501 
3502                 /* Output args are dead.  */
3503                 for (i = 0; i < nb_oargs; i++) {
3504                     ts = arg_temp(op->args[i]);
3505                     if (ts->state & TS_DEAD) {
3506                         arg_life |= DEAD_ARG << i;
3507                     }
3508                     if (ts->state & TS_MEM) {
3509                         arg_life |= SYNC_ARG << i;
3510                     }
3511                     ts->state = TS_DEAD;
3512                     la_reset_pref(ts);
3513                 }
3514 
3515                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3516                 memset(op->output_pref, 0, sizeof(op->output_pref));
3517 
3518                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3519                                     TCG_CALL_NO_READ_GLOBALS))) {
3520                     la_global_kill(s, nb_globals);
3521                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3522                     la_global_sync(s, nb_globals);
3523                 }
3524 
3525                 /* Record arguments that die in this helper.  */
3526                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3527                     ts = arg_temp(op->args[i]);
3528                     if (ts->state & TS_DEAD) {
3529                         arg_life |= DEAD_ARG << i;
3530                     }
3531                 }
3532 
3533                 /* For all live registers, remove call-clobbered prefs.  */
3534                 la_cross_call(s, nb_temps);
3535 
3536                 /*
3537                  * Input arguments are live for preceding opcodes.
3538                  *
3539                  * For those arguments that die, and will be allocated in
3540                  * registers, clear the register set for that arg, to be
3541                  * filled in below.  For args that will be on the stack,
3542                  * reset to any available reg.  Process arguments in reverse
3543                  * order so that if a temp is used more than once, the stack
3544                  * reset to max happens before the register reset to 0.
3545                  */
3546                 for (i = nb_iargs - 1; i >= 0; i--) {
3547                     const TCGCallArgumentLoc *loc = &info->in[i];
3548                     ts = arg_temp(op->args[nb_oargs + i]);
3549 
3550                     if (ts->state & TS_DEAD) {
3551                         switch (loc->kind) {
3552                         case TCG_CALL_ARG_NORMAL:
3553                         case TCG_CALL_ARG_EXTEND_U:
3554                         case TCG_CALL_ARG_EXTEND_S:
3555                             if (arg_slot_reg_p(loc->arg_slot)) {
3556                                 *la_temp_pref(ts) = 0;
3557                                 break;
3558                             }
3559                             /* fall through */
3560                         default:
3561                             *la_temp_pref(ts) =
3562                                 tcg_target_available_regs[ts->type];
3563                             break;
3564                         }
3565                         ts->state &= ~TS_DEAD;
3566                     }
3567                 }
3568 
3569                 /*
3570                  * For each input argument, add its input register to prefs.
3571                  * If a temp is used once, this produces a single set bit;
3572                  * if a temp is used multiple times, this produces a set.
3573                  */
3574                 for (i = 0; i < nb_iargs; i++) {
3575                     const TCGCallArgumentLoc *loc = &info->in[i];
3576                     ts = arg_temp(op->args[nb_oargs + i]);
3577 
3578                     switch (loc->kind) {
3579                     case TCG_CALL_ARG_NORMAL:
3580                     case TCG_CALL_ARG_EXTEND_U:
3581                     case TCG_CALL_ARG_EXTEND_S:
3582                         if (arg_slot_reg_p(loc->arg_slot)) {
3583                             tcg_regset_set_reg(*la_temp_pref(ts),
3584                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3585                         }
3586                         break;
3587                     default:
3588                         break;
3589                     }
3590                 }
3591             }
3592             break;
3593         case INDEX_op_insn_start:
3594             break;
3595         case INDEX_op_discard:
3596             /* mark the temporary as dead */
3597             ts = arg_temp(op->args[0]);
3598             ts->state = TS_DEAD;
3599             la_reset_pref(ts);
3600             break;
3601 
3602         case INDEX_op_add2_i32:
3603             opc_new = INDEX_op_add_i32;
3604             goto do_addsub2;
3605         case INDEX_op_sub2_i32:
3606             opc_new = INDEX_op_sub_i32;
3607             goto do_addsub2;
3608         case INDEX_op_add2_i64:
3609             opc_new = INDEX_op_add_i64;
3610             goto do_addsub2;
3611         case INDEX_op_sub2_i64:
3612             opc_new = INDEX_op_sub_i64;
3613         do_addsub2:
3614             nb_iargs = 4;
3615             nb_oargs = 2;
3616             /* Test if the high part of the operation is dead, but not
3617                the low part.  The result can be optimized to a simple
3618                add or sub.  This happens often for x86_64 guest when the
3619                cpu mode is set to 32 bit.  */
3620             if (arg_temp(op->args[1])->state == TS_DEAD) {
3621                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3622                     goto do_remove;
3623                 }
3624                 /* Replace the opcode and adjust the args in place,
3625                    leaving 3 unused args at the end.  */
3626                 op->opc = opc = opc_new;
3627                 op->args[1] = op->args[2];
3628                 op->args[2] = op->args[4];
3629                 /* Fall through and mark the single-word operation live.  */
3630                 nb_iargs = 2;
3631                 nb_oargs = 1;
3632             }
3633             goto do_not_remove;
3634 
3635         case INDEX_op_mulu2_i32:
3636             opc_new = INDEX_op_mul_i32;
3637             opc_new2 = INDEX_op_muluh_i32;
3638             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3639             goto do_mul2;
3640         case INDEX_op_muls2_i32:
3641             opc_new = INDEX_op_mul_i32;
3642             opc_new2 = INDEX_op_mulsh_i32;
3643             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3644             goto do_mul2;
3645         case INDEX_op_mulu2_i64:
3646             opc_new = INDEX_op_mul_i64;
3647             opc_new2 = INDEX_op_muluh_i64;
3648             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3649             goto do_mul2;
3650         case INDEX_op_muls2_i64:
3651             opc_new = INDEX_op_mul_i64;
3652             opc_new2 = INDEX_op_mulsh_i64;
3653             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3654             goto do_mul2;
3655         do_mul2:
3656             nb_iargs = 2;
3657             nb_oargs = 2;
3658             if (arg_temp(op->args[1])->state == TS_DEAD) {
3659                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3660                     /* Both parts of the operation are dead.  */
3661                     goto do_remove;
3662                 }
3663                 /* The high part of the operation is dead; generate the low. */
3664                 op->opc = opc = opc_new;
3665                 op->args[1] = op->args[2];
3666                 op->args[2] = op->args[3];
3667             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3668                 /* The low part of the operation is dead; generate the high. */
3669                 op->opc = opc = opc_new2;
3670                 op->args[0] = op->args[1];
3671                 op->args[1] = op->args[2];
3672                 op->args[2] = op->args[3];
3673             } else {
3674                 goto do_not_remove;
3675             }
3676             /* Mark the single-word operation live.  */
3677             nb_oargs = 1;
3678             goto do_not_remove;
3679 
3680         default:
3681             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3682             nb_iargs = def->nb_iargs;
3683             nb_oargs = def->nb_oargs;
3684 
3685             /* Test if the operation can be removed because all
3686                its outputs are dead. We assume that nb_oargs == 0
3687                implies side effects */
3688             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3689                 for (i = 0; i < nb_oargs; i++) {
3690                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3691                         goto do_not_remove;
3692                     }
3693                 }
3694                 goto do_remove;
3695             }
3696             goto do_not_remove;
3697 
3698         do_remove:
3699             tcg_op_remove(s, op);
3700             break;
3701 
3702         do_not_remove:
3703             for (i = 0; i < nb_oargs; i++) {
3704                 ts = arg_temp(op->args[i]);
3705 
3706                 /* Remember the preference of the uses that followed.  */
3707                 if (i < ARRAY_SIZE(op->output_pref)) {
3708                     op->output_pref[i] = *la_temp_pref(ts);
3709                 }
3710 
3711                 /* Output args are dead.  */
3712                 if (ts->state & TS_DEAD) {
3713                     arg_life |= DEAD_ARG << i;
3714                 }
3715                 if (ts->state & TS_MEM) {
3716                     arg_life |= SYNC_ARG << i;
3717                 }
3718                 ts->state = TS_DEAD;
3719                 la_reset_pref(ts);
3720             }
3721 
3722             /* If end of basic block, update.  */
3723             if (def->flags & TCG_OPF_BB_EXIT) {
3724                 la_func_end(s, nb_globals, nb_temps);
3725             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3726                 la_bb_sync(s, nb_globals, nb_temps);
3727             } else if (def->flags & TCG_OPF_BB_END) {
3728                 la_bb_end(s, nb_globals, nb_temps);
3729             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3730                 la_global_sync(s, nb_globals);
3731                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3732                     la_cross_call(s, nb_temps);
3733                 }
3734             }
3735 
3736             /* Record arguments that die in this opcode.  */
3737             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3738                 ts = arg_temp(op->args[i]);
3739                 if (ts->state & TS_DEAD) {
3740                     arg_life |= DEAD_ARG << i;
3741                 }
3742             }
3743 
3744             /* Input arguments are live for preceding opcodes.  */
3745             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3746                 ts = arg_temp(op->args[i]);
3747                 if (ts->state & TS_DEAD) {
3748                     /* For operands that were dead, initially allow
3749                        all regs for the type.  */
3750                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3751                     ts->state &= ~TS_DEAD;
3752                 }
3753             }
3754 
3755             /* Incorporate constraints for this operand.  */
3756             switch (opc) {
3757             case INDEX_op_mov_i32:
3758             case INDEX_op_mov_i64:
3759                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3760                    have proper constraints.  That said, special case
3761                    moves to propagate preferences backward.  */
3762                 if (IS_DEAD_ARG(1)) {
3763                     *la_temp_pref(arg_temp(op->args[0]))
3764                         = *la_temp_pref(arg_temp(op->args[1]));
3765                 }
3766                 break;
3767 
3768             default:
3769                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3770                     const TCGArgConstraint *ct = &def->args_ct[i];
3771                     TCGRegSet set, *pset;
3772 
3773                     ts = arg_temp(op->args[i]);
3774                     pset = la_temp_pref(ts);
3775                     set = *pset;
3776 
3777                     set &= ct->regs;
3778                     if (ct->ialias) {
3779                         set &= output_pref(op, ct->alias_index);
3780                     }
3781                     /* If the combination is not possible, restart.  */
3782                     if (set == 0) {
3783                         set = ct->regs;
3784                     }
3785                     *pset = set;
3786                 }
3787                 break;
3788             }
3789             break;
3790         }
3791         op->life = arg_life;
3792     }
3793 }
3794 
3795 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3796 static bool __attribute__((noinline))
3797 liveness_pass_2(TCGContext *s)
3798 {
3799     int nb_globals = s->nb_globals;
3800     int nb_temps, i;
3801     bool changes = false;
3802     TCGOp *op, *op_next;
3803 
3804     /* Create a temporary for each indirect global.  */
3805     for (i = 0; i < nb_globals; ++i) {
3806         TCGTemp *its = &s->temps[i];
3807         if (its->indirect_reg) {
3808             TCGTemp *dts = tcg_temp_alloc(s);
3809             dts->type = its->type;
3810             dts->base_type = its->base_type;
3811             dts->temp_subindex = its->temp_subindex;
3812             dts->kind = TEMP_EBB;
3813             its->state_ptr = dts;
3814         } else {
3815             its->state_ptr = NULL;
3816         }
3817         /* All globals begin dead.  */
3818         its->state = TS_DEAD;
3819     }
3820     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3821         TCGTemp *its = &s->temps[i];
3822         its->state_ptr = NULL;
3823         its->state = TS_DEAD;
3824     }
3825 
3826     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3827         TCGOpcode opc = op->opc;
3828         const TCGOpDef *def = &tcg_op_defs[opc];
3829         TCGLifeData arg_life = op->life;
3830         int nb_iargs, nb_oargs, call_flags;
3831         TCGTemp *arg_ts, *dir_ts;
3832 
3833         if (opc == INDEX_op_call) {
3834             nb_oargs = TCGOP_CALLO(op);
3835             nb_iargs = TCGOP_CALLI(op);
3836             call_flags = tcg_call_flags(op);
3837         } else {
3838             nb_iargs = def->nb_iargs;
3839             nb_oargs = def->nb_oargs;
3840 
3841             /* Set flags similar to how calls require.  */
3842             if (def->flags & TCG_OPF_COND_BRANCH) {
3843                 /* Like reading globals: sync_globals */
3844                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3845             } else if (def->flags & TCG_OPF_BB_END) {
3846                 /* Like writing globals: save_globals */
3847                 call_flags = 0;
3848             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3849                 /* Like reading globals: sync_globals */
3850                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3851             } else {
3852                 /* No effect on globals.  */
3853                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3854                               TCG_CALL_NO_WRITE_GLOBALS);
3855             }
3856         }
3857 
3858         /* Make sure that input arguments are available.  */
3859         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3860             arg_ts = arg_temp(op->args[i]);
3861             dir_ts = arg_ts->state_ptr;
3862             if (dir_ts && arg_ts->state == TS_DEAD) {
3863                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3864                                   ? INDEX_op_ld_i32
3865                                   : INDEX_op_ld_i64);
3866                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3867 
3868                 lop->args[0] = temp_arg(dir_ts);
3869                 lop->args[1] = temp_arg(arg_ts->mem_base);
3870                 lop->args[2] = arg_ts->mem_offset;
3871 
3872                 /* Loaded, but synced with memory.  */
3873                 arg_ts->state = TS_MEM;
3874             }
3875         }
3876 
3877         /* Perform input replacement, and mark inputs that became dead.
3878            No action is required except keeping temp_state up to date
3879            so that we reload when needed.  */
3880         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3881             arg_ts = arg_temp(op->args[i]);
3882             dir_ts = arg_ts->state_ptr;
3883             if (dir_ts) {
3884                 op->args[i] = temp_arg(dir_ts);
3885                 changes = true;
3886                 if (IS_DEAD_ARG(i)) {
3887                     arg_ts->state = TS_DEAD;
3888                 }
3889             }
3890         }
3891 
3892         /* Liveness analysis should ensure that the following are
3893            all correct, for call sites and basic block end points.  */
3894         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3895             /* Nothing to do */
3896         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3897             for (i = 0; i < nb_globals; ++i) {
3898                 /* Liveness should see that globals are synced back,
3899                    that is, either TS_DEAD or TS_MEM.  */
3900                 arg_ts = &s->temps[i];
3901                 tcg_debug_assert(arg_ts->state_ptr == 0
3902                                  || arg_ts->state != 0);
3903             }
3904         } else {
3905             for (i = 0; i < nb_globals; ++i) {
3906                 /* Liveness should see that globals are saved back,
3907                    that is, TS_DEAD, waiting to be reloaded.  */
3908                 arg_ts = &s->temps[i];
3909                 tcg_debug_assert(arg_ts->state_ptr == 0
3910                                  || arg_ts->state == TS_DEAD);
3911             }
3912         }
3913 
3914         /* Outputs become available.  */
3915         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3916             arg_ts = arg_temp(op->args[0]);
3917             dir_ts = arg_ts->state_ptr;
3918             if (dir_ts) {
3919                 op->args[0] = temp_arg(dir_ts);
3920                 changes = true;
3921 
3922                 /* The output is now live and modified.  */
3923                 arg_ts->state = 0;
3924 
3925                 if (NEED_SYNC_ARG(0)) {
3926                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3927                                       ? INDEX_op_st_i32
3928                                       : INDEX_op_st_i64);
3929                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3930                     TCGTemp *out_ts = dir_ts;
3931 
3932                     if (IS_DEAD_ARG(0)) {
3933                         out_ts = arg_temp(op->args[1]);
3934                         arg_ts->state = TS_DEAD;
3935                         tcg_op_remove(s, op);
3936                     } else {
3937                         arg_ts->state = TS_MEM;
3938                     }
3939 
3940                     sop->args[0] = temp_arg(out_ts);
3941                     sop->args[1] = temp_arg(arg_ts->mem_base);
3942                     sop->args[2] = arg_ts->mem_offset;
3943                 } else {
3944                     tcg_debug_assert(!IS_DEAD_ARG(0));
3945                 }
3946             }
3947         } else {
3948             for (i = 0; i < nb_oargs; i++) {
3949                 arg_ts = arg_temp(op->args[i]);
3950                 dir_ts = arg_ts->state_ptr;
3951                 if (!dir_ts) {
3952                     continue;
3953                 }
3954                 op->args[i] = temp_arg(dir_ts);
3955                 changes = true;
3956 
3957                 /* The output is now live and modified.  */
3958                 arg_ts->state = 0;
3959 
3960                 /* Sync outputs upon their last write.  */
3961                 if (NEED_SYNC_ARG(i)) {
3962                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3963                                       ? INDEX_op_st_i32
3964                                       : INDEX_op_st_i64);
3965                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3966 
3967                     sop->args[0] = temp_arg(dir_ts);
3968                     sop->args[1] = temp_arg(arg_ts->mem_base);
3969                     sop->args[2] = arg_ts->mem_offset;
3970 
3971                     arg_ts->state = TS_MEM;
3972                 }
3973                 /* Drop outputs that are dead.  */
3974                 if (IS_DEAD_ARG(i)) {
3975                     arg_ts->state = TS_DEAD;
3976                 }
3977             }
3978         }
3979     }
3980 
3981     return changes;
3982 }
3983 
3984 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3985 {
3986     intptr_t off;
3987     int size, align;
3988 
3989     /* When allocating an object, look at the full type. */
3990     size = tcg_type_size(ts->base_type);
3991     switch (ts->base_type) {
3992     case TCG_TYPE_I32:
3993         align = 4;
3994         break;
3995     case TCG_TYPE_I64:
3996     case TCG_TYPE_V64:
3997         align = 8;
3998         break;
3999     case TCG_TYPE_I128:
4000     case TCG_TYPE_V128:
4001     case TCG_TYPE_V256:
4002         /*
4003          * Note that we do not require aligned storage for V256,
4004          * and that we provide alignment for I128 to match V128,
4005          * even if that's above what the host ABI requires.
4006          */
4007         align = 16;
4008         break;
4009     default:
4010         g_assert_not_reached();
4011     }
4012 
4013     /*
4014      * Assume the stack is sufficiently aligned.
4015      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4016      * and do not require 16 byte vector alignment.  This seems slightly
4017      * easier than fully parameterizing the above switch statement.
4018      */
4019     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4020     off = ROUND_UP(s->current_frame_offset, align);
4021 
4022     /* If we've exhausted the stack frame, restart with a smaller TB. */
4023     if (off + size > s->frame_end) {
4024         tcg_raise_tb_overflow(s);
4025     }
4026     s->current_frame_offset = off + size;
4027 #if defined(__sparc__)
4028     off += TCG_TARGET_STACK_BIAS;
4029 #endif
4030 
4031     /* If the object was subdivided, assign memory to all the parts. */
4032     if (ts->base_type != ts->type) {
4033         int part_size = tcg_type_size(ts->type);
4034         int part_count = size / part_size;
4035 
4036         /*
4037          * Each part is allocated sequentially in tcg_temp_new_internal.
4038          * Jump back to the first part by subtracting the current index.
4039          */
4040         ts -= ts->temp_subindex;
4041         for (int i = 0; i < part_count; ++i) {
4042             ts[i].mem_offset = off + i * part_size;
4043             ts[i].mem_base = s->frame_temp;
4044             ts[i].mem_allocated = 1;
4045         }
4046     } else {
4047         ts->mem_offset = off;
4048         ts->mem_base = s->frame_temp;
4049         ts->mem_allocated = 1;
4050     }
4051 }
4052 
4053 /* Assign @reg to @ts, and update reg_to_temp[]. */
4054 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4055 {
4056     if (ts->val_type == TEMP_VAL_REG) {
4057         TCGReg old = ts->reg;
4058         tcg_debug_assert(s->reg_to_temp[old] == ts);
4059         if (old == reg) {
4060             return;
4061         }
4062         s->reg_to_temp[old] = NULL;
4063     }
4064     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4065     s->reg_to_temp[reg] = ts;
4066     ts->val_type = TEMP_VAL_REG;
4067     ts->reg = reg;
4068 }
4069 
4070 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4071 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4072 {
4073     tcg_debug_assert(type != TEMP_VAL_REG);
4074     if (ts->val_type == TEMP_VAL_REG) {
4075         TCGReg reg = ts->reg;
4076         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4077         s->reg_to_temp[reg] = NULL;
4078     }
4079     ts->val_type = type;
4080 }
4081 
4082 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4083 
4084 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4085    mark it free; otherwise mark it dead.  */
4086 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4087 {
4088     TCGTempVal new_type;
4089 
4090     switch (ts->kind) {
4091     case TEMP_FIXED:
4092         return;
4093     case TEMP_GLOBAL:
4094     case TEMP_TB:
4095         new_type = TEMP_VAL_MEM;
4096         break;
4097     case TEMP_EBB:
4098         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4099         break;
4100     case TEMP_CONST:
4101         new_type = TEMP_VAL_CONST;
4102         break;
4103     default:
4104         g_assert_not_reached();
4105     }
4106     set_temp_val_nonreg(s, ts, new_type);
4107 }
4108 
4109 /* Mark a temporary as dead.  */
4110 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4111 {
4112     temp_free_or_dead(s, ts, 1);
4113 }
4114 
4115 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4116    registers needs to be allocated to store a constant.  If 'free_or_dead'
4117    is non-zero, subsequently release the temporary; if it is positive, the
4118    temp is dead; if it is negative, the temp is free.  */
4119 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4120                       TCGRegSet preferred_regs, int free_or_dead)
4121 {
4122     if (!temp_readonly(ts) && !ts->mem_coherent) {
4123         if (!ts->mem_allocated) {
4124             temp_allocate_frame(s, ts);
4125         }
4126         switch (ts->val_type) {
4127         case TEMP_VAL_CONST:
4128             /* If we're going to free the temp immediately, then we won't
4129                require it later in a register, so attempt to store the
4130                constant to memory directly.  */
4131             if (free_or_dead
4132                 && tcg_out_sti(s, ts->type, ts->val,
4133                                ts->mem_base->reg, ts->mem_offset)) {
4134                 break;
4135             }
4136             temp_load(s, ts, tcg_target_available_regs[ts->type],
4137                       allocated_regs, preferred_regs);
4138             /* fallthrough */
4139 
4140         case TEMP_VAL_REG:
4141             tcg_out_st(s, ts->type, ts->reg,
4142                        ts->mem_base->reg, ts->mem_offset);
4143             break;
4144 
4145         case TEMP_VAL_MEM:
4146             break;
4147 
4148         case TEMP_VAL_DEAD:
4149         default:
4150             g_assert_not_reached();
4151         }
4152         ts->mem_coherent = 1;
4153     }
4154     if (free_or_dead) {
4155         temp_free_or_dead(s, ts, free_or_dead);
4156     }
4157 }
4158 
4159 /* free register 'reg' by spilling the corresponding temporary if necessary */
4160 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4161 {
4162     TCGTemp *ts = s->reg_to_temp[reg];
4163     if (ts != NULL) {
4164         temp_sync(s, ts, allocated_regs, 0, -1);
4165     }
4166 }
4167 
4168 /**
4169  * tcg_reg_alloc:
4170  * @required_regs: Set of registers in which we must allocate.
4171  * @allocated_regs: Set of registers which must be avoided.
4172  * @preferred_regs: Set of registers we should prefer.
4173  * @rev: True if we search the registers in "indirect" order.
4174  *
4175  * The allocated register must be in @required_regs & ~@allocated_regs,
4176  * but if we can put it in @preferred_regs we may save a move later.
4177  */
4178 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4179                             TCGRegSet allocated_regs,
4180                             TCGRegSet preferred_regs, bool rev)
4181 {
4182     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4183     TCGRegSet reg_ct[2];
4184     const int *order;
4185 
4186     reg_ct[1] = required_regs & ~allocated_regs;
4187     tcg_debug_assert(reg_ct[1] != 0);
4188     reg_ct[0] = reg_ct[1] & preferred_regs;
4189 
4190     /* Skip the preferred_regs option if it cannot be satisfied,
4191        or if the preference made no difference.  */
4192     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4193 
4194     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4195 
4196     /* Try free registers, preferences first.  */
4197     for (j = f; j < 2; j++) {
4198         TCGRegSet set = reg_ct[j];
4199 
4200         if (tcg_regset_single(set)) {
4201             /* One register in the set.  */
4202             TCGReg reg = tcg_regset_first(set);
4203             if (s->reg_to_temp[reg] == NULL) {
4204                 return reg;
4205             }
4206         } else {
4207             for (i = 0; i < n; i++) {
4208                 TCGReg reg = order[i];
4209                 if (s->reg_to_temp[reg] == NULL &&
4210                     tcg_regset_test_reg(set, reg)) {
4211                     return reg;
4212                 }
4213             }
4214         }
4215     }
4216 
4217     /* We must spill something.  */
4218     for (j = f; j < 2; j++) {
4219         TCGRegSet set = reg_ct[j];
4220 
4221         if (tcg_regset_single(set)) {
4222             /* One register in the set.  */
4223             TCGReg reg = tcg_regset_first(set);
4224             tcg_reg_free(s, reg, allocated_regs);
4225             return reg;
4226         } else {
4227             for (i = 0; i < n; i++) {
4228                 TCGReg reg = order[i];
4229                 if (tcg_regset_test_reg(set, reg)) {
4230                     tcg_reg_free(s, reg, allocated_regs);
4231                     return reg;
4232                 }
4233             }
4234         }
4235     }
4236 
4237     g_assert_not_reached();
4238 }
4239 
4240 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4241                                  TCGRegSet allocated_regs,
4242                                  TCGRegSet preferred_regs, bool rev)
4243 {
4244     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4245     TCGRegSet reg_ct[2];
4246     const int *order;
4247 
4248     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4249     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4250     tcg_debug_assert(reg_ct[1] != 0);
4251     reg_ct[0] = reg_ct[1] & preferred_regs;
4252 
4253     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4254 
4255     /*
4256      * Skip the preferred_regs option if it cannot be satisfied,
4257      * or if the preference made no difference.
4258      */
4259     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4260 
4261     /*
4262      * Minimize the number of flushes by looking for 2 free registers first,
4263      * then a single flush, then two flushes.
4264      */
4265     for (fmin = 2; fmin >= 0; fmin--) {
4266         for (j = k; j < 2; j++) {
4267             TCGRegSet set = reg_ct[j];
4268 
4269             for (i = 0; i < n; i++) {
4270                 TCGReg reg = order[i];
4271 
4272                 if (tcg_regset_test_reg(set, reg)) {
4273                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4274                     if (f >= fmin) {
4275                         tcg_reg_free(s, reg, allocated_regs);
4276                         tcg_reg_free(s, reg + 1, allocated_regs);
4277                         return reg;
4278                     }
4279                 }
4280             }
4281         }
4282     }
4283     g_assert_not_reached();
4284 }
4285 
4286 /* Make sure the temporary is in a register.  If needed, allocate the register
4287    from DESIRED while avoiding ALLOCATED.  */
4288 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4289                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4290 {
4291     TCGReg reg;
4292 
4293     switch (ts->val_type) {
4294     case TEMP_VAL_REG:
4295         return;
4296     case TEMP_VAL_CONST:
4297         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4298                             preferred_regs, ts->indirect_base);
4299         if (ts->type <= TCG_TYPE_I64) {
4300             tcg_out_movi(s, ts->type, reg, ts->val);
4301         } else {
4302             uint64_t val = ts->val;
4303             MemOp vece = MO_64;
4304 
4305             /*
4306              * Find the minimal vector element that matches the constant.
4307              * The targets will, in general, have to do this search anyway,
4308              * do this generically.
4309              */
4310             if (val == dup_const(MO_8, val)) {
4311                 vece = MO_8;
4312             } else if (val == dup_const(MO_16, val)) {
4313                 vece = MO_16;
4314             } else if (val == dup_const(MO_32, val)) {
4315                 vece = MO_32;
4316             }
4317 
4318             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4319         }
4320         ts->mem_coherent = 0;
4321         break;
4322     case TEMP_VAL_MEM:
4323         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4324                             preferred_regs, ts->indirect_base);
4325         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4326         ts->mem_coherent = 1;
4327         break;
4328     case TEMP_VAL_DEAD:
4329     default:
4330         g_assert_not_reached();
4331     }
4332     set_temp_val_reg(s, ts, reg);
4333 }
4334 
4335 /* Save a temporary to memory. 'allocated_regs' is used in case a
4336    temporary registers needs to be allocated to store a constant.  */
4337 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4338 {
4339     /* The liveness analysis already ensures that globals are back
4340        in memory. Keep an tcg_debug_assert for safety. */
4341     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4342 }
4343 
4344 /* save globals to their canonical location and assume they can be
4345    modified be the following code. 'allocated_regs' is used in case a
4346    temporary registers needs to be allocated to store a constant. */
4347 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4348 {
4349     int i, n;
4350 
4351     for (i = 0, n = s->nb_globals; i < n; i++) {
4352         temp_save(s, &s->temps[i], allocated_regs);
4353     }
4354 }
4355 
4356 /* sync globals to their canonical location and assume they can be
4357    read by the following code. 'allocated_regs' is used in case a
4358    temporary registers needs to be allocated to store a constant. */
4359 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4360 {
4361     int i, n;
4362 
4363     for (i = 0, n = s->nb_globals; i < n; i++) {
4364         TCGTemp *ts = &s->temps[i];
4365         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4366                          || ts->kind == TEMP_FIXED
4367                          || ts->mem_coherent);
4368     }
4369 }
4370 
4371 /* at the end of a basic block, we assume all temporaries are dead and
4372    all globals are stored at their canonical location. */
4373 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4374 {
4375     int i;
4376 
4377     for (i = s->nb_globals; i < s->nb_temps; i++) {
4378         TCGTemp *ts = &s->temps[i];
4379 
4380         switch (ts->kind) {
4381         case TEMP_TB:
4382             temp_save(s, ts, allocated_regs);
4383             break;
4384         case TEMP_EBB:
4385             /* The liveness analysis already ensures that temps are dead.
4386                Keep an tcg_debug_assert for safety. */
4387             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4388             break;
4389         case TEMP_CONST:
4390             /* Similarly, we should have freed any allocated register. */
4391             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4392             break;
4393         default:
4394             g_assert_not_reached();
4395         }
4396     }
4397 
4398     save_globals(s, allocated_regs);
4399 }
4400 
4401 /*
4402  * At a conditional branch, we assume all temporaries are dead unless
4403  * explicitly live-across-conditional-branch; all globals and local
4404  * temps are synced to their location.
4405  */
4406 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4407 {
4408     sync_globals(s, allocated_regs);
4409 
4410     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4411         TCGTemp *ts = &s->temps[i];
4412         /*
4413          * The liveness analysis already ensures that temps are dead.
4414          * Keep tcg_debug_asserts for safety.
4415          */
4416         switch (ts->kind) {
4417         case TEMP_TB:
4418             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4419             break;
4420         case TEMP_EBB:
4421         case TEMP_CONST:
4422             break;
4423         default:
4424             g_assert_not_reached();
4425         }
4426     }
4427 }
4428 
4429 /*
4430  * Specialized code generation for INDEX_op_mov_* with a constant.
4431  */
4432 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4433                                   tcg_target_ulong val, TCGLifeData arg_life,
4434                                   TCGRegSet preferred_regs)
4435 {
4436     /* ENV should not be modified.  */
4437     tcg_debug_assert(!temp_readonly(ots));
4438 
4439     /* The movi is not explicitly generated here.  */
4440     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4441     ots->val = val;
4442     ots->mem_coherent = 0;
4443     if (NEED_SYNC_ARG(0)) {
4444         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4445     } else if (IS_DEAD_ARG(0)) {
4446         temp_dead(s, ots);
4447     }
4448 }
4449 
4450 /*
4451  * Specialized code generation for INDEX_op_mov_*.
4452  */
4453 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4454 {
4455     const TCGLifeData arg_life = op->life;
4456     TCGRegSet allocated_regs, preferred_regs;
4457     TCGTemp *ts, *ots;
4458     TCGType otype, itype;
4459     TCGReg oreg, ireg;
4460 
4461     allocated_regs = s->reserved_regs;
4462     preferred_regs = output_pref(op, 0);
4463     ots = arg_temp(op->args[0]);
4464     ts = arg_temp(op->args[1]);
4465 
4466     /* ENV should not be modified.  */
4467     tcg_debug_assert(!temp_readonly(ots));
4468 
4469     /* Note that otype != itype for no-op truncation.  */
4470     otype = ots->type;
4471     itype = ts->type;
4472 
4473     if (ts->val_type == TEMP_VAL_CONST) {
4474         /* propagate constant or generate sti */
4475         tcg_target_ulong val = ts->val;
4476         if (IS_DEAD_ARG(1)) {
4477             temp_dead(s, ts);
4478         }
4479         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4480         return;
4481     }
4482 
4483     /* If the source value is in memory we're going to be forced
4484        to have it in a register in order to perform the copy.  Copy
4485        the SOURCE value into its own register first, that way we
4486        don't have to reload SOURCE the next time it is used. */
4487     if (ts->val_type == TEMP_VAL_MEM) {
4488         temp_load(s, ts, tcg_target_available_regs[itype],
4489                   allocated_regs, preferred_regs);
4490     }
4491     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4492     ireg = ts->reg;
4493 
4494     if (IS_DEAD_ARG(0)) {
4495         /* mov to a non-saved dead register makes no sense (even with
4496            liveness analysis disabled). */
4497         tcg_debug_assert(NEED_SYNC_ARG(0));
4498         if (!ots->mem_allocated) {
4499             temp_allocate_frame(s, ots);
4500         }
4501         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4502         if (IS_DEAD_ARG(1)) {
4503             temp_dead(s, ts);
4504         }
4505         temp_dead(s, ots);
4506         return;
4507     }
4508 
4509     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4510         /*
4511          * The mov can be suppressed.  Kill input first, so that it
4512          * is unlinked from reg_to_temp, then set the output to the
4513          * reg that we saved from the input.
4514          */
4515         temp_dead(s, ts);
4516         oreg = ireg;
4517     } else {
4518         if (ots->val_type == TEMP_VAL_REG) {
4519             oreg = ots->reg;
4520         } else {
4521             /* Make sure to not spill the input register during allocation. */
4522             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4523                                  allocated_regs | ((TCGRegSet)1 << ireg),
4524                                  preferred_regs, ots->indirect_base);
4525         }
4526         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4527             /*
4528              * Cross register class move not supported.
4529              * Store the source register into the destination slot
4530              * and leave the destination temp as TEMP_VAL_MEM.
4531              */
4532             assert(!temp_readonly(ots));
4533             if (!ts->mem_allocated) {
4534                 temp_allocate_frame(s, ots);
4535             }
4536             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4537             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4538             ots->mem_coherent = 1;
4539             return;
4540         }
4541     }
4542     set_temp_val_reg(s, ots, oreg);
4543     ots->mem_coherent = 0;
4544 
4545     if (NEED_SYNC_ARG(0)) {
4546         temp_sync(s, ots, allocated_regs, 0, 0);
4547     }
4548 }
4549 
4550 /*
4551  * Specialized code generation for INDEX_op_dup_vec.
4552  */
4553 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4554 {
4555     const TCGLifeData arg_life = op->life;
4556     TCGRegSet dup_out_regs, dup_in_regs;
4557     TCGTemp *its, *ots;
4558     TCGType itype, vtype;
4559     unsigned vece;
4560     int lowpart_ofs;
4561     bool ok;
4562 
4563     ots = arg_temp(op->args[0]);
4564     its = arg_temp(op->args[1]);
4565 
4566     /* ENV should not be modified.  */
4567     tcg_debug_assert(!temp_readonly(ots));
4568 
4569     itype = its->type;
4570     vece = TCGOP_VECE(op);
4571     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4572 
4573     if (its->val_type == TEMP_VAL_CONST) {
4574         /* Propagate constant via movi -> dupi.  */
4575         tcg_target_ulong val = its->val;
4576         if (IS_DEAD_ARG(1)) {
4577             temp_dead(s, its);
4578         }
4579         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4580         return;
4581     }
4582 
4583     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4584     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4585 
4586     /* Allocate the output register now.  */
4587     if (ots->val_type != TEMP_VAL_REG) {
4588         TCGRegSet allocated_regs = s->reserved_regs;
4589         TCGReg oreg;
4590 
4591         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4592             /* Make sure to not spill the input register. */
4593             tcg_regset_set_reg(allocated_regs, its->reg);
4594         }
4595         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4596                              output_pref(op, 0), ots->indirect_base);
4597         set_temp_val_reg(s, ots, oreg);
4598     }
4599 
4600     switch (its->val_type) {
4601     case TEMP_VAL_REG:
4602         /*
4603          * The dup constriaints must be broad, covering all possible VECE.
4604          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4605          * to fail, indicating that extra moves are required for that case.
4606          */
4607         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4608             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4609                 goto done;
4610             }
4611             /* Try again from memory or a vector input register.  */
4612         }
4613         if (!its->mem_coherent) {
4614             /*
4615              * The input register is not synced, and so an extra store
4616              * would be required to use memory.  Attempt an integer-vector
4617              * register move first.  We do not have a TCGRegSet for this.
4618              */
4619             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4620                 break;
4621             }
4622             /* Sync the temp back to its slot and load from there.  */
4623             temp_sync(s, its, s->reserved_regs, 0, 0);
4624         }
4625         /* fall through */
4626 
4627     case TEMP_VAL_MEM:
4628         lowpart_ofs = 0;
4629         if (HOST_BIG_ENDIAN) {
4630             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4631         }
4632         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4633                              its->mem_offset + lowpart_ofs)) {
4634             goto done;
4635         }
4636         /* Load the input into the destination vector register. */
4637         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4638         break;
4639 
4640     default:
4641         g_assert_not_reached();
4642     }
4643 
4644     /* We now have a vector input register, so dup must succeed. */
4645     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4646     tcg_debug_assert(ok);
4647 
4648  done:
4649     ots->mem_coherent = 0;
4650     if (IS_DEAD_ARG(1)) {
4651         temp_dead(s, its);
4652     }
4653     if (NEED_SYNC_ARG(0)) {
4654         temp_sync(s, ots, s->reserved_regs, 0, 0);
4655     }
4656     if (IS_DEAD_ARG(0)) {
4657         temp_dead(s, ots);
4658     }
4659 }
4660 
/*
 * Generic register allocation and code emission for a single TCG op.
 *
 * The work proceeds in phases:
 *   1. copy the op's constant arguments into new_args[];
 *   2. place every input according to its constraint, in sort_index
 *      order, folding constants that tcg_target_const_match() accepts;
 *   3. kill the inputs that op->life marks dead;
 *   4. for conditional branches and basic-block ends, run the matching
 *      helper; otherwise free call-clobbered registers and/or check
 *      globals as the op flags require, then pick output registers;
 *   5. emit the host instruction;
 *   6. sync or kill each output as op->life requests.
 *
 * new_args[] receives the register number (or constant value) for each
 * argument and const_args[] flags which inputs were passed as constants.
 */
4661 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4662 {
4663     const TCGLifeData arg_life = op->life;
4664     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4665     TCGRegSet i_allocated_regs;
4666     TCGRegSet o_allocated_regs;
4667     int i, k, nb_iargs, nb_oargs;
4668     TCGReg reg;
4669     TCGArg arg;
4670     const TCGArgConstraint *arg_ct;
4671     TCGTemp *ts;
4672     TCGArg new_args[TCG_MAX_OP_ARGS];
4673     int const_args[TCG_MAX_OP_ARGS];
4674 
4675     nb_oargs = def->nb_oargs;
4676     nb_iargs = def->nb_iargs;
4677 
4678     /* copy constants */
4679     memcpy(new_args + nb_oargs + nb_iargs,
4680            op->args + nb_oargs + nb_iargs,
4681            sizeof(TCGArg) * def->nb_cargs);
4682 
     /* Inputs and outputs track their busy registers in separate sets. */
4683     i_allocated_regs = s->reserved_regs;
4684     o_allocated_regs = s->reserved_regs;
4685 
4686     /* satisfy input constraints */
4687     for (k = 0; k < nb_iargs; k++) {
4688         TCGRegSet i_preferred_regs, i_required_regs;
4689         bool allocate_new_reg, copyto_new_reg;
4690         TCGTemp *ts2;
4691         int i1, i2;
4692 
         /* Visit inputs in the order given by the constraint sort_index. */
4693         i = def->args_ct[nb_oargs + k].sort_index;
4694         arg = op->args[i];
4695         arg_ct = &def->args_ct[i];
4696         ts = arg_temp(arg);
4697 
4698         if (ts->val_type == TEMP_VAL_CONST
4699             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4700             /* constant is OK for instruction */
4701             const_args[i] = 1;
4702             new_args[i] = ts->val;
4703             continue;
4704         }
4705 
         /*
          * NOTE(review): ts->reg is read unconditionally here; the cases
          * below test ts->val_type == TEMP_VAL_REG before relying on it.
          */
4706         reg = ts->reg;
4707         i_preferred_regs = 0;
4708         i_required_regs = arg_ct->regs;
4709         allocate_new_reg = false;
4710         copyto_new_reg = false;
4711 
4712         switch (arg_ct->pair) {
4713         case 0: /* not paired */
4714             if (arg_ct->ialias) {
4715                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4716 
4717                 /*
4718                  * If the input is readonly, then it cannot also be an
4719                  * output and aliased to itself.  If the input is not
4720                  * dead after the instruction, we must allocate a new
4721                  * register and move it.
4722                  */
4723                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4724                     || def->args_ct[arg_ct->alias_index].newreg) {
4725                     allocate_new_reg = true;
4726                 } else if (ts->val_type == TEMP_VAL_REG) {
4727                     /*
4728                      * Check if the current register has already been
4729                      * allocated for another input.
4730                      */
4731                     allocate_new_reg =
4732                         tcg_regset_test_reg(i_allocated_regs, reg);
4733                 }
4734             }
4735             if (!allocate_new_reg) {
4736                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4737                           i_preferred_regs);
4738                 reg = ts->reg;
4739                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4740             }
4741             if (allocate_new_reg) {
4742                 /*
4743                  * Allocate a new register matching the constraint
4744                  * and move the temporary register into it.
4745                  */
4746                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4747                           i_allocated_regs, 0);
4748                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4749                                     i_preferred_regs, ts->indirect_base);
4750                 copyto_new_reg = true;
4751             }
4752             break;
4753 
4754         case 1:
4755             /* First of an input pair; if i1 == i2, the second is an output. */
4756             i1 = i;
4757             i2 = arg_ct->pair_index;
4758             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4759 
4760             /*
4761              * It is easier to default to allocating a new pair
4762              * and to identify a few cases where it's not required.
4763              */
4764             if (arg_ct->ialias) {
4765                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4766                 if (IS_DEAD_ARG(i1) &&
4767                     IS_DEAD_ARG(i2) &&
4768                     !temp_readonly(ts) &&
4769                     ts->val_type == TEMP_VAL_REG &&
4770                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4771                     tcg_regset_test_reg(i_required_regs, reg) &&
4772                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4773                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4774                     (ts2
4775                      ? ts2->val_type == TEMP_VAL_REG &&
4776                        ts2->reg == reg + 1 &&
4777                        !temp_readonly(ts2)
4778                      : s->reg_to_temp[reg + 1] == NULL)) {
4779                     break;
4780                 }
4781             } else {
4782                 /* Without aliasing, the pair must also be an input. */
4783                 tcg_debug_assert(ts2);
4784                 if (ts->val_type == TEMP_VAL_REG &&
4785                     ts2->val_type == TEMP_VAL_REG &&
4786                     ts2->reg == reg + 1 &&
4787                     tcg_regset_test_reg(i_required_regs, reg)) {
4788                     break;
4789                 }
4790             }
4791             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4792                                      0, ts->indirect_base);
4793             goto do_pair;
4794 
4795         case 2: /* pair second */
             /* The second half uses the register after the first half's. */
4796             reg = new_args[arg_ct->pair_index] + 1;
4797             goto do_pair;
4798 
4799         case 3: /* ialias with second output, no first input */
4800             tcg_debug_assert(arg_ct->ialias);
4801             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4802 
4803             if (IS_DEAD_ARG(i) &&
4804                 !temp_readonly(ts) &&
4805                 ts->val_type == TEMP_VAL_REG &&
4806                 reg > 0 &&
4807                 s->reg_to_temp[reg - 1] == NULL &&
4808                 tcg_regset_test_reg(i_required_regs, reg) &&
4809                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4810                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4811                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4812                 break;
4813             }
4814             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4815                                      i_allocated_regs, 0,
4816                                      ts->indirect_base);
4817             tcg_regset_set_reg(i_allocated_regs, reg);
4818             reg += 1;
4819             goto do_pair;
4820 
4821         do_pair:
4822             /*
4823              * If an aliased input is not dead after the instruction,
4824              * we must allocate a new register and move it.
4825              */
4826             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4827                 TCGRegSet t_allocated_regs = i_allocated_regs;
4828 
4829                 /*
4830                  * Because of the alias, and the continued life, make sure
4831                  * that the temp is somewhere *other* than the reg pair,
4832                  * and we get a copy in reg.
4833                  */
4834                 tcg_regset_set_reg(t_allocated_regs, reg);
4835                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4836                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4837                     /* If ts was already in reg, copy it somewhere else. */
4838                     TCGReg nr;
4839                     bool ok;
4840 
4841                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4842                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4843                                        t_allocated_regs, 0, ts->indirect_base);
4844                     ok = tcg_out_mov(s, ts->type, nr, reg);
4845                     tcg_debug_assert(ok);
4846 
4847                     set_temp_val_reg(s, ts, nr);
4848                 } else {
4849                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4850                               t_allocated_regs, 0);
4851                     copyto_new_reg = true;
4852                 }
4853             } else {
4854                 /* Preferably allocate to reg, otherwise copy. */
4855                 i_required_regs = (TCGRegSet)1 << reg;
4856                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4857                           i_preferred_regs);
4858                 copyto_new_reg = ts->reg != reg;
4859             }
4860             break;
4861 
4862         default:
4863             g_assert_not_reached();
4864         }
4865 
4866         if (copyto_new_reg) {
4867             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4868                 /*
4869                  * Cross register class move not supported.  Sync the
4870                  * temp back to its slot and load from there.
4871                  */
4872                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4873                 tcg_out_ld(s, ts->type, reg,
4874                            ts->mem_base->reg, ts->mem_offset);
4875             }
4876         }
         /* Record the chosen register and keep later inputs away from it. */
4877         new_args[i] = reg;
4878         const_args[i] = 0;
4879         tcg_regset_set_reg(i_allocated_regs, reg);
4880     }
4881 
4882     /* mark dead temporaries and free the associated registers */
4883     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4884         if (IS_DEAD_ARG(i)) {
4885             temp_dead(s, arg_temp(op->args[i]));
4886         }
4887     }
4888 
     /* Output allocation happens only in the final else branch below. */
4889     if (def->flags & TCG_OPF_COND_BRANCH) {
4890         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4891     } else if (def->flags & TCG_OPF_BB_END) {
4892         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4893     } else {
4894         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4895             /* XXX: permit generic clobber register list ? */
4896             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4897                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4898                     tcg_reg_free(s, i, i_allocated_regs);
4899                 }
4900             }
4901         }
4902         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4903             /* sync globals if the op has side effects and might trigger
4904                an exception. */
4905             sync_globals(s, i_allocated_regs);
4906         }
4907 
4908         /* satisfy the output constraints */
4909         for(k = 0; k < nb_oargs; k++) {
4910             i = def->args_ct[k].sort_index;
4911             arg = op->args[i];
4912             arg_ct = &def->args_ct[i];
4913             ts = arg_temp(arg);
4914 
4915             /* ENV should not be modified.  */
4916             tcg_debug_assert(!temp_readonly(ts));
4917 
4918             switch (arg_ct->pair) {
4919             case 0: /* not paired */
                 /*
                  * Reuse the aliased input's register unless that input
                  * was passed as a constant.  A "newreg" output must avoid
                  * both the input and the output registers; any other
                  * output only avoids registers taken by earlier outputs.
                  */
4920                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4921                     reg = new_args[arg_ct->alias_index];
4922                 } else if (arg_ct->newreg) {
4923                     reg = tcg_reg_alloc(s, arg_ct->regs,
4924                                         i_allocated_regs | o_allocated_regs,
4925                                         output_pref(op, k), ts->indirect_base);
4926                 } else {
4927                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4928                                         output_pref(op, k), ts->indirect_base);
4929                 }
4930                 break;
4931 
4932             case 1: /* first of pair */
4933                 tcg_debug_assert(!arg_ct->newreg);
4934                 if (arg_ct->oalias) {
4935                     reg = new_args[arg_ct->alias_index];
4936                     break;
4937                 }
4938                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4939                                          output_pref(op, k), ts->indirect_base);
4940                 break;
4941 
4942             case 2: /* second of pair */
4943                 tcg_debug_assert(!arg_ct->newreg);
4944                 if (arg_ct->oalias) {
4945                     reg = new_args[arg_ct->alias_index];
4946                 } else {
4947                     reg = new_args[arg_ct->pair_index] + 1;
4948                 }
4949                 break;
4950 
4951             case 3: /* first of pair, aliasing with a second input */
4952                 tcg_debug_assert(!arg_ct->newreg);
4953                 reg = new_args[arg_ct->pair_index] - 1;
4954                 break;
4955 
4956             default:
4957                 g_assert_not_reached();
4958             }
4959             tcg_regset_set_reg(o_allocated_regs, reg);
4960             set_temp_val_reg(s, ts, reg);
4961             ts->mem_coherent = 0;
4962             new_args[i] = reg;
4963         }
4964     }
4965 
4966     /* emit instruction */
     /*
      * The extension opcodes listed here go through dedicated tcg_out_ext*
      * helpers; every other opcode is handed to tcg_out_vec_op() when it
      * carries TCG_OPF_VECTOR, or to tcg_out_op() otherwise.
      */
4967     switch (op->opc) {
4968     case INDEX_op_ext8s_i32:
4969         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4970         break;
4971     case INDEX_op_ext8s_i64:
4972         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4973         break;
4974     case INDEX_op_ext8u_i32:
4975     case INDEX_op_ext8u_i64:
4976         tcg_out_ext8u(s, new_args[0], new_args[1]);
4977         break;
4978     case INDEX_op_ext16s_i32:
4979         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4980         break;
4981     case INDEX_op_ext16s_i64:
4982         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4983         break;
4984     case INDEX_op_ext16u_i32:
4985     case INDEX_op_ext16u_i64:
4986         tcg_out_ext16u(s, new_args[0], new_args[1]);
4987         break;
4988     case INDEX_op_ext32s_i64:
4989         tcg_out_ext32s(s, new_args[0], new_args[1]);
4990         break;
4991     case INDEX_op_ext32u_i64:
4992         tcg_out_ext32u(s, new_args[0], new_args[1]);
4993         break;
4994     case INDEX_op_ext_i32_i64:
4995         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4996         break;
4997     case INDEX_op_extu_i32_i64:
4998         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4999         break;
5000     case INDEX_op_extrl_i64_i32:
5001         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5002         break;
5003     default:
5004         if (def->flags & TCG_OPF_VECTOR) {
5005             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5006                            new_args, const_args);
5007         } else {
5008             tcg_out_op(s, op->opc, new_args, const_args);
5009         }
5010         break;
5011     }
5012 
5013     /* move the outputs in the correct register if needed */
5014     for(i = 0; i < nb_oargs; i++) {
5015         ts = arg_temp(op->args[i]);
5016 
5017         /* ENV should not be modified.  */
5018         tcg_debug_assert(!temp_readonly(ts));
5019 
5020         if (NEED_SYNC_ARG(i)) {
5021             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5022         } else if (IS_DEAD_ARG(i)) {
5023             temp_dead(s, ts);
5024         }
5025     }
5026 }
5027 
5028 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5029 {
5030     const TCGLifeData arg_life = op->life;
5031     TCGTemp *ots, *itsl, *itsh;
5032     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5033 
5034     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5035     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5036     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5037 
5038     ots = arg_temp(op->args[0]);
5039     itsl = arg_temp(op->args[1]);
5040     itsh = arg_temp(op->args[2]);
5041 
5042     /* ENV should not be modified.  */
5043     tcg_debug_assert(!temp_readonly(ots));
5044 
5045     /* Allocate the output register now.  */
5046     if (ots->val_type != TEMP_VAL_REG) {
5047         TCGRegSet allocated_regs = s->reserved_regs;
5048         TCGRegSet dup_out_regs =
5049             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5050         TCGReg oreg;
5051 
5052         /* Make sure to not spill the input registers. */
5053         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5054             tcg_regset_set_reg(allocated_regs, itsl->reg);
5055         }
5056         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5057             tcg_regset_set_reg(allocated_regs, itsh->reg);
5058         }
5059 
5060         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5061                              output_pref(op, 0), ots->indirect_base);
5062         set_temp_val_reg(s, ots, oreg);
5063     }
5064 
5065     /* Promote dup2 of immediates to dupi_vec. */
5066     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5067         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5068         MemOp vece = MO_64;
5069 
5070         if (val == dup_const(MO_8, val)) {
5071             vece = MO_8;
5072         } else if (val == dup_const(MO_16, val)) {
5073             vece = MO_16;
5074         } else if (val == dup_const(MO_32, val)) {
5075             vece = MO_32;
5076         }
5077 
5078         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5079         goto done;
5080     }
5081 
5082     /* If the two inputs form one 64-bit value, try dupm_vec. */
5083     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5084         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5085         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5086         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5087 
5088         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5089         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5090 
5091         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5092                              its->mem_base->reg, its->mem_offset)) {
5093             goto done;
5094         }
5095     }
5096 
5097     /* Fall back to generic expansion. */
5098     return false;
5099 
5100  done:
5101     ots->mem_coherent = 0;
5102     if (IS_DEAD_ARG(1)) {
5103         temp_dead(s, itsl);
5104     }
5105     if (IS_DEAD_ARG(2)) {
5106         temp_dead(s, itsh);
5107     }
5108     if (NEED_SYNC_ARG(0)) {
5109         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5110     } else if (IS_DEAD_ARG(0)) {
5111         temp_dead(s, ots);
5112     }
5113     return true;
5114 }
5115 
5116 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5117                          TCGRegSet allocated_regs)
5118 {
5119     if (ts->val_type == TEMP_VAL_REG) {
5120         if (ts->reg != reg) {
5121             tcg_reg_free(s, reg, allocated_regs);
5122             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5123                 /*
5124                  * Cross register class move not supported.  Sync the
5125                  * temp back to its slot and load from there.
5126                  */
5127                 temp_sync(s, ts, allocated_regs, 0, 0);
5128                 tcg_out_ld(s, ts->type, reg,
5129                            ts->mem_base->reg, ts->mem_offset);
5130             }
5131         }
5132     } else {
5133         TCGRegSet arg_set = 0;
5134 
5135         tcg_reg_free(s, reg, allocated_regs);
5136         tcg_regset_set_reg(arg_set, reg);
5137         temp_load(s, ts, arg_set, allocated_regs, 0);
5138     }
5139 }
5140 
5141 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5142                          TCGRegSet allocated_regs)
5143 {
5144     /*
5145      * When the destination is on the stack, load up the temp and store.
5146      * If there are many call-saved registers, the temp might live to
5147      * see another use; otherwise it'll be discarded.
5148      */
5149     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5150     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5151                arg_slot_stk_ofs(arg_slot));
5152 }
5153 
5154 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5155                             TCGTemp *ts, TCGRegSet *allocated_regs)
5156 {
5157     if (arg_slot_reg_p(l->arg_slot)) {
5158         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5159         load_arg_reg(s, reg, ts, *allocated_regs);
5160         tcg_regset_set_reg(*allocated_regs, reg);
5161     } else {
5162         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5163     }
5164 }
5165 
5166 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5167                          intptr_t ref_off, TCGRegSet *allocated_regs)
5168 {
5169     TCGReg reg;
5170 
5171     if (arg_slot_reg_p(arg_slot)) {
5172         reg = tcg_target_call_iarg_regs[arg_slot];
5173         tcg_reg_free(s, reg, *allocated_regs);
5174         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5175         tcg_regset_set_reg(*allocated_regs, reg);
5176     } else {
5177         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5178                             *allocated_regs, 0, false);
5179         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5180         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5181                    arg_slot_stk_ofs(arg_slot));
5182     }
5183 }
5184 
5185 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5186 {
5187     const int nb_oargs = TCGOP_CALLO(op);
5188     const int nb_iargs = TCGOP_CALLI(op);
5189     const TCGLifeData arg_life = op->life;
5190     const TCGHelperInfo *info = tcg_call_info(op);
5191     TCGRegSet allocated_regs = s->reserved_regs;
5192     int i;
5193 
5194     /*
5195      * Move inputs into place in reverse order,
5196      * so that we place stacked arguments first.
5197      */
5198     for (i = nb_iargs - 1; i >= 0; --i) {
5199         const TCGCallArgumentLoc *loc = &info->in[i];
5200         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5201 
5202         switch (loc->kind) {
5203         case TCG_CALL_ARG_NORMAL:
5204         case TCG_CALL_ARG_EXTEND_U:
5205         case TCG_CALL_ARG_EXTEND_S:
5206             load_arg_normal(s, loc, ts, &allocated_regs);
5207             break;
5208         case TCG_CALL_ARG_BY_REF:
5209             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5210             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5211                          arg_slot_stk_ofs(loc->ref_slot),
5212                          &allocated_regs);
5213             break;
5214         case TCG_CALL_ARG_BY_REF_N:
5215             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5216             break;
5217         default:
5218             g_assert_not_reached();
5219         }
5220     }
5221 
5222     /* Mark dead temporaries and free the associated registers.  */
5223     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5224         if (IS_DEAD_ARG(i)) {
5225             temp_dead(s, arg_temp(op->args[i]));
5226         }
5227     }
5228 
5229     /* Clobber call registers.  */
5230     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5231         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5232             tcg_reg_free(s, i, allocated_regs);
5233         }
5234     }
5235 
5236     /*
5237      * Save globals if they might be written by the helper,
5238      * sync them if they might be read.
5239      */
5240     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5241         /* Nothing to do */
5242     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5243         sync_globals(s, allocated_regs);
5244     } else {
5245         save_globals(s, allocated_regs);
5246     }
5247 
5248     /*
5249      * If the ABI passes a pointer to the returned struct as the first
5250      * argument, load that now.  Pass a pointer to the output home slot.
5251      */
5252     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5253         TCGTemp *ts = arg_temp(op->args[0]);
5254 
5255         if (!ts->mem_allocated) {
5256             temp_allocate_frame(s, ts);
5257         }
5258         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5259     }
5260 
5261     tcg_out_call(s, tcg_call_func(op), info);
5262 
5263     /* Assign output registers and emit moves if needed.  */
5264     switch (info->out_kind) {
5265     case TCG_CALL_RET_NORMAL:
5266         for (i = 0; i < nb_oargs; i++) {
5267             TCGTemp *ts = arg_temp(op->args[i]);
5268             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5269 
5270             /* ENV should not be modified.  */
5271             tcg_debug_assert(!temp_readonly(ts));
5272 
5273             set_temp_val_reg(s, ts, reg);
5274             ts->mem_coherent = 0;
5275         }
5276         break;
5277 
5278     case TCG_CALL_RET_BY_VEC:
5279         {
5280             TCGTemp *ts = arg_temp(op->args[0]);
5281 
5282             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5283             tcg_debug_assert(ts->temp_subindex == 0);
5284             if (!ts->mem_allocated) {
5285                 temp_allocate_frame(s, ts);
5286             }
5287             tcg_out_st(s, TCG_TYPE_V128,
5288                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5289                        ts->mem_base->reg, ts->mem_offset);
5290         }
5291         /* fall through to mark all parts in memory */
5292 
5293     case TCG_CALL_RET_BY_REF:
5294         /* The callee has performed a write through the reference. */
5295         for (i = 0; i < nb_oargs; i++) {
5296             TCGTemp *ts = arg_temp(op->args[i]);
5297             ts->val_type = TEMP_VAL_MEM;
5298         }
5299         break;
5300 
5301     default:
5302         g_assert_not_reached();
5303     }
5304 
5305     /* Flush or discard output registers as needed. */
5306     for (i = 0; i < nb_oargs; i++) {
5307         TCGTemp *ts = arg_temp(op->args[i]);
5308         if (NEED_SYNC_ARG(i)) {
5309             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5310         } else if (IS_DEAD_ARG(i)) {
5311             temp_dead(s, ts);
5312         }
5313     }
5314 }
5315 
5316 /**
5317  * atom_and_align_for_opc:
5318  * @s: tcg context
5319  * @opc: memory operation code
5320  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5321  * @allow_two_ops: true if we are prepared to issue two operations
5322  *
5323  * Return the alignment and atomicity to use for the inline fast path
5324  * for the given memory operation.  The alignment may be larger than
5325  * that specified in @opc, and the correct alignment will be diagnosed
5326  * by the slow path helper.
5327  *
5328  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5329  * and issue two loads or stores for subalignment.
5330  */
5331 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5332                                            MemOp host_atom, bool allow_two_ops)
5333 {
5334     MemOp align = get_alignment_bits(opc);
5335     MemOp size = opc & MO_SIZE;
5336     MemOp half = size ? size - 1 : 0;
5337     MemOp atmax;
5338     MemOp atom;
5339 
5340     /* When serialized, no further atomicity required.  */
5341     if (s->gen_tb->cflags & CF_PARALLEL) {
5342         atom = opc & MO_ATOM_MASK;
5343     } else {
5344         atom = MO_ATOM_NONE;
5345     }
5346 
5347     switch (atom) {
5348     case MO_ATOM_NONE:
5349         /* The operation requires no specific atomicity. */
5350         atmax = MO_8;
5351         break;
5352 
5353     case MO_ATOM_IFALIGN:
5354         atmax = size;
5355         break;
5356 
5357     case MO_ATOM_IFALIGN_PAIR:
5358         atmax = half;
5359         break;
5360 
5361     case MO_ATOM_WITHIN16:
5362         atmax = size;
5363         if (size == MO_128) {
5364             /* Misalignment implies !within16, and therefore no atomicity. */
5365         } else if (host_atom != MO_ATOM_WITHIN16) {
5366             /* The host does not implement within16, so require alignment. */
5367             align = MAX(align, size);
5368         }
5369         break;
5370 
5371     case MO_ATOM_WITHIN16_PAIR:
5372         atmax = size;
5373         /*
5374          * Misalignment implies !within16, and therefore half atomicity.
5375          * Any host prepared for two operations can implement this with
5376          * half alignment.
5377          */
5378         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5379             align = MAX(align, half);
5380         }
5381         break;
5382 
5383     case MO_ATOM_SUBALIGN:
5384         atmax = size;
5385         if (host_atom != MO_ATOM_SUBALIGN) {
5386             /* If unaligned but not odd, there are subobjects up to half. */
5387             if (allow_two_ops) {
5388                 align = MAX(align, half);
5389             } else {
5390                 align = MAX(align, size);
5391             }
5392         }
5393         break;
5394 
5395     default:
5396         g_assert_not_reached();
5397     }
5398 
5399     return (TCGAtomAlign){ .atom = atmax, .align = align };
5400 }
5401 
5402 /*
5403  * Similarly for qemu_ld/st slow path helpers.
5404  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5405  * using only the provided backend tcg_out_* functions.
5406  */
5407 
5408 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5409 {
5410     int ofs = arg_slot_stk_ofs(slot);
5411 
5412     /*
5413      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5414      * require extension to uint64_t, adjust the address for uint32_t.
5415      */
5416     if (HOST_BIG_ENDIAN &&
5417         TCG_TARGET_REG_BITS == 64 &&
5418         type == TCG_TYPE_I32) {
5419         ofs += 4;
5420     }
5421     return ofs;
5422 }
5423 
5424 static void tcg_out_helper_load_slots(TCGContext *s,
5425                                       unsigned nmov, TCGMovExtend *mov,
5426                                       const TCGLdstHelperParam *parm)
5427 {
5428     unsigned i;
5429     TCGReg dst3;
5430 
5431     /*
5432      * Start from the end, storing to the stack first.
5433      * This frees those registers, so we need not consider overlap.
5434      */
5435     for (i = nmov; i-- > 0; ) {
5436         unsigned slot = mov[i].dst;
5437 
5438         if (arg_slot_reg_p(slot)) {
5439             goto found_reg;
5440         }
5441 
5442         TCGReg src = mov[i].src;
5443         TCGType dst_type = mov[i].dst_type;
5444         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5445 
5446         /* The argument is going onto the stack; extend into scratch. */
5447         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5448             tcg_debug_assert(parm->ntmp != 0);
5449             mov[i].dst = src = parm->tmp[0];
5450             tcg_out_movext1(s, &mov[i]);
5451         }
5452 
5453         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5454                    tcg_out_helper_stk_ofs(dst_type, slot));
5455     }
5456     return;
5457 
5458  found_reg:
5459     /*
5460      * The remaining arguments are in registers.
5461      * Convert slot numbers to argument registers.
5462      */
5463     nmov = i + 1;
5464     for (i = 0; i < nmov; ++i) {
5465         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5466     }
5467 
5468     switch (nmov) {
5469     case 4:
5470         /* The backend must have provided enough temps for the worst case. */
5471         tcg_debug_assert(parm->ntmp >= 2);
5472 
5473         dst3 = mov[3].dst;
5474         for (unsigned j = 0; j < 3; ++j) {
5475             if (dst3 == mov[j].src) {
5476                 /*
5477                  * Conflict. Copy the source to a temporary, perform the
5478                  * remaining moves, then the extension from our scratch
5479                  * on the way out.
5480                  */
5481                 TCGReg scratch = parm->tmp[1];
5482 
5483                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5484                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5485                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5486                 break;
5487             }
5488         }
5489 
5490         /* No conflicts: perform this move and continue. */
5491         tcg_out_movext1(s, &mov[3]);
5492         /* fall through */
5493 
5494     case 3:
5495         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5496                         parm->ntmp ? parm->tmp[0] : -1);
5497         break;
5498     case 2:
5499         tcg_out_movext2(s, mov, mov + 1,
5500                         parm->ntmp ? parm->tmp[0] : -1);
5501         break;
5502     case 1:
5503         tcg_out_movext1(s, mov);
5504         break;
5505     default:
5506         g_assert_not_reached();
5507     }
5508 }
5509 
5510 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5511                                     TCGType type, tcg_target_long imm,
5512                                     const TCGLdstHelperParam *parm)
5513 {
5514     if (arg_slot_reg_p(slot)) {
5515         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5516     } else {
5517         int ofs = tcg_out_helper_stk_ofs(type, slot);
5518         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5519             tcg_debug_assert(parm->ntmp != 0);
5520             tcg_out_movi(s, type, parm->tmp[0], imm);
5521             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5522         }
5523     }
5524 }
5525 
5526 static void tcg_out_helper_load_common_args(TCGContext *s,
5527                                             const TCGLabelQemuLdst *ldst,
5528                                             const TCGLdstHelperParam *parm,
5529                                             const TCGHelperInfo *info,
5530                                             unsigned next_arg)
5531 {
5532     TCGMovExtend ptr_mov = {
5533         .dst_type = TCG_TYPE_PTR,
5534         .src_type = TCG_TYPE_PTR,
5535         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5536     };
5537     const TCGCallArgumentLoc *loc = &info->in[0];
5538     TCGType type;
5539     unsigned slot;
5540     tcg_target_ulong imm;
5541 
5542     /*
5543      * Handle env, which is always first.
5544      */
5545     ptr_mov.dst = loc->arg_slot;
5546     ptr_mov.src = TCG_AREG0;
5547     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5548 
5549     /*
5550      * Handle oi.
5551      */
5552     imm = ldst->oi;
5553     loc = &info->in[next_arg];
5554     type = TCG_TYPE_I32;
5555     switch (loc->kind) {
5556     case TCG_CALL_ARG_NORMAL:
5557         break;
5558     case TCG_CALL_ARG_EXTEND_U:
5559     case TCG_CALL_ARG_EXTEND_S:
5560         /* No extension required for MemOpIdx. */
5561         tcg_debug_assert(imm <= INT32_MAX);
5562         type = TCG_TYPE_REG;
5563         break;
5564     default:
5565         g_assert_not_reached();
5566     }
5567     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5568     next_arg++;
5569 
5570     /*
5571      * Handle ra.
5572      */
5573     loc = &info->in[next_arg];
5574     slot = loc->arg_slot;
5575     if (parm->ra_gen) {
5576         int arg_reg = -1;
5577         TCGReg ra_reg;
5578 
5579         if (arg_slot_reg_p(slot)) {
5580             arg_reg = tcg_target_call_iarg_regs[slot];
5581         }
5582         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5583 
5584         ptr_mov.dst = slot;
5585         ptr_mov.src = ra_reg;
5586         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5587     } else {
5588         imm = (uintptr_t)ldst->raddr;
5589         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5590     }
5591 }
5592 
5593 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5594                                        const TCGCallArgumentLoc *loc,
5595                                        TCGType dst_type, TCGType src_type,
5596                                        TCGReg lo, TCGReg hi)
5597 {
5598     MemOp reg_mo;
5599 
5600     if (dst_type <= TCG_TYPE_REG) {
5601         MemOp src_ext;
5602 
5603         switch (loc->kind) {
5604         case TCG_CALL_ARG_NORMAL:
5605             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5606             break;
5607         case TCG_CALL_ARG_EXTEND_U:
5608             dst_type = TCG_TYPE_REG;
5609             src_ext = MO_UL;
5610             break;
5611         case TCG_CALL_ARG_EXTEND_S:
5612             dst_type = TCG_TYPE_REG;
5613             src_ext = MO_SL;
5614             break;
5615         default:
5616             g_assert_not_reached();
5617         }
5618 
5619         mov[0].dst = loc->arg_slot;
5620         mov[0].dst_type = dst_type;
5621         mov[0].src = lo;
5622         mov[0].src_type = src_type;
5623         mov[0].src_ext = src_ext;
5624         return 1;
5625     }
5626 
5627     if (TCG_TARGET_REG_BITS == 32) {
5628         assert(dst_type == TCG_TYPE_I64);
5629         reg_mo = MO_32;
5630     } else {
5631         assert(dst_type == TCG_TYPE_I128);
5632         reg_mo = MO_64;
5633     }
5634 
5635     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5636     mov[0].src = lo;
5637     mov[0].dst_type = TCG_TYPE_REG;
5638     mov[0].src_type = TCG_TYPE_REG;
5639     mov[0].src_ext = reg_mo;
5640 
5641     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5642     mov[1].src = hi;
5643     mov[1].dst_type = TCG_TYPE_REG;
5644     mov[1].src_type = TCG_TYPE_REG;
5645     mov[1].src_ext = reg_mo;
5646 
5647     return 2;
5648 }
5649 
5650 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5651                                    const TCGLdstHelperParam *parm)
5652 {
5653     const TCGHelperInfo *info;
5654     const TCGCallArgumentLoc *loc;
5655     TCGMovExtend mov[2];
5656     unsigned next_arg, nmov;
5657     MemOp mop = get_memop(ldst->oi);
5658 
5659     switch (mop & MO_SIZE) {
5660     case MO_8:
5661     case MO_16:
5662     case MO_32:
5663         info = &info_helper_ld32_mmu;
5664         break;
5665     case MO_64:
5666         info = &info_helper_ld64_mmu;
5667         break;
5668     case MO_128:
5669         info = &info_helper_ld128_mmu;
5670         break;
5671     default:
5672         g_assert_not_reached();
5673     }
5674 
5675     /* Defer env argument. */
5676     next_arg = 1;
5677 
5678     loc = &info->in[next_arg];
5679     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5680         /*
5681          * 32-bit host with 32-bit guest: zero-extend the guest address
5682          * to 64-bits for the helper by storing the low part, then
5683          * load a zero for the high part.
5684          */
5685         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5686                                TCG_TYPE_I32, TCG_TYPE_I32,
5687                                ldst->addrlo_reg, -1);
5688         tcg_out_helper_load_slots(s, 1, mov, parm);
5689 
5690         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5691                                 TCG_TYPE_I32, 0, parm);
5692         next_arg += 2;
5693     } else {
5694         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5695                                       ldst->addrlo_reg, ldst->addrhi_reg);
5696         tcg_out_helper_load_slots(s, nmov, mov, parm);
5697         next_arg += nmov;
5698     }
5699 
5700     switch (info->out_kind) {
5701     case TCG_CALL_RET_NORMAL:
5702     case TCG_CALL_RET_BY_VEC:
5703         break;
5704     case TCG_CALL_RET_BY_REF:
5705         /*
5706          * The return reference is in the first argument slot.
5707          * We need memory in which to return: re-use the top of stack.
5708          */
5709         {
5710             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5711 
5712             if (arg_slot_reg_p(0)) {
5713                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5714                                  TCG_REG_CALL_STACK, ofs_slot0);
5715             } else {
5716                 tcg_debug_assert(parm->ntmp != 0);
5717                 tcg_out_addi_ptr(s, parm->tmp[0],
5718                                  TCG_REG_CALL_STACK, ofs_slot0);
5719                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5720                            TCG_REG_CALL_STACK, ofs_slot0);
5721             }
5722         }
5723         break;
5724     default:
5725         g_assert_not_reached();
5726     }
5727 
5728     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5729 }
5730 
5731 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5732                                   bool load_sign,
5733                                   const TCGLdstHelperParam *parm)
5734 {
5735     MemOp mop = get_memop(ldst->oi);
5736     TCGMovExtend mov[2];
5737     int ofs_slot0;
5738 
5739     switch (ldst->type) {
5740     case TCG_TYPE_I64:
5741         if (TCG_TARGET_REG_BITS == 32) {
5742             break;
5743         }
5744         /* fall through */
5745 
5746     case TCG_TYPE_I32:
5747         mov[0].dst = ldst->datalo_reg;
5748         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5749         mov[0].dst_type = ldst->type;
5750         mov[0].src_type = TCG_TYPE_REG;
5751 
5752         /*
5753          * If load_sign, then we allowed the helper to perform the
5754          * appropriate sign extension to tcg_target_ulong, and all
5755          * we need now is a plain move.
5756          *
5757          * If they do not, then we expect the relevant extension
5758          * instruction to be no more expensive than a move, and
5759          * we thus save the icache etc by only using one of two
5760          * helper functions.
5761          */
5762         if (load_sign || !(mop & MO_SIGN)) {
5763             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5764                 mov[0].src_ext = MO_32;
5765             } else {
5766                 mov[0].src_ext = MO_64;
5767             }
5768         } else {
5769             mov[0].src_ext = mop & MO_SSIZE;
5770         }
5771         tcg_out_movext1(s, mov);
5772         return;
5773 
5774     case TCG_TYPE_I128:
5775         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5776         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5777         switch (TCG_TARGET_CALL_RET_I128) {
5778         case TCG_CALL_RET_NORMAL:
5779             break;
5780         case TCG_CALL_RET_BY_VEC:
5781             tcg_out_st(s, TCG_TYPE_V128,
5782                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5783                        TCG_REG_CALL_STACK, ofs_slot0);
5784             /* fall through */
5785         case TCG_CALL_RET_BY_REF:
5786             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5787                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5788             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5789                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5790             return;
5791         default:
5792             g_assert_not_reached();
5793         }
5794         break;
5795 
5796     default:
5797         g_assert_not_reached();
5798     }
5799 
5800     mov[0].dst = ldst->datalo_reg;
5801     mov[0].src =
5802         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5803     mov[0].dst_type = TCG_TYPE_REG;
5804     mov[0].src_type = TCG_TYPE_REG;
5805     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5806 
5807     mov[1].dst = ldst->datahi_reg;
5808     mov[1].src =
5809         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5810     mov[1].dst_type = TCG_TYPE_REG;
5811     mov[1].src_type = TCG_TYPE_REG;
5812     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5813 
5814     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5815 }
5816 
5817 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5818                                    const TCGLdstHelperParam *parm)
5819 {
5820     const TCGHelperInfo *info;
5821     const TCGCallArgumentLoc *loc;
5822     TCGMovExtend mov[4];
5823     TCGType data_type;
5824     unsigned next_arg, nmov, n;
5825     MemOp mop = get_memop(ldst->oi);
5826 
5827     switch (mop & MO_SIZE) {
5828     case MO_8:
5829     case MO_16:
5830     case MO_32:
5831         info = &info_helper_st32_mmu;
5832         data_type = TCG_TYPE_I32;
5833         break;
5834     case MO_64:
5835         info = &info_helper_st64_mmu;
5836         data_type = TCG_TYPE_I64;
5837         break;
5838     case MO_128:
5839         info = &info_helper_st128_mmu;
5840         data_type = TCG_TYPE_I128;
5841         break;
5842     default:
5843         g_assert_not_reached();
5844     }
5845 
5846     /* Defer env argument. */
5847     next_arg = 1;
5848     nmov = 0;
5849 
5850     /* Handle addr argument. */
5851     loc = &info->in[next_arg];
5852     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5853         /*
5854          * 32-bit host with 32-bit guest: zero-extend the guest address
5855          * to 64-bits for the helper by storing the low part.  Later,
5856          * after we have processed the register inputs, we will load a
5857          * zero for the high part.
5858          */
5859         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5860                                TCG_TYPE_I32, TCG_TYPE_I32,
5861                                ldst->addrlo_reg, -1);
5862         next_arg += 2;
5863         nmov += 1;
5864     } else {
5865         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5866                                    ldst->addrlo_reg, ldst->addrhi_reg);
5867         next_arg += n;
5868         nmov += n;
5869     }
5870 
5871     /* Handle data argument. */
5872     loc = &info->in[next_arg];
5873     switch (loc->kind) {
5874     case TCG_CALL_ARG_NORMAL:
5875     case TCG_CALL_ARG_EXTEND_U:
5876     case TCG_CALL_ARG_EXTEND_S:
5877         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5878                                    ldst->datalo_reg, ldst->datahi_reg);
5879         next_arg += n;
5880         nmov += n;
5881         tcg_out_helper_load_slots(s, nmov, mov, parm);
5882         break;
5883 
5884     case TCG_CALL_ARG_BY_REF:
5885         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5886         tcg_debug_assert(data_type == TCG_TYPE_I128);
5887         tcg_out_st(s, TCG_TYPE_I64,
5888                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5889                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5890         tcg_out_st(s, TCG_TYPE_I64,
5891                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5892                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5893 
5894         tcg_out_helper_load_slots(s, nmov, mov, parm);
5895 
5896         if (arg_slot_reg_p(loc->arg_slot)) {
5897             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5898                              TCG_REG_CALL_STACK,
5899                              arg_slot_stk_ofs(loc->ref_slot));
5900         } else {
5901             tcg_debug_assert(parm->ntmp != 0);
5902             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5903                              arg_slot_stk_ofs(loc->ref_slot));
5904             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5905                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5906         }
5907         next_arg += 2;
5908         break;
5909 
5910     default:
5911         g_assert_not_reached();
5912     }
5913 
5914     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5915         /* Zero extend the address by loading a zero for the high part. */
5916         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5917         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5918     }
5919 
5920     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5921 }
5922 
5923 void tcg_dump_op_count(GString *buf)
5924 {
5925     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5926 }
5927 
5928 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5929 {
5930     int i, start_words, num_insns;
5931     TCGOp *op;
5932 
5933     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5934                  && qemu_log_in_addr_range(pc_start))) {
5935         FILE *logfile = qemu_log_trylock();
5936         if (logfile) {
5937             fprintf(logfile, "OP:\n");
5938             tcg_dump_ops(s, logfile, false);
5939             fprintf(logfile, "\n");
5940             qemu_log_unlock(logfile);
5941         }
5942     }
5943 
5944 #ifdef CONFIG_DEBUG_TCG
5945     /* Ensure all labels referenced have been emitted.  */
5946     {
5947         TCGLabel *l;
5948         bool error = false;
5949 
5950         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5951             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5952                 qemu_log_mask(CPU_LOG_TB_OP,
5953                               "$L%d referenced but not present.\n", l->id);
5954                 error = true;
5955             }
5956         }
5957         assert(!error);
5958     }
5959 #endif
5960 
5961     tcg_optimize(s);
5962 
5963     reachable_code_pass(s);
5964     liveness_pass_0(s);
5965     liveness_pass_1(s);
5966 
5967     if (s->nb_indirects > 0) {
5968         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5969                      && qemu_log_in_addr_range(pc_start))) {
5970             FILE *logfile = qemu_log_trylock();
5971             if (logfile) {
5972                 fprintf(logfile, "OP before indirect lowering:\n");
5973                 tcg_dump_ops(s, logfile, false);
5974                 fprintf(logfile, "\n");
5975                 qemu_log_unlock(logfile);
5976             }
5977         }
5978 
5979         /* Replace indirect temps with direct temps.  */
5980         if (liveness_pass_2(s)) {
5981             /* If changes were made, re-run liveness.  */
5982             liveness_pass_1(s);
5983         }
5984     }
5985 
5986     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5987                  && qemu_log_in_addr_range(pc_start))) {
5988         FILE *logfile = qemu_log_trylock();
5989         if (logfile) {
5990             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5991             tcg_dump_ops(s, logfile, true);
5992             fprintf(logfile, "\n");
5993             qemu_log_unlock(logfile);
5994         }
5995     }
5996 
5997     /* Initialize goto_tb jump offsets. */
5998     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5999     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6000     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6001     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6002 
6003     tcg_reg_alloc_start(s);
6004 
6005     /*
6006      * Reset the buffer pointers when restarting after overflow.
6007      * TODO: Move this into translate-all.c with the rest of the
6008      * buffer management.  Having only this done here is confusing.
6009      */
6010     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6011     s->code_ptr = s->code_buf;
6012 
6013 #ifdef TCG_TARGET_NEED_LDST_LABELS
6014     QSIMPLEQ_INIT(&s->ldst_labels);
6015 #endif
6016 #ifdef TCG_TARGET_NEED_POOL_LABELS
6017     s->pool_labels = NULL;
6018 #endif
6019 
6020     start_words = s->insn_start_words;
6021     s->gen_insn_data =
6022         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6023 
6024     tcg_out_tb_start(s);
6025 
6026     num_insns = -1;
6027     QTAILQ_FOREACH(op, &s->ops, link) {
6028         TCGOpcode opc = op->opc;
6029 
6030         switch (opc) {
6031         case INDEX_op_mov_i32:
6032         case INDEX_op_mov_i64:
6033         case INDEX_op_mov_vec:
6034             tcg_reg_alloc_mov(s, op);
6035             break;
6036         case INDEX_op_dup_vec:
6037             tcg_reg_alloc_dup(s, op);
6038             break;
6039         case INDEX_op_insn_start:
6040             if (num_insns >= 0) {
6041                 size_t off = tcg_current_code_size(s);
6042                 s->gen_insn_end_off[num_insns] = off;
6043                 /* Assert that we do not overflow our stored offset.  */
6044                 assert(s->gen_insn_end_off[num_insns] == off);
6045             }
6046             num_insns++;
6047             for (i = 0; i < start_words; ++i) {
6048                 s->gen_insn_data[num_insns * start_words + i] =
6049                     tcg_get_insn_start_param(op, i);
6050             }
6051             break;
6052         case INDEX_op_discard:
6053             temp_dead(s, arg_temp(op->args[0]));
6054             break;
6055         case INDEX_op_set_label:
6056             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6057             tcg_out_label(s, arg_label(op->args[0]));
6058             break;
6059         case INDEX_op_call:
6060             tcg_reg_alloc_call(s, op);
6061             break;
6062         case INDEX_op_exit_tb:
6063             tcg_out_exit_tb(s, op->args[0]);
6064             break;
6065         case INDEX_op_goto_tb:
6066             tcg_out_goto_tb(s, op->args[0]);
6067             break;
6068         case INDEX_op_dup2_vec:
6069             if (tcg_reg_alloc_dup2(s, op)) {
6070                 break;
6071             }
6072             /* fall through */
6073         default:
6074             /* Sanity check that we've not introduced any unhandled opcodes. */
6075             tcg_debug_assert(tcg_op_supported(opc));
6076             /* Note: in order to speed up the code, it would be much
6077                faster to have specialized register allocator functions for
6078                some common argument patterns */
6079             tcg_reg_alloc_op(s, op);
6080             break;
6081         }
6082         /* Test for (pending) buffer overflow.  The assumption is that any
6083            one operation beginning below the high water mark cannot overrun
6084            the buffer completely.  Thus we can test for overflow after
6085            generating code without having to check during generation.  */
6086         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6087             return -1;
6088         }
6089         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6090         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6091             return -2;
6092         }
6093     }
6094     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6095     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6096 
6097     /* Generate TB finalization at the end of block */
6098 #ifdef TCG_TARGET_NEED_LDST_LABELS
6099     i = tcg_out_ldst_finalize(s);
6100     if (i < 0) {
6101         return i;
6102     }
6103 #endif
6104 #ifdef TCG_TARGET_NEED_POOL_LABELS
6105     i = tcg_out_pool_finalize(s);
6106     if (i < 0) {
6107         return i;
6108     }
6109 #endif
6110     if (!tcg_resolve_relocs(s)) {
6111         return -2;
6112     }
6113 
6114 #ifndef CONFIG_TCG_INTERPRETER
6115     /* flush instruction cache */
6116     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6117                         (uintptr_t)s->code_buf,
6118                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6119 #endif
6120 
6121     return tcg_current_code_size(s);
6122 }
6123 
6124 void tcg_dump_info(GString *buf)
6125 {
6126     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6127 }
6128 
6129 #ifdef ELF_HOST_MACHINE
6130 /* In order to use this feature, the backend needs to do three things:
6131 
6132    (1) Define ELF_HOST_MACHINE to indicate both what value to
6133        put into the ELF image and to indicate support for the feature.
6134 
6135    (2) Define tcg_register_jit.  This should create a buffer containing
6136        the contents of a .debug_frame section that describes the post-
6137        prologue unwind info for the tcg machine.
6138 
6139    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6140 */
6141 
6142 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action codes stored in jit_descriptor.action_flag.  The numeric values
   are spelled out because they are part of the debugger interface.  */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
6148 
/* Describes one symbol file handed to the debugger.  Part of the GDB
   interface: field order and types must stay exactly as they are.  */
struct jit_code_entry {
    /* Link fields; this file only ever uses a single static entry and
       never assigns them, so they stay NULL.  */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* address of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
6155 
/* Descriptor object examined by the debugger.  Part of the GDB
   interface: field order and types must stay exactly as they are.  */
struct jit_descriptor {
    uint32_t version;       /* interface version; initialised to 1 */
    uint32_t action_flag;   /* holds a jit_actions_t value */
    /* Entry the pending action applies to (per the GDB JIT interface).  */
    struct jit_code_entry *relevant_entry;
    /* First registered entry (per the GDB JIT interface).  */
    struct jit_code_entry *first_entry;
};
6162 
/* Hook invoked after __jit_debug_descriptor has been filled in.  It has
   to stay a real out-of-line function: noinline stops it from being
   inlined, and the empty asm statement is presumably there as a dummy
   side effect so that calls to it are not optimized away.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
6168 
6169 /* Must statically initialize the version, because GDB may check
6170    the version before we can set it.  */
6171 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6172 
6173 /* End GDB interface.  */
6174 
/*
 * Return the byte offset inside @strtab of the entry equal to @str.
 *
 * @strtab is a run of NUL-terminated strings laid end to end.  The scan
 * starts at offset 1, skipping the byte at offset 0.  The scan has no
 * bound, so @str must be present in the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry;

    for (entry = strtab + 1; strcmp(entry, str) != 0; ) {
        /* Step over this entry and its terminating NUL.  */
        entry += strlen(entry) + 1;
    }
    return entry - strtab;
}
6186 
6187 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6188                                  const void *debug_frame,
6189                                  size_t debug_frame_size)
6190 {
6191     struct __attribute__((packed)) DebugInfo {
6192         uint32_t  len;
6193         uint16_t  version;
6194         uint32_t  abbrev;
6195         uint8_t   ptr_size;
6196         uint8_t   cu_die;
6197         uint16_t  cu_lang;
6198         uintptr_t cu_low_pc;
6199         uintptr_t cu_high_pc;
6200         uint8_t   fn_die;
6201         char      fn_name[16];
6202         uintptr_t fn_low_pc;
6203         uintptr_t fn_high_pc;
6204         uint8_t   cu_eoc;
6205     };
6206 
6207     struct ElfImage {
6208         ElfW(Ehdr) ehdr;
6209         ElfW(Phdr) phdr;
6210         ElfW(Shdr) shdr[7];
6211         ElfW(Sym)  sym[2];
6212         struct DebugInfo di;
6213         uint8_t    da[24];
6214         char       str[80];
6215     };
6216 
6217     struct ElfImage *img;
6218 
6219     static const struct ElfImage img_template = {
6220         .ehdr = {
6221             .e_ident[EI_MAG0] = ELFMAG0,
6222             .e_ident[EI_MAG1] = ELFMAG1,
6223             .e_ident[EI_MAG2] = ELFMAG2,
6224             .e_ident[EI_MAG3] = ELFMAG3,
6225             .e_ident[EI_CLASS] = ELF_CLASS,
6226             .e_ident[EI_DATA] = ELF_DATA,
6227             .e_ident[EI_VERSION] = EV_CURRENT,
6228             .e_type = ET_EXEC,
6229             .e_machine = ELF_HOST_MACHINE,
6230             .e_version = EV_CURRENT,
6231             .e_phoff = offsetof(struct ElfImage, phdr),
6232             .e_shoff = offsetof(struct ElfImage, shdr),
6233             .e_ehsize = sizeof(ElfW(Shdr)),
6234             .e_phentsize = sizeof(ElfW(Phdr)),
6235             .e_phnum = 1,
6236             .e_shentsize = sizeof(ElfW(Shdr)),
6237             .e_shnum = ARRAY_SIZE(img->shdr),
6238             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6239 #ifdef ELF_HOST_FLAGS
6240             .e_flags = ELF_HOST_FLAGS,
6241 #endif
6242 #ifdef ELF_OSABI
6243             .e_ident[EI_OSABI] = ELF_OSABI,
6244 #endif
6245         },
6246         .phdr = {
6247             .p_type = PT_LOAD,
6248             .p_flags = PF_X,
6249         },
6250         .shdr = {
6251             [0] = { .sh_type = SHT_NULL },
6252             /* Trick: The contents of code_gen_buffer are not present in
6253                this fake ELF file; that got allocated elsewhere.  Therefore
6254                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6255                will not look for contents.  We can record any address.  */
6256             [1] = { /* .text */
6257                 .sh_type = SHT_NOBITS,
6258                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6259             },
6260             [2] = { /* .debug_info */
6261                 .sh_type = SHT_PROGBITS,
6262                 .sh_offset = offsetof(struct ElfImage, di),
6263                 .sh_size = sizeof(struct DebugInfo),
6264             },
6265             [3] = { /* .debug_abbrev */
6266                 .sh_type = SHT_PROGBITS,
6267                 .sh_offset = offsetof(struct ElfImage, da),
6268                 .sh_size = sizeof(img->da),
6269             },
6270             [4] = { /* .debug_frame */
6271                 .sh_type = SHT_PROGBITS,
6272                 .sh_offset = sizeof(struct ElfImage),
6273             },
6274             [5] = { /* .symtab */
6275                 .sh_type = SHT_SYMTAB,
6276                 .sh_offset = offsetof(struct ElfImage, sym),
6277                 .sh_size = sizeof(img->sym),
6278                 .sh_info = 1,
6279                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6280                 .sh_entsize = sizeof(ElfW(Sym)),
6281             },
6282             [6] = { /* .strtab */
6283                 .sh_type = SHT_STRTAB,
6284                 .sh_offset = offsetof(struct ElfImage, str),
6285                 .sh_size = sizeof(img->str),
6286             }
6287         },
6288         .sym = {
6289             [1] = { /* code_gen_buffer */
6290                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6291                 .st_shndx = 1,
6292             }
6293         },
6294         .di = {
6295             .len = sizeof(struct DebugInfo) - 4,
6296             .version = 2,
6297             .ptr_size = sizeof(void *),
6298             .cu_die = 1,
6299             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6300             .fn_die = 2,
6301             .fn_name = "code_gen_buffer"
6302         },
6303         .da = {
6304             1,          /* abbrev number (the cu) */
6305             0x11, 1,    /* DW_TAG_compile_unit, has children */
6306             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6307             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6308             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6309             0, 0,       /* end of abbrev */
6310             2,          /* abbrev number (the fn) */
6311             0x2e, 0,    /* DW_TAG_subprogram, no children */
6312             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6313             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6314             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6315             0, 0,       /* end of abbrev */
6316             0           /* no more abbrev */
6317         },
6318         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6319                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6320     };
6321 
6322     /* We only need a single jit entry; statically allocate it.  */
6323     static struct jit_code_entry one_entry;
6324 
6325     uintptr_t buf = (uintptr_t)buf_ptr;
6326     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6327     DebugFrameHeader *dfh;
6328 
6329     img = g_malloc(img_size);
6330     *img = img_template;
6331 
6332     img->phdr.p_vaddr = buf;
6333     img->phdr.p_paddr = buf;
6334     img->phdr.p_memsz = buf_size;
6335 
6336     img->shdr[1].sh_name = find_string(img->str, ".text");
6337     img->shdr[1].sh_addr = buf;
6338     img->shdr[1].sh_size = buf_size;
6339 
6340     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6341     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6342 
6343     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6344     img->shdr[4].sh_size = debug_frame_size;
6345 
6346     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6347     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6348 
6349     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6350     img->sym[1].st_value = buf;
6351     img->sym[1].st_size = buf_size;
6352 
6353     img->di.cu_low_pc = buf;
6354     img->di.cu_high_pc = buf + buf_size;
6355     img->di.fn_low_pc = buf;
6356     img->di.fn_high_pc = buf + buf_size;
6357 
6358     dfh = (DebugFrameHeader *)(img + 1);
6359     memcpy(dfh, debug_frame, debug_frame_size);
6360     dfh->fde.func_start = buf;
6361     dfh->fde.func_len = buf_size;
6362 
6363 #ifdef DEBUG_JIT
6364     /* Enable this block to be able to debug the ELF image file creation.
6365        One can use readelf, objdump, or other inspection utilities.  */
6366     {
6367         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6368         FILE *f = fopen(jit, "w+b");
6369         if (f) {
6370             if (fwrite(img, img_size, 1, f) != img_size) {
6371                 /* Avoid stupid unused return value warning for fwrite.  */
6372             }
6373             fclose(f);
6374         }
6375     }
6376 #endif
6377 
6378     one_entry.symfile_addr = img;
6379     one_entry.symfile_size = img_size;
6380 
6381     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6382     __jit_debug_descriptor.relevant_entry = &one_entry;
6383     __jit_debug_descriptor.first_entry = &one_entry;
6384     __jit_debug_register_code();
6385 }
6386 #else
6387 /* No support for the feature.  Provide the entry point expected by exec.c,
6388    and implement the internal function we declared earlier.  */
6389 
/* Stub compiled when ELF_HOST_MACHINE is not defined: there is nothing
   to register, so all arguments are ignored.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6395 
/* Public entry point for builds without ELF_HOST_MACHINE; intentionally
   does nothing.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6399 #endif /* ELF_HOST_MACHINE */
6400 
6401 #if !TCG_TARGET_MAYBE_vec
/* Definition used only when TCG_TARGET_MAYBE_vec is false.  It must
   never actually be called: the body just asserts unreachability.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6406 #endif
6407