xref: /openbmc/qemu/tcg/tcg.c (revision 76f42780292c16a0d2f36cbbfbaf57495cd4d5e8)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of generated code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
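
/*
 * For example (illustrative only): since both fields are log2 byte
 * counts, { .atom = MO_32, .align = MO_64 } would describe an access
 * that must be 8-byte aligned and performed with at least 4-byte
 * atomicity.
 */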

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
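
/*
 * Note (illustrative): tcg_insn_unit is TCG_TARGET_INSN_UNIT_SIZE bytes
 * wide, so on a host with 1-byte units a call such as tcg_out32(s, v)
 * takes the memcpy path above and advances s->code_ptr by 4 units,
 * while a host with 4-byte units stores v as a single unit.
 */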

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
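
/*
 * Example (hypothetical register names, for illustration only):
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst_reg, TCG_TYPE_I32, MO_SW, src_reg);
 *
 * would emit a 16-bit sign-extension of src_reg into the 64-bit dst_reg,
 * via the backend's tcg_out_ext16s hook.
 */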

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
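
/*
 * Illustration (register names purely hypothetical): if @i1 moves
 * R1 -> R2 while @i2 moves R2 -> R1, the two moves form a cycle.  The
 * code above either swaps the registers with tcg_out_xchg and then
 * extends each value in place, or, when the backend cannot xchg,
 * parks R1 in @scratch first.
 */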

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
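
/*
 * Note: keeping the pool sorted (larger entries first, equal-sized
 * entries ordered by contents) means identical constants end up
 * adjacent, which lets tcg_out_pool_finalize below emit each distinct
 * value only once.
 */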

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
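
/*
 * For example, a backend line such as C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expands (via C_PFX3 token pasting) to the
 * enumerator c_o1_i2_r_r_ri here.
 */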

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
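
/*
 * Under this second expansion, the same C_O1_I2(r, r, ri) line becomes
 * the initializer { 1, 2, { "r", "r", "ri" } }, so constraint_sets[]
 * lines up index-for-index with the TCGConstraintSetIndex enum above.
 */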

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

typedef struct TCGOutOpDeposit {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned ofs, unsigned len);
    void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    tcg_target_long a2, unsigned ofs, unsigned len);
    void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
                    TCGReg a2, unsigned ofs, unsigned len);
} TCGOutOpDeposit;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

typedef struct TCGOutOpExtract2 {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned shr);
} TCGOutOpExtract2;

typedef struct TCGOutOpMovcond {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGCond cond,
                TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
                TCGArg vt, bool const_vt, TCGArg vf, bool const_vf);
} TCGOutOpMovcond;

typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

#if TCG_TARGET_REG_BITS == 64
/*
 * We require these functions for slow-path function calls.
 * Adapt them generically for opcode output.
 */

static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_exts_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_exts_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_exts_i32_i64,
};

static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extu_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_extu_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extu_i32_i64,
};

static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extrl_i64_i32(s, a0, a1);
}

static const TCGOutOpUnary outop_extrl_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
};
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
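
/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) compiles
 * only when outop_add really is a TCGOutOpBinary; had a backend declared
 * it as, say, TCGOutOpUnary, the _Generic selection would fail to match
 * and the build would error out.
 */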

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
    OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#else
    OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
    OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
    OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
    OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
    OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
#endif
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
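    /*
     * Worked example: each argument typecode occupies 3 bits of typemask
     * above the 3-bit return type, so for a 4-argument helper the last
     * typecode sits in bits 12..14.  After the shift its top set bit is
     * within bits 9..11, 32 - clz32 yields 10..12, and dividing by 3
     * (rounding up) recovers nargs = 4.
     */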
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
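
/*
 * Example (illustrative): on a host passing the first 6 integer
 * arguments in registers, arg_slot 5 satisfies arg_slot_reg_p() while
 * arg_slot 6 becomes stack slot 0, i.e. offset
 * TCG_TARGET_CALL_STACK_OFFSET + 0 * sizeof(tcg_target_long).
 */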

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
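    /*
     * Round the slot count up to even; some ABIs require 64-bit (or
     * larger) arguments to start on an even register or slot pair.
     */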
1472     cum->arg_slot += cum->arg_slot & 1;
1473 }
1474 
1475 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1476                          TCGCallArgumentKind kind)
1477 {
1478     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1479 
1480     *loc = (TCGCallArgumentLoc){
1481         .kind = kind,
1482         .arg_idx = cum->arg_idx,
1483         .arg_slot = cum->arg_slot,
1484     };
1485     cum->info_in_idx++;
1486     cum->arg_slot++;
1487 }
1488 
1489 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1490                                 TCGHelperInfo *info, int n)
1491 {
1492     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1493 
1494     for (int i = 0; i < n; ++i) {
1495         /* Layout all using the same arg_idx, adjusting the subindex. */
1496         loc[i] = (TCGCallArgumentLoc){
1497             .kind = TCG_CALL_ARG_NORMAL,
1498             .arg_idx = cum->arg_idx,
1499             .tmp_subindex = i,
1500             .arg_slot = cum->arg_slot + i,
1501         };
1502     }
1503     cum->info_in_idx += n;
1504     cum->arg_slot += n;
1505 }
1506 
1507 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1508 {
1509     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1510     int n = 128 / TCG_TARGET_REG_BITS;
1511 
1512     /* The first subindex carries the pointer. */
1513     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1514 
1515     /*
1516      * The callee is allowed to clobber memory associated with
1517      * structure pass by-reference.  Therefore we must make copies.
1518      * Allocate space from "ref_slot", which will be adjusted to
1519      * follow the parameters on the stack.
1520      */
1521     loc[0].ref_slot = cum->ref_slot;
1522 
1523     /*
1524      * Subsequent words also go into the reference slot, but
1525      * do not accumulate into the regular arguments.
1526      */
1527     for (int i = 1; i < n; ++i) {
1528         loc[i] = (TCGCallArgumentLoc){
1529             .kind = TCG_CALL_ARG_BY_REF_N,
1530             .arg_idx = cum->arg_idx,
1531             .tmp_subindex = i,
1532             .ref_slot = cum->ref_slot + i,
1533         };
1534     }
1535     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1536     cum->ref_slot += n;
1537 }
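
/*
 * E.g. in layout_arg_by_ref on a 64-bit host, an I128 argument takes
 * n = 2 words: in[] gains one TCG_CALL_ARG_BY_REF entry carrying the
 * pointer in a regular arg_slot, plus one TCG_CALL_ARG_BY_REF_N entry,
 * and two ref_slots are reserved for the defensive copy.
 */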
1538 
1539 static void init_call_layout(TCGHelperInfo *info)
1540 {
1541     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1542     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1543     unsigned typemask = info->typemask;
1544     unsigned typecode;
1545     TCGCumulativeArgs cum = { };
1546 
1547     /*
1548      * Parse and place any function return value.
1549      */
1550     typecode = typemask & 7;
1551     switch (typecode) {
1552     case dh_typecode_void:
1553         info->nr_out = 0;
1554         break;
1555     case dh_typecode_i32:
1556     case dh_typecode_s32:
1557     case dh_typecode_ptr:
1558         info->nr_out = 1;
1559         info->out_kind = TCG_CALL_RET_NORMAL;
1560         break;
1561     case dh_typecode_i64:
1562     case dh_typecode_s64:
1563         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1564         info->out_kind = TCG_CALL_RET_NORMAL;
1565         /* Query the last register now to trigger any assert early. */
1566         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1567         break;
1568     case dh_typecode_i128:
1569         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1570         info->out_kind = TCG_TARGET_CALL_RET_I128;
1571         switch (TCG_TARGET_CALL_RET_I128) {
1572         case TCG_CALL_RET_NORMAL:
1573             /* Query the last register now to trigger any assert early. */
1574             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1575             break;
1576         case TCG_CALL_RET_BY_VEC:
1577             /* Query the single register now to trigger any assert early. */
1578             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1579             break;
1580         case TCG_CALL_RET_BY_REF:
1581             /*
1582              * Allocate the first argument slot to the return-value pointer.
1583              * We don't need to store this anywhere, just make it
1584              * unavailable for use in the input loop below.
1585              */
1586             cum.arg_slot = 1;
1587             break;
1588         default:
1589             qemu_build_not_reached();
1590         }
1591         break;
1592     default:
1593         g_assert_not_reached();
1594     }
1595 
1596     /*
1597      * Parse and place function arguments.
1598      */
1599     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1600         TCGCallArgumentKind kind;
1601         TCGType type;
1602 
1603         typecode = typemask & 7;
1604         switch (typecode) {
1605         case dh_typecode_i32:
1606         case dh_typecode_s32:
1607             type = TCG_TYPE_I32;
1608             break;
1609         case dh_typecode_i64:
1610         case dh_typecode_s64:
1611             type = TCG_TYPE_I64;
1612             break;
1613         case dh_typecode_ptr:
1614             type = TCG_TYPE_PTR;
1615             break;
1616         case dh_typecode_i128:
1617             type = TCG_TYPE_I128;
1618             break;
1619         default:
1620             g_assert_not_reached();
1621         }
1622 
1623         switch (type) {
1624         case TCG_TYPE_I32:
1625             switch (TCG_TARGET_CALL_ARG_I32) {
1626             case TCG_CALL_ARG_EVEN:
1627                 layout_arg_even(&cum);
1628                 /* fall through */
1629             case TCG_CALL_ARG_NORMAL:
1630                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1631                 break;
1632             case TCG_CALL_ARG_EXTEND:
1633                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1634                 layout_arg_1(&cum, info, kind);
1635                 break;
1636             default:
1637                 qemu_build_not_reached();
1638             }
1639             break;
1640 
1641         case TCG_TYPE_I64:
1642             switch (TCG_TARGET_CALL_ARG_I64) {
1643             case TCG_CALL_ARG_EVEN:
1644                 layout_arg_even(&cum);
1645                 /* fall through */
1646             case TCG_CALL_ARG_NORMAL:
1647                 if (TCG_TARGET_REG_BITS == 32) {
1648                     layout_arg_normal_n(&cum, info, 2);
1649                 } else {
1650                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1651                 }
1652                 break;
1653             default:
1654                 qemu_build_not_reached();
1655             }
1656             break;
1657 
1658         case TCG_TYPE_I128:
1659             switch (TCG_TARGET_CALL_ARG_I128) {
1660             case TCG_CALL_ARG_EVEN:
1661                 layout_arg_even(&cum);
1662                 /* fall through */
1663             case TCG_CALL_ARG_NORMAL:
1664                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1665                 break;
1666             case TCG_CALL_ARG_BY_REF:
1667                 layout_arg_by_ref(&cum, info);
1668                 break;
1669             default:
1670                 qemu_build_not_reached();
1671             }
1672             break;
1673 
1674         default:
1675             g_assert_not_reached();
1676         }
1677     }
1678     info->nr_in = cum.info_in_idx;
1679 
1680     /* Validate that we didn't overrun the input array. */
1681     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1682     /* Validate the backend has enough argument space. */
1683     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1684 
1685     /*
1686      * Relocate the "ref_slot" area to the end of the parameters.
1687      * Minimizing this stack offset helps code size for x86,
1688      * which has a signed 8-bit offset encoding.
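     * E.g. if two argument slots spilled past the registers, the copies
     * begin at ref_base = max_reg_slots + ROUND_UP(2, align), just past
     * (and suitably aligned beyond) the outgoing stack parameters.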
1689      */
1690     if (cum.ref_slot != 0) {
1691         int ref_base = 0;
1692 
1693         if (cum.arg_slot > max_reg_slots) {
1694             int align = __alignof(Int128) / sizeof(tcg_target_long);
1695 
1696             ref_base = cum.arg_slot - max_reg_slots;
1697             if (align > 1) {
1698                 ref_base = ROUND_UP(ref_base, align);
1699             }
1700         }
1701         assert(ref_base + cum.ref_slot <= max_stk_slots);
1702         ref_base += max_reg_slots;
1703 
1704         if (ref_base != 0) {
1705             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1706                 TCGCallArgumentLoc *loc = &info->in[i];
1707                 switch (loc->kind) {
1708                 case TCG_CALL_ARG_BY_REF:
1709                 case TCG_CALL_ARG_BY_REF_N:
1710                     loc->ref_slot += ref_base;
1711                     break;
1712                 default:
1713                     break;
1714                 }
1715             }
1716         }
1717     }
1718 }
1719 
1720 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1721 static void process_constraint_sets(void);
1722 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1723                                             TCGReg reg, const char *name);
1724 
1725 static void tcg_context_init(unsigned max_threads)
1726 {
1727     TCGContext *s = &tcg_init_ctx;
1728     int n, i;
1729     TCGTemp *ts;
1730 
1731     memset(s, 0, sizeof(*s));
1732     s->nb_globals = 0;
1733 
1734     init_call_layout(&info_helper_ld32_mmu);
1735     init_call_layout(&info_helper_ld64_mmu);
1736     init_call_layout(&info_helper_ld128_mmu);
1737     init_call_layout(&info_helper_st32_mmu);
1738     init_call_layout(&info_helper_st64_mmu);
1739     init_call_layout(&info_helper_st128_mmu);
1740 
1741     tcg_target_init(s);
1742     process_constraint_sets();
1743 
1744     /* Reverse the order of the saved registers, assuming they're all at
1745        the start of tcg_target_reg_alloc_order.  */
1746     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1747         int r = tcg_target_reg_alloc_order[n];
1748         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1749             break;
1750         }
1751     }
1752     for (i = 0; i < n; ++i) {
1753         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1754     }
1755     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1756         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1757     }
1758 
1759     tcg_ctx = s;
1760     /*
1761      * In user-mode we simply share the init context among threads, since we
1762      * use a single region. See the documentation of tcg_region_init() for the
1763      * reasoning behind this.
1764      * In system-mode we will have at most max_threads TCG threads.
1765      */
1766 #ifdef CONFIG_USER_ONLY
1767     tcg_ctxs = &tcg_ctx;
1768     tcg_cur_ctxs = 1;
1769     tcg_max_ctxs = 1;
1770 #else
1771     tcg_max_ctxs = max_threads;
1772     tcg_ctxs = g_new0(TCGContext *, max_threads);
1773 #endif
1774 
1775     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1776     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1777     tcg_env = temp_tcgv_ptr(ts);
1778 }
1779 
1780 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1781 {
1782     tcg_context_init(max_threads);
1783     tcg_region_init(tb_size, splitwx, max_threads);
1784 }
1785 
1786 /*
1787  * Allocate TBs right before their corresponding translated code, making
1788  * sure that TBs and code are on different cache lines.
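 * The region thus fills as [TB][pad][code...][pad][TB][pad][code...],
 * with each boundary rounded up to qemu_icache_linesize.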
1789  */
1790 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1791 {
1792     uintptr_t align = qemu_icache_linesize;
1793     TranslationBlock *tb;
1794     void *next;
1795 
1796  retry:
1797     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1798     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1799 
1800     if (unlikely(next > s->code_gen_highwater)) {
1801         if (tcg_region_alloc(s)) {
1802             return NULL;
1803         }
1804         goto retry;
1805     }
1806     qatomic_set(&s->code_gen_ptr, next);
1807     return tb;
1808 }
1809 
1810 void tcg_prologue_init(void)
1811 {
1812     TCGContext *s = tcg_ctx;
1813     size_t prologue_size;
1814 
1815     s->code_ptr = s->code_gen_ptr;
1816     s->code_buf = s->code_gen_ptr;
1817     s->data_gen_ptr = NULL;
1818 
1819 #ifndef CONFIG_TCG_INTERPRETER
1820     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1821 #endif
1822 
1823     s->pool_labels = NULL;
1824 
1825     qemu_thread_jit_write();
1826     /* Generate the prologue.  */
1827     tcg_target_qemu_prologue(s);
1828 
1829     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1830     {
1831         int result = tcg_out_pool_finalize(s);
1832         tcg_debug_assert(result == 0);
1833     }
1834 
1835     prologue_size = tcg_current_code_size(s);
1836     perf_report_prologue(s->code_gen_ptr, prologue_size);
1837 
1838 #ifndef CONFIG_TCG_INTERPRETER
1839     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1840                         (uintptr_t)s->code_buf, prologue_size);
1841 #endif
1842 
1843     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1844         FILE *logfile = qemu_log_trylock();
1845         if (logfile) {
1846             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1847             if (s->data_gen_ptr) {
1848                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1849                 size_t data_size = prologue_size - code_size;
1850                 size_t i;
1851 
1852                 disas(logfile, s->code_gen_ptr, code_size);
1853 
1854                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1855                     if (sizeof(tcg_target_ulong) == 8) {
1856                         fprintf(logfile,
1857                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1858                                 (uintptr_t)s->data_gen_ptr + i,
1859                                 *(uint64_t *)(s->data_gen_ptr + i));
1860                     } else {
1861                         fprintf(logfile,
1862                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1863                                 (uintptr_t)s->data_gen_ptr + i,
1864                                 *(uint32_t *)(s->data_gen_ptr + i));
1865                     }
1866                 }
1867             } else {
1868                 disas(logfile, s->code_gen_ptr, prologue_size);
1869             }
1870             fprintf(logfile, "\n");
1871             qemu_log_unlock(logfile);
1872         }
1873     }
1874 
1875 #ifndef CONFIG_TCG_INTERPRETER
1876     /*
1877      * Assert that goto_ptr is implemented completely, setting an epilogue.
1878      * For tci, we use NULL as the signal to return from the interpreter,
1879      * so skip this check.
1880      */
1881     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1882 #endif
1883 
1884     tcg_region_prologue_set(s);
1885 }
1886 
1887 void tcg_func_start(TCGContext *s)
1888 {
1889     tcg_pool_reset(s);
1890     s->nb_temps = s->nb_globals;
1891 
1892     /* No temps have been previously allocated for size or locality.  */
1893     tcg_temp_ebb_reset_freed(s);
1894 
1895     /* No constant temps have been previously allocated. */
1896     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1897         if (s->const_table[i]) {
1898             g_hash_table_remove_all(s->const_table[i]);
1899         }
1900     }
1901 
1902     s->nb_ops = 0;
1903     s->nb_labels = 0;
1904     s->current_frame_offset = s->frame_start;
1905 
1906 #ifdef CONFIG_DEBUG_TCG
1907     s->goto_tb_issue_mask = 0;
1908 #endif
1909 
1910     QTAILQ_INIT(&s->ops);
1911     QTAILQ_INIT(&s->free_ops);
1912     s->emit_before_op = NULL;
1913     QSIMPLEQ_INIT(&s->labels);
1914 
1915     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1916     tcg_debug_assert(s->insn_start_words > 0);
1917 }
1918 
1919 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1920 {
1921     int n = s->nb_temps++;
1922 
1923     if (n >= TCG_MAX_TEMPS) {
1924         tcg_raise_tb_overflow(s);
1925     }
1926     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1927 }
1928 
1929 static TCGTemp *tcg_global_alloc(TCGContext *s)
1930 {
1931     TCGTemp *ts;
1932 
1933     tcg_debug_assert(s->nb_globals == s->nb_temps);
1934     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1935     s->nb_globals++;
1936     ts = tcg_temp_alloc(s);
1937     ts->kind = TEMP_GLOBAL;
1938 
1939     return ts;
1940 }
1941 
1942 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1943                                             TCGReg reg, const char *name)
1944 {
1945     TCGTemp *ts;
1946 
1947     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1948 
1949     ts = tcg_global_alloc(s);
1950     ts->base_type = type;
1951     ts->type = type;
1952     ts->kind = TEMP_FIXED;
1953     ts->reg = reg;
1954     ts->name = name;
1955     tcg_regset_set_reg(s->reserved_regs, reg);
1956 
1957     return ts;
1958 }
1959 
1960 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1961 {
1962     s->frame_start = start;
1963     s->frame_end = start + size;
1964     s->frame_temp
1965         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1966 }
1967 
1968 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1969                                             const char *name, TCGType type)
1970 {
1971     TCGContext *s = tcg_ctx;
1972     TCGTemp *base_ts = tcgv_ptr_temp(base);
1973     TCGTemp *ts = tcg_global_alloc(s);
1974     int indirect_reg = 0;
1975 
1976     switch (base_ts->kind) {
1977     case TEMP_FIXED:
1978         break;
1979     case TEMP_GLOBAL:
1980         /* We do not support double-indirect registers.  */
1981         tcg_debug_assert(!base_ts->indirect_reg);
1982         base_ts->indirect_base = 1;
1983         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1984                             ? 2 : 1);
1985         indirect_reg = 1;
1986         break;
1987     default:
1988         g_assert_not_reached();
1989     }
1990 
1991     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1992         TCGTemp *ts2 = tcg_global_alloc(s);
1993         char buf[64];
1994 
1995         ts->base_type = TCG_TYPE_I64;
1996         ts->type = TCG_TYPE_I32;
1997         ts->indirect_reg = indirect_reg;
1998         ts->mem_allocated = 1;
1999         ts->mem_base = base_ts;
2000         ts->mem_offset = offset;
2001         pstrcpy(buf, sizeof(buf), name);
2002         pstrcat(buf, sizeof(buf), "_0");
2003         ts->name = strdup(buf);
2004 
2005         tcg_debug_assert(ts2 == ts + 1);
2006         ts2->base_type = TCG_TYPE_I64;
2007         ts2->type = TCG_TYPE_I32;
2008         ts2->indirect_reg = indirect_reg;
2009         ts2->mem_allocated = 1;
2010         ts2->mem_base = base_ts;
2011         ts2->mem_offset = offset + 4;
2012         ts2->temp_subindex = 1;
2013         pstrcpy(buf, sizeof(buf), name);
2014         pstrcat(buf, sizeof(buf), "_1");
2015         ts2->name = strdup(buf);
2016     } else {
2017         ts->base_type = type;
2018         ts->type = type;
2019         ts->indirect_reg = indirect_reg;
2020         ts->mem_allocated = 1;
2021         ts->mem_base = base_ts;
2022         ts->mem_offset = offset;
2023         ts->name = name;
2024     }
2025     return ts;
2026 }
2027 
2028 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
2029 {
2030     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
2031     return temp_tcgv_i32(ts);
2032 }
2033 
2034 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
2035 {
2036     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
2037     return temp_tcgv_i64(ts);
2038 }
2039 
2040 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
2041 {
2042     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
2043     return temp_tcgv_ptr(ts);
2044 }
2045 
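/*
 * Allocate a new temporary of the given type and kind.  Multi-word
 * types expand to adjacent TCGTemps: e.g. TCG_TYPE_I128 on a 64-bit
 * host yields two temps of type TCG_TYPE_REG sharing base_type I128,
 * with temp_subindex 0 and 1.
 */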
2046 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
2047 {
2048     TCGContext *s = tcg_ctx;
2049     TCGTemp *ts;
2050     int n;
2051 
2052     if (kind == TEMP_EBB) {
2053         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
2054 
2055         if (idx < TCG_MAX_TEMPS) {
2056             /* There is already an available temp with the right type.  */
2057             clear_bit(idx, s->free_temps[type].l);
2058 
2059             ts = &s->temps[idx];
2060             ts->temp_allocated = 1;
2061             tcg_debug_assert(ts->base_type == type);
2062             tcg_debug_assert(ts->kind == kind);
2063             return ts;
2064         }
2065     } else {
2066         tcg_debug_assert(kind == TEMP_TB);
2067     }
2068 
2069     switch (type) {
2070     case TCG_TYPE_I32:
2071     case TCG_TYPE_V64:
2072     case TCG_TYPE_V128:
2073     case TCG_TYPE_V256:
2074         n = 1;
2075         break;
2076     case TCG_TYPE_I64:
2077         n = 64 / TCG_TARGET_REG_BITS;
2078         break;
2079     case TCG_TYPE_I128:
2080         n = 128 / TCG_TARGET_REG_BITS;
2081         break;
2082     default:
2083         g_assert_not_reached();
2084     }
2085 
2086     ts = tcg_temp_alloc(s);
2087     ts->base_type = type;
2088     ts->temp_allocated = 1;
2089     ts->kind = kind;
2090 
2091     if (n == 1) {
2092         ts->type = type;
2093     } else {
2094         ts->type = TCG_TYPE_REG;
2095 
2096         for (int i = 1; i < n; ++i) {
2097             TCGTemp *ts2 = tcg_temp_alloc(s);
2098 
2099             tcg_debug_assert(ts2 == ts + i);
2100             ts2->base_type = type;
2101             ts2->type = TCG_TYPE_REG;
2102             ts2->temp_allocated = 1;
2103             ts2->temp_subindex = i;
2104             ts2->kind = kind;
2105         }
2106     }
2107     return ts;
2108 }
2109 
2110 TCGv_i32 tcg_temp_new_i32(void)
2111 {
2112     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2113 }
2114 
2115 TCGv_i32 tcg_temp_ebb_new_i32(void)
2116 {
2117     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2118 }
2119 
2120 TCGv_i64 tcg_temp_new_i64(void)
2121 {
2122     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2123 }
2124 
2125 TCGv_i64 tcg_temp_ebb_new_i64(void)
2126 {
2127     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2128 }
2129 
2130 TCGv_ptr tcg_temp_new_ptr(void)
2131 {
2132     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2133 }
2134 
2135 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2136 {
2137     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2138 }
2139 
2140 TCGv_i128 tcg_temp_new_i128(void)
2141 {
2142     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2143 }
2144 
2145 TCGv_i128 tcg_temp_ebb_new_i128(void)
2146 {
2147     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2148 }
2149 
2150 TCGv_vec tcg_temp_new_vec(TCGType type)
2151 {
2152     TCGTemp *t;
2153 
2154 #ifdef CONFIG_DEBUG_TCG
2155     switch (type) {
2156     case TCG_TYPE_V64:
2157         assert(TCG_TARGET_HAS_v64);
2158         break;
2159     case TCG_TYPE_V128:
2160         assert(TCG_TARGET_HAS_v128);
2161         break;
2162     case TCG_TYPE_V256:
2163         assert(TCG_TARGET_HAS_v256);
2164         break;
2165     default:
2166         g_assert_not_reached();
2167     }
2168 #endif
2169 
2170     t = tcg_temp_new_internal(type, TEMP_EBB);
2171     return temp_tcgv_vec(t);
2172 }
2173 
2174 /* Create a new temp of the same type as an existing temp.  */
2175 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2176 {
2177     TCGTemp *t = tcgv_vec_temp(match);
2178 
2179     tcg_debug_assert(t->temp_allocated != 0);
2180 
2181     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2182     return temp_tcgv_vec(t);
2183 }
2184 
2185 void tcg_temp_free_internal(TCGTemp *ts)
2186 {
2187     TCGContext *s = tcg_ctx;
2188 
2189     switch (ts->kind) {
2190     case TEMP_CONST:
2191     case TEMP_TB:
2192         /* Silently ignore free. */
2193         break;
2194     case TEMP_EBB:
2195         tcg_debug_assert(ts->temp_allocated != 0);
2196         ts->temp_allocated = 0;
2197         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2198         break;
2199     default:
2200         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2201         g_assert_not_reached();
2202     }
2203 }
2204 
2205 void tcg_temp_free_i32(TCGv_i32 arg)
2206 {
2207     tcg_temp_free_internal(tcgv_i32_temp(arg));
2208 }
2209 
2210 void tcg_temp_free_i64(TCGv_i64 arg)
2211 {
2212     tcg_temp_free_internal(tcgv_i64_temp(arg));
2213 }
2214 
2215 void tcg_temp_free_i128(TCGv_i128 arg)
2216 {
2217     tcg_temp_free_internal(tcgv_i128_temp(arg));
2218 }
2219 
2220 void tcg_temp_free_ptr(TCGv_ptr arg)
2221 {
2222     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2223 }
2224 
2225 void tcg_temp_free_vec(TCGv_vec arg)
2226 {
2227     tcg_temp_free_internal(tcgv_vec_temp(arg));
2228 }
2229 
2230 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2231 {
2232     TCGContext *s = tcg_ctx;
2233     GHashTable *h = s->const_table[type];
2234     TCGTemp *ts;
2235 
2236     if (h == NULL) {
2237         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2238         s->const_table[type] = h;
2239     }
2240 
2241     ts = g_hash_table_lookup(h, &val);
2242     if (ts == NULL) {
2243         int64_t *val_ptr;
2244 
2245         ts = tcg_temp_alloc(s);
2246 
2247         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2248             TCGTemp *ts2 = tcg_temp_alloc(s);
2249 
2250             tcg_debug_assert(ts2 == ts + 1);
2251 
2252             ts->base_type = TCG_TYPE_I64;
2253             ts->type = TCG_TYPE_I32;
2254             ts->kind = TEMP_CONST;
2255             ts->temp_allocated = 1;
2256 
2257             ts2->base_type = TCG_TYPE_I64;
2258             ts2->type = TCG_TYPE_I32;
2259             ts2->kind = TEMP_CONST;
2260             ts2->temp_allocated = 1;
2261             ts2->temp_subindex = 1;
2262 
2263             /*
2264              * Retain the full value of the 64-bit constant in the low
2265              * part, so that the hash table works.  Actual uses will
2266              * truncate the value to its low 32 bits.
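             * E.g. on a little-endian 32-bit host, val 0x1111222233334444
             * keeps the full value in ts[0].val (the key hashed via
             * g_int64_hash) and 0x11112222 in ts[1].val.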
2267              */
2268             ts[HOST_BIG_ENDIAN].val = val;
2269             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2270             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2271         } else {
2272             ts->base_type = type;
2273             ts->type = type;
2274             ts->kind = TEMP_CONST;
2275             ts->temp_allocated = 1;
2276             ts->val = val;
2277             val_ptr = &ts->val;
2278         }
2279         g_hash_table_insert(h, val_ptr, ts);
2280     }
2281 
2282     return ts;
2283 }
2284 
2285 TCGv_i32 tcg_constant_i32(int32_t val)
2286 {
2287     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2288 }
2289 
2290 TCGv_i64 tcg_constant_i64(int64_t val)
2291 {
2292     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2293 }
2294 
2295 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2296 {
2297     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2298 }
2299 
2300 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2301 {
2302     val = dup_const(vece, val);
2303     return temp_tcgv_vec(tcg_constant_internal(type, val));
2304 }
2305 
2306 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2307 {
2308     TCGTemp *t = tcgv_vec_temp(match);
2309 
2310     tcg_debug_assert(t->temp_allocated != 0);
2311     return tcg_constant_vec(t->base_type, vece, val);
2312 }
2313 
2314 #ifdef CONFIG_DEBUG_TCG
2315 size_t temp_idx(TCGTemp *ts)
2316 {
2317     ptrdiff_t n = ts - tcg_ctx->temps;
2318     assert(n >= 0 && n < tcg_ctx->nb_temps);
2319     return n;
2320 }
2321 
2322 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2323 {
2324     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2325 
2326     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2327     assert(o % sizeof(TCGTemp) == 0);
2328 
2329     return (void *)tcg_ctx + (uintptr_t)v;
2330 }
2331 #endif /* CONFIG_DEBUG_TCG */
2332 
2333 /*
2334  * Return true if OP may appear in the opcode stream with TYPE.
2335  * Test the runtime variable that controls each opcode.
2336  */
2337 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2338 {
2339     bool has_type;
2340 
2341     switch (type) {
2342     case TCG_TYPE_I32:
2343         has_type = true;
2344         break;
2345     case TCG_TYPE_I64:
2346         has_type = TCG_TARGET_REG_BITS == 64;
2347         break;
2348     case TCG_TYPE_V64:
2349         has_type = TCG_TARGET_HAS_v64;
2350         break;
2351     case TCG_TYPE_V128:
2352         has_type = TCG_TARGET_HAS_v128;
2353         break;
2354     case TCG_TYPE_V256:
2355         has_type = TCG_TARGET_HAS_v256;
2356         break;
2357     default:
2358         has_type = false;
2359         break;
2360     }
2361 
2362     switch (op) {
2363     case INDEX_op_discard:
2364     case INDEX_op_set_label:
2365     case INDEX_op_call:
2366     case INDEX_op_br:
2367     case INDEX_op_mb:
2368     case INDEX_op_insn_start:
2369     case INDEX_op_exit_tb:
2370     case INDEX_op_goto_tb:
2371     case INDEX_op_goto_ptr:
2372     case INDEX_op_qemu_ld_i32:
2373     case INDEX_op_qemu_st_i32:
2374     case INDEX_op_qemu_ld_i64:
2375     case INDEX_op_qemu_st_i64:
2376         return true;
2377 
2378     case INDEX_op_qemu_st8_i32:
2379         return TCG_TARGET_HAS_qemu_st8_i32;
2380 
2381     case INDEX_op_qemu_ld_i128:
2382     case INDEX_op_qemu_st_i128:
2383         return TCG_TARGET_HAS_qemu_ldst_i128;
2384 
2385     case INDEX_op_add:
2386     case INDEX_op_and:
2387     case INDEX_op_brcond:
2388     case INDEX_op_deposit:
2389     case INDEX_op_extract:
2390     case INDEX_op_mov:
2391     case INDEX_op_movcond:
2392     case INDEX_op_negsetcond:
2393     case INDEX_op_or:
2394     case INDEX_op_setcond:
2395     case INDEX_op_sextract:
2396     case INDEX_op_xor:
2397         return has_type;
2398 
2399     case INDEX_op_ld8u_i32:
2400     case INDEX_op_ld8s_i32:
2401     case INDEX_op_ld16u_i32:
2402     case INDEX_op_ld16s_i32:
2403     case INDEX_op_ld_i32:
2404     case INDEX_op_st8_i32:
2405     case INDEX_op_st16_i32:
2406     case INDEX_op_st_i32:
2407         return true;
2408 
2409     case INDEX_op_add2_i32:
2410         return TCG_TARGET_HAS_add2_i32;
2411     case INDEX_op_sub2_i32:
2412         return TCG_TARGET_HAS_sub2_i32;
2413 
2414     case INDEX_op_brcond2_i32:
2415     case INDEX_op_setcond2_i32:
2416         return TCG_TARGET_REG_BITS == 32;
2417 
2418     case INDEX_op_ld8u_i64:
2419     case INDEX_op_ld8s_i64:
2420     case INDEX_op_ld16u_i64:
2421     case INDEX_op_ld16s_i64:
2422     case INDEX_op_ld32u_i64:
2423     case INDEX_op_ld32s_i64:
2424     case INDEX_op_ld_i64:
2425     case INDEX_op_st8_i64:
2426     case INDEX_op_st16_i64:
2427     case INDEX_op_st32_i64:
2428     case INDEX_op_st_i64:
2429     case INDEX_op_ext_i32_i64:
2430     case INDEX_op_extu_i32_i64:
2431     case INDEX_op_extrl_i64_i32:
2432     case INDEX_op_extrh_i64_i32:
2433         return TCG_TARGET_REG_BITS == 64;
2434 
2435     case INDEX_op_add2_i64:
2436         return TCG_TARGET_HAS_add2_i64;
2437     case INDEX_op_sub2_i64:
2438         return TCG_TARGET_HAS_sub2_i64;
2439 
2440     case INDEX_op_mov_vec:
2441     case INDEX_op_dup_vec:
2442     case INDEX_op_dupm_vec:
2443     case INDEX_op_ld_vec:
2444     case INDEX_op_st_vec:
2445     case INDEX_op_add_vec:
2446     case INDEX_op_sub_vec:
2447     case INDEX_op_and_vec:
2448     case INDEX_op_or_vec:
2449     case INDEX_op_xor_vec:
2450     case INDEX_op_cmp_vec:
2451         return has_type;
2452     case INDEX_op_dup2_vec:
2453         return has_type && TCG_TARGET_REG_BITS == 32;
2454     case INDEX_op_not_vec:
2455         return has_type && TCG_TARGET_HAS_not_vec;
2456     case INDEX_op_neg_vec:
2457         return has_type && TCG_TARGET_HAS_neg_vec;
2458     case INDEX_op_abs_vec:
2459         return has_type && TCG_TARGET_HAS_abs_vec;
2460     case INDEX_op_andc_vec:
2461         return has_type && TCG_TARGET_HAS_andc_vec;
2462     case INDEX_op_orc_vec:
2463         return has_type && TCG_TARGET_HAS_orc_vec;
2464     case INDEX_op_nand_vec:
2465         return has_type && TCG_TARGET_HAS_nand_vec;
2466     case INDEX_op_nor_vec:
2467         return has_type && TCG_TARGET_HAS_nor_vec;
2468     case INDEX_op_eqv_vec:
2469         return has_type && TCG_TARGET_HAS_eqv_vec;
2470     case INDEX_op_mul_vec:
2471         return has_type && TCG_TARGET_HAS_mul_vec;
2472     case INDEX_op_shli_vec:
2473     case INDEX_op_shri_vec:
2474     case INDEX_op_sari_vec:
2475         return has_type && TCG_TARGET_HAS_shi_vec;
2476     case INDEX_op_shls_vec:
2477     case INDEX_op_shrs_vec:
2478     case INDEX_op_sars_vec:
2479         return has_type && TCG_TARGET_HAS_shs_vec;
2480     case INDEX_op_shlv_vec:
2481     case INDEX_op_shrv_vec:
2482     case INDEX_op_sarv_vec:
2483         return has_type && TCG_TARGET_HAS_shv_vec;
2484     case INDEX_op_rotli_vec:
2485         return has_type && TCG_TARGET_HAS_roti_vec;
2486     case INDEX_op_rotls_vec:
2487         return has_type && TCG_TARGET_HAS_rots_vec;
2488     case INDEX_op_rotlv_vec:
2489     case INDEX_op_rotrv_vec:
2490         return has_type && TCG_TARGET_HAS_rotv_vec;
2491     case INDEX_op_ssadd_vec:
2492     case INDEX_op_usadd_vec:
2493     case INDEX_op_sssub_vec:
2494     case INDEX_op_ussub_vec:
2495         return has_type && TCG_TARGET_HAS_sat_vec;
2496     case INDEX_op_smin_vec:
2497     case INDEX_op_umin_vec:
2498     case INDEX_op_smax_vec:
2499     case INDEX_op_umax_vec:
2500         return has_type && TCG_TARGET_HAS_minmax_vec;
2501     case INDEX_op_bitsel_vec:
2502         return has_type && TCG_TARGET_HAS_bitsel_vec;
2503     case INDEX_op_cmpsel_vec:
2504         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2505 
2506     default:
2507         if (op < INDEX_op_last_generic) {
2508             const TCGOutOp *outop;
2509             TCGConstraintSetIndex con_set;
2510 
2511             if (!has_type) {
2512                 return false;
2513             }
2514 
2515             outop = all_outop[op];
2516             tcg_debug_assert(outop != NULL);
2517 
2518             con_set = outop->static_constraint;
2519             if (con_set == C_Dynamic) {
2520                 con_set = outop->dynamic_constraint(type, flags);
2521             }
2522             if (con_set >= 0) {
2523                 return true;
2524             }
2525             tcg_debug_assert(con_set == C_NotImplemented);
2526             return false;
2527         }
2528         tcg_debug_assert(op < NB_OPS);
2529         return true;
2530 
2531     case INDEX_op_last_generic:
2532         g_assert_not_reached();
2533     }
2534 }
2535 
2536 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2537 {
2538     unsigned width;
2539 
2540     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2541     width = (type == TCG_TYPE_I32 ? 32 : 64);
2542 
2543     tcg_debug_assert(ofs < width);
2544     tcg_debug_assert(len > 0);
2545     tcg_debug_assert(len <= width - ofs);
2546 
2547     return TCG_TARGET_deposit_valid(type, ofs, len);
2548 }
2549 
2550 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2551 
2552 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2553                           TCGTemp *ret, TCGTemp **args)
2554 {
2555     TCGv_i64 extend_free[MAX_CALL_IARGS];
2556     int n_extend = 0;
2557     TCGOp *op;
2558     int i, n, pi = 0, total_args;
2559 
2560     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2561         init_call_layout(info);
2562         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2563     }
2564 
2565     total_args = info->nr_out + info->nr_in + 2;
2566     op = tcg_op_alloc(INDEX_op_call, total_args);
2567 
2568 #ifdef CONFIG_PLUGIN
2569     /* Flag helpers that may affect guest state */
2570     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2571         tcg_ctx->plugin_insn->calls_helpers = true;
2572     }
2573 #endif
2574 
2575     TCGOP_CALLO(op) = n = info->nr_out;
2576     switch (n) {
2577     case 0:
2578         tcg_debug_assert(ret == NULL);
2579         break;
2580     case 1:
2581         tcg_debug_assert(ret != NULL);
2582         op->args[pi++] = temp_arg(ret);
2583         break;
2584     case 2:
2585     case 4:
2586         tcg_debug_assert(ret != NULL);
2587         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2588         tcg_debug_assert(ret->temp_subindex == 0);
2589         for (i = 0; i < n; ++i) {
2590             op->args[pi++] = temp_arg(ret + i);
2591         }
2592         break;
2593     default:
2594         g_assert_not_reached();
2595     }
2596 
2597     TCGOP_CALLI(op) = n = info->nr_in;
2598     for (i = 0; i < n; i++) {
2599         const TCGCallArgumentLoc *loc = &info->in[i];
2600         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2601 
2602         switch (loc->kind) {
2603         case TCG_CALL_ARG_NORMAL:
2604         case TCG_CALL_ARG_BY_REF:
2605         case TCG_CALL_ARG_BY_REF_N:
2606             op->args[pi++] = temp_arg(ts);
2607             break;
2608 
2609         case TCG_CALL_ARG_EXTEND_U:
2610         case TCG_CALL_ARG_EXTEND_S:
2611             {
2612                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2613                 TCGv_i32 orig = temp_tcgv_i32(ts);
2614 
2615                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2616                     tcg_gen_ext_i32_i64(temp, orig);
2617                 } else {
2618                     tcg_gen_extu_i32_i64(temp, orig);
2619                 }
2620                 op->args[pi++] = tcgv_i64_arg(temp);
2621                 extend_free[n_extend++] = temp;
2622             }
2623             break;
2624 
2625         default:
2626             g_assert_not_reached();
2627         }
2628     }
2629     op->args[pi++] = (uintptr_t)func;
2630     op->args[pi++] = (uintptr_t)info;
2631     tcg_debug_assert(pi == total_args);
2632 
2633     if (tcg_ctx->emit_before_op) {
2634         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2635     } else {
2636         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2637     }
2638 
2639     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2640     for (i = 0; i < n_extend; ++i) {
2641         tcg_temp_free_i64(extend_free[i]);
2642     }
2643 }
2644 
2645 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2646 {
2647     tcg_gen_callN(func, info, ret, NULL);
2648 }
2649 
2650 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2651 {
2652     tcg_gen_callN(func, info, ret, &t1);
2653 }
2654 
2655 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2656                    TCGTemp *t1, TCGTemp *t2)
2657 {
2658     TCGTemp *args[2] = { t1, t2 };
2659     tcg_gen_callN(func, info, ret, args);
2660 }
2661 
2662 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2663                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2664 {
2665     TCGTemp *args[3] = { t1, t2, t3 };
2666     tcg_gen_callN(func, info, ret, args);
2667 }
2668 
2669 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2670                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2671 {
2672     TCGTemp *args[4] = { t1, t2, t3, t4 };
2673     tcg_gen_callN(func, info, ret, args);
2674 }
2675 
2676 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2677                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2678 {
2679     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2680     tcg_gen_callN(func, info, ret, args);
2681 }
2682 
2683 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2684                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2685                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2686 {
2687     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2688     tcg_gen_callN(func, info, ret, args);
2689 }
2690 
2691 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2692                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2693                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2694 {
2695     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2696     tcg_gen_callN(func, info, ret, args);
2697 }
2698 
2699 static void tcg_reg_alloc_start(TCGContext *s)
2700 {
2701     int i, n;
2702 
2703     for (i = 0, n = s->nb_temps; i < n; i++) {
2704         TCGTemp *ts = &s->temps[i];
2705         TCGTempVal val = TEMP_VAL_MEM;
2706 
2707         switch (ts->kind) {
2708         case TEMP_CONST:
2709             val = TEMP_VAL_CONST;
2710             break;
2711         case TEMP_FIXED:
2712             val = TEMP_VAL_REG;
2713             break;
2714         case TEMP_GLOBAL:
2715             break;
2716         case TEMP_EBB:
2717             val = TEMP_VAL_DEAD;
2718             /* fall through */
2719         case TEMP_TB:
2720             ts->mem_allocated = 0;
2721             break;
2722         default:
2723             g_assert_not_reached();
2724         }
2725         ts->val_type = val;
2726     }
2727 
2728     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2729 }
2730 
2731 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2732                                  TCGTemp *ts)
2733 {
2734     int idx = temp_idx(ts);
2735 
2736     switch (ts->kind) {
2737     case TEMP_FIXED:
2738     case TEMP_GLOBAL:
2739         pstrcpy(buf, buf_size, ts->name);
2740         break;
2741     case TEMP_TB:
2742         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2743         break;
2744     case TEMP_EBB:
2745         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2746         break;
2747     case TEMP_CONST:
2748         switch (ts->type) {
2749         case TCG_TYPE_I32:
2750             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2751             break;
2752 #if TCG_TARGET_REG_BITS > 32
2753         case TCG_TYPE_I64:
2754             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2755             break;
2756 #endif
2757         case TCG_TYPE_V64:
2758         case TCG_TYPE_V128:
2759         case TCG_TYPE_V256:
2760             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2761                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2762             break;
2763         default:
2764             g_assert_not_reached();
2765         }
2766         break;
2767     }
2768     return buf;
2769 }
2770 
2771 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2772                              int buf_size, TCGArg arg)
2773 {
2774     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2775 }
2776 
2777 static const char * const cond_name[] =
2778 {
2779     [TCG_COND_NEVER] = "never",
2780     [TCG_COND_ALWAYS] = "always",
2781     [TCG_COND_EQ] = "eq",
2782     [TCG_COND_NE] = "ne",
2783     [TCG_COND_LT] = "lt",
2784     [TCG_COND_GE] = "ge",
2785     [TCG_COND_LE] = "le",
2786     [TCG_COND_GT] = "gt",
2787     [TCG_COND_LTU] = "ltu",
2788     [TCG_COND_GEU] = "geu",
2789     [TCG_COND_LEU] = "leu",
2790     [TCG_COND_GTU] = "gtu",
2791     [TCG_COND_TSTEQ] = "tsteq",
2792     [TCG_COND_TSTNE] = "tstne",
2793 };
2794 
2795 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2796 {
2797     [MO_UB]   = "ub",
2798     [MO_SB]   = "sb",
2799     [MO_LEUW] = "leuw",
2800     [MO_LESW] = "lesw",
2801     [MO_LEUL] = "leul",
2802     [MO_LESL] = "lesl",
2803     [MO_LEUQ] = "leq",
2804     [MO_BEUW] = "beuw",
2805     [MO_BESW] = "besw",
2806     [MO_BEUL] = "beul",
2807     [MO_BESL] = "besl",
2808     [MO_BEUQ] = "beq",
2809     [MO_128 + MO_BE] = "beo",
2810     [MO_128 + MO_LE] = "leo",
2811 };
2812 
2813 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2814     [MO_UNALN >> MO_ASHIFT]    = "un+",
2815     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2816     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2817     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2818     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2819     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2820     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2821     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2822 };
2823 
2824 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2825     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2826     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2827     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2828     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2829     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2830     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2831 };
2832 
2833 static const char bswap_flag_name[][6] = {
2834     [TCG_BSWAP_IZ] = "iz",
2835     [TCG_BSWAP_OZ] = "oz",
2836     [TCG_BSWAP_OS] = "os",
2837     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2838     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2839 };
2840 
2841 #ifdef CONFIG_PLUGIN
2842 static const char * const plugin_from_name[] = {
2843     "from-tb",
2844     "from-insn",
2845     "after-insn",
2846     "after-tb",
2847 };
2848 #endif
2849 
2850 static inline bool tcg_regset_single(TCGRegSet d)
2851 {
2852     return (d & (d - 1)) == 0;
2853 }
2854 
2855 static inline TCGReg tcg_regset_first(TCGRegSet d)
2856 {
2857     if (TCG_TARGET_NB_REGS <= 32) {
2858         return ctz32(d);
2859     } else {
2860         return ctz64(d);
2861     }
2862 }
2863 
2864 /* Return only the number of characters output -- no error return. */
2865 #define ne_fprintf(...) \
2866     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
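
/*
 * ne_fprintf uses a GCC statement expression so that it can appear
 * where an int is expected, e.g. col += ne_fprintf(f, "%s", name);
 * a negative fprintf result is clamped to 0, so column tracking
 * never moves backwards.
 */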
2867 
2868 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2869 {
2870     char buf[128];
2871     TCGOp *op;
2872 
2873     QTAILQ_FOREACH(op, &s->ops, link) {
2874         int i, k, nb_oargs, nb_iargs, nb_cargs;
2875         const TCGOpDef *def;
2876         TCGOpcode c;
2877         int col = 0;
2878 
2879         c = op->opc;
2880         def = &tcg_op_defs[c];
2881 
2882         if (c == INDEX_op_insn_start) {
2883             nb_oargs = 0;
2884             col += ne_fprintf(f, "\n ----");
2885 
2886             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2887                 col += ne_fprintf(f, " %016" PRIx64,
2888                                   tcg_get_insn_start_param(op, i));
2889             }
2890         } else if (c == INDEX_op_call) {
2891             const TCGHelperInfo *info = tcg_call_info(op);
2892             void *func = tcg_call_func(op);
2893 
2894             /* variable number of arguments */
2895             nb_oargs = TCGOP_CALLO(op);
2896             nb_iargs = TCGOP_CALLI(op);
2897             nb_cargs = def->nb_cargs;
2898 
2899             col += ne_fprintf(f, " %s ", def->name);
2900 
2901             /*
2902              * Print the function name from TCGHelperInfo, if available.
2903              * Note that plugins have a template function for the info,
2904              * but the actual function pointer comes from the plugin.
2905              */
2906             if (func == info->func) {
2907                 col += ne_fprintf(f, "%s", info->name);
2908             } else {
2909                 col += ne_fprintf(f, "plugin(%p)", func);
2910             }
2911 
2912             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2913             for (i = 0; i < nb_oargs; i++) {
2914                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2915                                                             op->args[i]));
2916             }
2917             for (i = 0; i < nb_iargs; i++) {
2918                 TCGArg arg = op->args[nb_oargs + i];
2919                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2920                 col += ne_fprintf(f, ",%s", t);
2921             }
2922         } else {
2923             if (def->flags & TCG_OPF_INT) {
2924                 col += ne_fprintf(f, " %s_i%d ",
2925                                   def->name,
2926                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2927             } else if (def->flags & TCG_OPF_VECTOR) {
2928                 col += ne_fprintf(f, "%s v%d,e%d,",
2929                                   def->name,
2930                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2931                                   8 << TCGOP_VECE(op));
2932             } else {
2933                 col += ne_fprintf(f, " %s ", def->name);
2934             }
2935 
2936             nb_oargs = def->nb_oargs;
2937             nb_iargs = def->nb_iargs;
2938             nb_cargs = def->nb_cargs;
2939 
2940             k = 0;
2941             for (i = 0; i < nb_oargs; i++) {
2942                 const char *sep =  k ? "," : "";
2943                 col += ne_fprintf(f, "%s%s", sep,
2944                                   tcg_get_arg_str(s, buf, sizeof(buf),
2945                                                   op->args[k++]));
2946             }
2947             for (i = 0; i < nb_iargs; i++) {
2948                 const char *sep =  k ? "," : "";
2949                 col += ne_fprintf(f, "%s%s", sep,
2950                                   tcg_get_arg_str(s, buf, sizeof(buf),
2951                                                   op->args[k++]));
2952             }
2953             switch (c) {
2954             case INDEX_op_brcond:
2955             case INDEX_op_setcond:
2956             case INDEX_op_negsetcond:
2957             case INDEX_op_movcond:
2958             case INDEX_op_brcond2_i32:
2959             case INDEX_op_setcond2_i32:
2960             case INDEX_op_cmp_vec:
2961             case INDEX_op_cmpsel_vec:
2962                 if (op->args[k] < ARRAY_SIZE(cond_name)
2963                     && cond_name[op->args[k]]) {
2964                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2965                 } else {
2966                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2967                 }
2968                 i = 1;
2969                 break;
2970             case INDEX_op_qemu_ld_i32:
2971             case INDEX_op_qemu_st_i32:
2972             case INDEX_op_qemu_st8_i32:
2973             case INDEX_op_qemu_ld_i64:
2974             case INDEX_op_qemu_st_i64:
2975             case INDEX_op_qemu_ld_i128:
2976             case INDEX_op_qemu_st_i128:
2977                 {
2978                     const char *s_al, *s_op, *s_at;
2979                     MemOpIdx oi = op->args[k++];
2980                     MemOp mop = get_memop(oi);
2981                     unsigned ix = get_mmuidx(oi);
2982 
2983                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2984                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2985                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2986                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2987 
2988                     /* If all fields are accounted for, print symbolically. */
2989                     if (!mop && s_al && s_op && s_at) {
2990                         col += ne_fprintf(f, ",%s%s%s,%u",
2991                                           s_at, s_al, s_op, ix);
2992                     } else {
2993                         mop = get_memop(oi);
2994                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2995                     }
2996                     i = 1;
2997                 }
2998                 break;
2999             case INDEX_op_bswap16:
3000             case INDEX_op_bswap32:
3001             case INDEX_op_bswap64:
3002                 {
3003                     TCGArg flags = op->args[k];
3004                     const char *name = NULL;
3005 
3006                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
3007                         name = bswap_flag_name[flags];
3008                     }
3009                     if (name) {
3010                         col += ne_fprintf(f, ",%s", name);
3011                     } else {
3012                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
3013                     }
3014                     i = k = 1;
3015                 }
3016                 break;
3017 #ifdef CONFIG_PLUGIN
3018             case INDEX_op_plugin_cb:
3019                 {
3020                     TCGArg from = op->args[k++];
3021                     const char *name = NULL;
3022 
3023                     if (from < ARRAY_SIZE(plugin_from_name)) {
3024                         name = plugin_from_name[from];
3025                     }
3026                     if (name) {
3027                         col += ne_fprintf(f, "%s", name);
3028                     } else {
3029                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
3030                     }
3031                     i = 1;
3032                 }
3033                 break;
3034 #endif
3035             default:
3036                 i = 0;
3037                 break;
3038             }
3039             switch (c) {
3040             case INDEX_op_set_label:
3041             case INDEX_op_br:
3042             case INDEX_op_brcond:
3043             case INDEX_op_brcond2_i32:
3044                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
3045                                   arg_label(op->args[k])->id);
3046                 i++, k++;
3047                 break;
3048             case INDEX_op_mb:
3049                 {
3050                     TCGBar membar = op->args[k];
3051                     const char *b_op, *m_op;
3052 
3053                     switch (membar & TCG_BAR_SC) {
3054                     case 0:
3055                         b_op = "none";
3056                         break;
3057                     case TCG_BAR_LDAQ:
3058                         b_op = "acq";
3059                         break;
3060                     case TCG_BAR_STRL:
3061                         b_op = "rel";
3062                         break;
3063                     case TCG_BAR_SC:
3064                         b_op = "seq";
3065                         break;
3066                     default:
3067                         g_assert_not_reached();
3068                     }
3069 
3070                     switch (membar & TCG_MO_ALL) {
3071                     case 0:
3072                         m_op = "none";
3073                         break;
3074                     case TCG_MO_LD_LD:
3075                         m_op = "rr";
3076                         break;
3077                     case TCG_MO_LD_ST:
3078                         m_op = "rw";
3079                         break;
3080                     case TCG_MO_ST_LD:
3081                         m_op = "wr";
3082                         break;
3083                     case TCG_MO_ST_ST:
3084                         m_op = "ww";
3085                         break;
3086                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3087                         m_op = "rr+rw";
3088                         break;
3089                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3090                         m_op = "rr+wr";
3091                         break;
3092                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3093                         m_op = "rr+ww";
3094                         break;
3095                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3096                         m_op = "rw+wr";
3097                         break;
3098                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3099                         m_op = "rw+ww";
3100                         break;
3101                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3102                         m_op = "wr+ww";
3103                         break;
3104                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3105                         m_op = "rr+rw+wr";
3106                         break;
3107                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3108                         m_op = "rr+rw+ww";
3109                         break;
3110                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3111                         m_op = "rr+wr+ww";
3112                         break;
3113                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3114                         m_op = "rw+wr+ww";
3115                         break;
3116                     case TCG_MO_ALL:
3117                         m_op = "all";
3118                         break;
3119                     default:
3120                         g_assert_not_reached();
3121                     }
3122 
3123                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3124                     i++, k++;
3125                 }
3126                 break;
3127             default:
3128                 break;
3129             }
3130             for (; i < nb_cargs; i++, k++) {
3131                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3132                                   op->args[k]);
3133             }
3134         }
3135 
3136         if (have_prefs || op->life) {
3137             for (; col < 40; ++col) {
3138                 putc(' ', f);
3139             }
3140         }
3141 
3142         if (op->life) {
3143             unsigned life = op->life;
3144 
3145             if (life & (SYNC_ARG * 3)) {
3146                 ne_fprintf(f, "  sync:");
3147                 for (i = 0; i < 2; ++i) {
3148                     if (life & (SYNC_ARG << i)) {
3149                         ne_fprintf(f, " %d", i);
3150                     }
3151                 }
3152             }
3153             life /= DEAD_ARG;
3154             if (life) {
3155                 ne_fprintf(f, "  dead:");
3156                 for (i = 0; life; ++i, life >>= 1) {
3157                     if (life & 1) {
3158                         ne_fprintf(f, " %d", i);
3159                     }
3160                 }
3161             }
3162         }
3163 
3164         if (have_prefs) {
3165             for (i = 0; i < nb_oargs; ++i) {
3166                 TCGRegSet set = output_pref(op, i);
3167 
3168                 if (i == 0) {
3169                     ne_fprintf(f, "  pref=");
3170                 } else {
3171                     ne_fprintf(f, ",");
3172                 }
3173                 if (set == 0) {
3174                     ne_fprintf(f, "none");
3175                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3176                     ne_fprintf(f, "all");
3177 #ifdef CONFIG_DEBUG_TCG
3178                 } else if (tcg_regset_single(set)) {
3179                     TCGReg reg = tcg_regset_first(set);
3180                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3181 #endif
3182                 } else if (TCG_TARGET_NB_REGS <= 32) {
3183                     ne_fprintf(f, "0x%x", (uint32_t)set);
3184                 } else {
3185                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3186                 }
3187             }
3188         }
3189 
3190         putc('\n', f);
3191     }
3192 }
3193 
3194 /* We give more priority to constraints with fewer registers. */
3195 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3196 {
3197     int n;
3198 
3199     arg_ct += k;
3200     n = ctpop64(arg_ct->regs);
3201 
3202     /*
3203      * Sort constraints of a single register first, which includes output
3204      * aliases (which must exactly match the input already allocated).
3205      */
3206     if (n == 1 || arg_ct->oalias) {
3207         return INT_MAX;
3208     }
3209 
3210     /*
3211      * Sort register pairs next: the second of a pair sorts just after the first.
3212      * Arbitrarily sort multiple pairs by the index of the first reg;
3213      * there shouldn't be many pairs.
3214      */
3215     switch (arg_ct->pair) {
3216     case 1:
3217     case 3:
3218         return (k + 1) * 2;
3219     case 2:
3220         return (arg_ct->pair_index + 1) * 2 - 1;
3221     }
3222 
3223     /* Finally, sort by decreasing register count. */
3224     assert(n > 1);
3225     return -n;
3226 }
3227 
3228 /* sort from highest priority to lowest */
3229 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3230 {
3231     int i, j;
3232 
3233     for (i = 0; i < n; i++) {
3234         a[start + i].sort_index = start + i;
3235     }
3236     if (n <= 1) {
3237         return;
3238     }
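         /* Simple O(n^2) exchange sort on sort_index; n is always small. */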
3239     for (i = 0; i < n - 1; i++) {
3240         for (j = i + 1; j < n; j++) {
3241             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3242             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3243             if (p1 < p2) {
3244                 int tmp = a[start + i].sort_index;
3245                 a[start + i].sort_index = a[start + j].sort_index;
3246                 a[start + j].sort_index = tmp;
3247             }
3248         }
3249     }
3250 }
3251 
3252 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3253 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3254 
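     /*
      * Parse each TCGConstraintSet into an array of TCGArgConstraint.
      * In a constraint string, '0'-'9' alias an output operand, '&' marks
      * an output as newly allocated (early clobber), 'p'/'m' allocate the
      * register after/before the previous operand, 'i' permits a constant,
      * and remaining letters come from tcg-target-con-str.h (e.g. 'r' is
      * typically the full register set).  Thus e.g. { "r", "0", "ri" }
      * would describe a register output, an input aliasing that output,
      * and a register-or-immediate input.
      */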
3255 static void process_constraint_sets(void)
3256 {
3257     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3258         const TCGConstraintSet *tdefs = &constraint_sets[c];
3259         TCGArgConstraint *args_ct = all_cts[c];
3260         int nb_oargs = tdefs->nb_oargs;
3261         int nb_iargs = tdefs->nb_iargs;
3262         int nb_args = nb_oargs + nb_iargs;
3263         bool saw_alias_pair = false;
3264 
3265         for (int i = 0; i < nb_args; i++) {
3266             const char *ct_str = tdefs->args_ct_str[i];
3267             bool input_p = i >= nb_oargs;
3268             int o;
3269 
3270             switch (*ct_str) {
3271             case '0' ... '9':
3272                 o = *ct_str - '0';
3273                 tcg_debug_assert(input_p);
3274                 tcg_debug_assert(o < nb_oargs);
3275                 tcg_debug_assert(args_ct[o].regs != 0);
3276                 tcg_debug_assert(!args_ct[o].oalias);
3277                 args_ct[i] = args_ct[o];
3278                 /* The output sets oalias.  */
3279                 args_ct[o].oalias = 1;
3280                 args_ct[o].alias_index = i;
3281                 /* The input sets ialias. */
3282                 args_ct[i].ialias = 1;
3283                 args_ct[i].alias_index = o;
3284                 if (args_ct[i].pair) {
3285                     saw_alias_pair = true;
3286                 }
3287                 tcg_debug_assert(ct_str[1] == '\0');
3288                 continue;
3289 
3290             case '&':
3291                 tcg_debug_assert(!input_p);
3292                 args_ct[i].newreg = true;
3293                 ct_str++;
3294                 break;
3295 
3296             case 'p': /* plus */
3297                 /* Allocate to the register after the previous. */
3298                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3299                 o = i - 1;
3300                 tcg_debug_assert(!args_ct[o].pair);
3301                 tcg_debug_assert(!args_ct[o].ct);
3302                 args_ct[i] = (TCGArgConstraint){
3303                     .pair = 2,
3304                     .pair_index = o,
3305                     .regs = args_ct[o].regs << 1,
3306                     .newreg = args_ct[o].newreg,
3307                 };
3308                 args_ct[o].pair = 1;
3309                 args_ct[o].pair_index = i;
3310                 tcg_debug_assert(ct_str[1] == '\0');
3311                 continue;
3312 
3313             case 'm': /* minus */
3314                 /* Allocate to the register before the previous. */
3315                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3316                 o = i - 1;
3317                 tcg_debug_assert(!args_ct[o].pair);
3318                 tcg_debug_assert(!args_ct[o].ct);
3319                 args_ct[i] = (TCGArgConstraint){
3320                     .pair = 1,
3321                     .pair_index = o,
3322                     .regs = args_ct[o].regs >> 1,
3323                     .newreg = args_ct[o].newreg,
3324                 };
3325                 args_ct[o].pair = 2;
3326                 args_ct[o].pair_index = i;
3327                 tcg_debug_assert(ct_str[1] == '\0');
3328                 continue;
3329             }
3330 
3331             do {
3332                 switch (*ct_str) {
3333                 case 'i':
3334                     args_ct[i].ct |= TCG_CT_CONST;
3335                     break;
3336 #ifdef TCG_REG_ZERO
3337                 case 'z':
3338                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3339                     break;
3340 #endif
3341 
3342                 /* Include all of the target-specific constraints. */
3343 
3344 #undef CONST
3345 #define CONST(CASE, MASK) \
3346     case CASE: args_ct[i].ct |= MASK; break;
3347 #define REGS(CASE, MASK) \
3348     case CASE: args_ct[i].regs |= MASK; break;
3349 
3350 #include "tcg-target-con-str.h"
3351 
3352 #undef REGS
3353 #undef CONST
3354                 default:
3355                 case '0' ... '9':
3356                 case '&':
3357                 case 'p':
3358                 case 'm':
3359                     /* Typo in TCGConstraintSet constraint. */
3360                     g_assert_not_reached();
3361                 }
3362             } while (*++ct_str != '\0');
3363         }
3364 
3365         /*
3366          * Fix up output pairs that are aliased with inputs.
3367          * When we created the alias, we copied pair from the output.
3368          * There are three cases:
3369          *    (1a) Pairs of inputs alias pairs of outputs.
3370          *    (1b) One input aliases the first of a pair of outputs.
3371          *    (2)  One input aliases the second of a pair of outputs.
3372          *
3373          * Case 1a is handled by making sure that the pair_index'es are
3374          * properly updated so that they appear the same as a pair of inputs.
3375          *
3376          * Case 1b is handled by setting the pair_index of the input to
3377          * itself, simply so it doesn't point to an unrelated argument.
3378          * Since we don't encounter the "second" during the input allocation
3379          * phase, nothing happens with the second half of the input pair.
3380          *
3381          * Case 2 is handled by setting the second input to pair=3, the
3382          * first output to pair=3, and the pair_index'es to match.
3383          */
3384         if (saw_alias_pair) {
3385             for (int i = nb_oargs; i < nb_args; i++) {
3386                 int o, o2, i2;
3387 
3388                 /*
3389                  * Since [0-9pm] must be alone in the constraint string,
3390                  * the only way they can both be set is if the pair comes
3391                  * from the output alias.
3392                  */
3393                 if (!args_ct[i].ialias) {
3394                     continue;
3395                 }
3396                 switch (args_ct[i].pair) {
3397                 case 0:
3398                     break;
3399                 case 1:
3400                     o = args_ct[i].alias_index;
3401                     o2 = args_ct[o].pair_index;
3402                     tcg_debug_assert(args_ct[o].pair == 1);
3403                     tcg_debug_assert(args_ct[o2].pair == 2);
3404                     if (args_ct[o2].oalias) {
3405                         /* Case 1a */
3406                         i2 = args_ct[o2].alias_index;
3407                         tcg_debug_assert(args_ct[i2].pair == 2);
3408                         args_ct[i2].pair_index = i;
3409                         args_ct[i].pair_index = i2;
3410                     } else {
3411                         /* Case 1b */
3412                         args_ct[i].pair_index = i;
3413                     }
3414                     break;
3415                 case 2:
3416                     o = args_ct[i].alias_index;
3417                     o2 = args_ct[o].pair_index;
3418                     tcg_debug_assert(args_ct[o].pair == 2);
3419                     tcg_debug_assert(args_ct[o2].pair == 1);
3420                     if (args_ct[o2].oalias) {
3421                         /* Case 1a */
3422                         i2 = args_ct[o2].alias_index;
3423                         tcg_debug_assert(args_ct[i2].pair == 1);
3424                         args_ct[i2].pair_index = i;
3425                         args_ct[i].pair_index = i2;
3426                     } else {
3427                         /* Case 2 */
3428                         args_ct[i].pair = 3;
3429                         args_ct[o2].pair = 3;
3430                         args_ct[i].pair_index = o2;
3431                         args_ct[o2].pair_index = i;
3432                     }
3433                     break;
3434                 default:
3435                     g_assert_not_reached();
3436                 }
3437             }
3438         }
3439 
3440         /* sort the constraints (XXX: this is just an heuristic) */
3441         sort_constraints(args_ct, 0, nb_oargs);
3442         sort_constraints(args_ct, nb_oargs, nb_iargs);
3443     }
3444 }
3445 
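     /* Look up the constraints for @op, resolving any dynamic constraint set. */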
3446 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3447 {
3448     TCGOpcode opc = op->opc;
3449     TCGType type = TCGOP_TYPE(op);
3450     unsigned flags = TCGOP_FLAGS(op);
3451     const TCGOpDef *def = &tcg_op_defs[opc];
3452     const TCGOutOp *outop = all_outop[opc];
3453     TCGConstraintSetIndex con_set;
3454 
3455     if (def->flags & TCG_OPF_NOT_PRESENT) {
3456         return empty_cts;
3457     }
3458 
3459     if (outop) {
3460         con_set = outop->static_constraint;
3461         if (con_set == C_Dynamic) {
3462             con_set = outop->dynamic_constraint(type, flags);
3463         }
3464     } else {
3465         con_set = tcg_target_op_def(opc, type, flags);
3466     }
3467     tcg_debug_assert(con_set >= 0);
3468     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3469 
3470     /* The constraint arguments must match TCGOpcode arguments. */
3471     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3472     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3473 
3474     return all_cts[con_set];
3475 }
3476 
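     /* Drop @op from the branch-use list of the label in op->args[idx]. */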
3477 static void remove_label_use(TCGOp *op, int idx)
3478 {
3479     TCGLabel *label = arg_label(op->args[idx]);
3480     TCGLabelUse *use;
3481 
3482     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3483         if (use->op == op) {
3484             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3485             return;
3486         }
3487     }
3488     g_assert_not_reached();
3489 }
3490 
3491 void tcg_op_remove(TCGContext *s, TCGOp *op)
3492 {
3493     switch (op->opc) {
3494     case INDEX_op_br:
3495         remove_label_use(op, 0);
3496         break;
3497     case INDEX_op_brcond:
3498         remove_label_use(op, 3);
3499         break;
3500     case INDEX_op_brcond2_i32:
3501         remove_label_use(op, 5);
3502         break;
3503     default:
3504         break;
3505     }
3506 
3507     QTAILQ_REMOVE(&s->ops, op, link);
3508     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3509     s->nb_ops--;
3510 }
3511 
3512 void tcg_remove_ops_after(TCGOp *op)
3513 {
3514     TCGContext *s = tcg_ctx;
3515 
3516     while (true) {
3517         TCGOp *last = tcg_last_op();
3518         if (last == op) {
3519             return;
3520         }
3521         tcg_op_remove(s, last);
3522     }
3523 }
3524 
3525 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3526 {
3527     TCGContext *s = tcg_ctx;
3528     TCGOp *op = NULL;
3529 
3530     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
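             /* First fit: reuse any freed op with enough argument slots. */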
3531         QTAILQ_FOREACH(op, &s->free_ops, link) {
3532             if (nargs <= op->nargs) {
3533                 QTAILQ_REMOVE(&s->free_ops, op, link);
3534                 nargs = op->nargs;
3535                 goto found;
3536             }
3537         }
3538     }
3539 
3540     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3541     nargs = MAX(4, nargs);
3542     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3543 
3544  found:
3545     memset(op, 0, offsetof(TCGOp, link));
3546     op->opc = opc;
3547     op->nargs = nargs;
3548 
3549     /* Check for bitfield overflow. */
3550     tcg_debug_assert(op->nargs == nargs);
3551 
3552     s->nb_ops++;
3553     return op;
3554 }
3555 
3556 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3557 {
3558     TCGOp *op = tcg_op_alloc(opc, nargs);
3559 
3560     if (tcg_ctx->emit_before_op) {
3561         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3562     } else {
3563         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3564     }
3565     return op;
3566 }
3567 
3568 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3569                             TCGOpcode opc, TCGType type, unsigned nargs)
3570 {
3571     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3572 
3573     TCGOP_TYPE(new_op) = type;
3574     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3575     return new_op;
3576 }
3577 
3578 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3579                            TCGOpcode opc, TCGType type, unsigned nargs)
3580 {
3581     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3582 
3583     TCGOP_TYPE(new_op) = type;
3584     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3585     return new_op;
3586 }
3587 
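     /* Redirect all branch uses of @from to @to, then merge the use lists. */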
3588 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3589 {
3590     TCGLabelUse *u;
3591 
3592     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3593         TCGOp *op = u->op;
3594         switch (op->opc) {
3595         case INDEX_op_br:
3596             op->args[0] = label_arg(to);
3597             break;
3598         case INDEX_op_brcond:
3599             op->args[3] = label_arg(to);
3600             break;
3601         case INDEX_op_brcond2_i32:
3602             op->args[5] = label_arg(to);
3603             break;
3604         default:
3605             g_assert_not_reached();
3606         }
3607     }
3608 
3609     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3610 }
3611 
3612 /* Reachability analysis: remove unreachable code.  */
3613 static void __attribute__((noinline))
3614 reachable_code_pass(TCGContext *s)
3615 {
3616     TCGOp *op, *op_next, *op_prev;
3617     bool dead = false;
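         /* "dead" is true between an unconditional branch and the next label. */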
3618 
3619     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3620         bool remove = dead;
3621         TCGLabel *label;
3622 
3623         switch (op->opc) {
3624         case INDEX_op_set_label:
3625             label = arg_label(op->args[0]);
3626 
3627             /*
3628              * Note that the first op in the TB is always a load,
3629              * so there is always something before a label.
3630              */
3631             op_prev = QTAILQ_PREV(op, link);
3632 
3633             /*
3634              * If we find two sequential labels, move all branches to
3635              * reference the second label and remove the first label.
3636              * Do this before branch to next optimization, so that the
3637              * middle label is out of the way.
3638              */
3639             if (op_prev->opc == INDEX_op_set_label) {
3640                 move_label_uses(label, arg_label(op_prev->args[0]));
3641                 tcg_op_remove(s, op_prev);
3642                 op_prev = QTAILQ_PREV(op, link);
3643             }
3644 
3645             /*
3646              * Optimization can fold conditional branches to unconditional.
3647              * If we find a label which is preceded by an unconditional
3648              * branch to next, remove the branch.  We couldn't do this when
3649              * processing the branch because any dead code between the branch
3650              * and label had not yet been removed.
3651              */
3652             if (op_prev->opc == INDEX_op_br &&
3653                 label == arg_label(op_prev->args[0])) {
3654                 tcg_op_remove(s, op_prev);
3655                 /* Fall through means insns become live again.  */
3656                 dead = false;
3657             }
3658 
3659             if (QSIMPLEQ_EMPTY(&label->branches)) {
3660                 /*
3661                  * While there is an occasional backward branch, virtually
3662                  * all branches generated by the translators are forward,
3663                  * which means that by this point we will generally have
3664                  * removed all references the label will ever have, and
3665                  * there is little to be gained by iterating.
3666                  */
3667                 remove = true;
3668             } else {
3669                 /* Once we see a label, insns become live again.  */
3670                 dead = false;
3671                 remove = false;
3672             }
3673             break;
3674 
3675         case INDEX_op_br:
3676         case INDEX_op_exit_tb:
3677         case INDEX_op_goto_ptr:
3678             /* Unconditional branches; everything following is dead.  */
3679             dead = true;
3680             break;
3681 
3682         case INDEX_op_call:
3683             /* Notice noreturn helper calls, raising exceptions.  */
3684             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3685                 dead = true;
3686             }
3687             break;
3688 
3689         case INDEX_op_insn_start:
3690             /* Never remove -- we need to keep these for unwind.  */
3691             remove = false;
3692             break;
3693 
3694         default:
3695             break;
3696         }
3697 
3698         if (remove) {
3699             tcg_op_remove(s, op);
3700         }
3701     }
3702 }
3703 
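     /*
      * Per-temp liveness state bits: TS_DEAD means the value need not be
      * kept in a register beyond this point; TS_MEM means the value should
      * be (or already is) in its memory slot.  TS_DEAD | TS_MEM is thus
      * "dead here, but synced back to memory", as used for globals.
      */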
3704 #define TS_DEAD  1
3705 #define TS_MEM   2
3706 
3707 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3708 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3709 
3710 /* For liveness_pass_1, the register preferences for a given temp.  */
3711 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3712 {
3713     return ts->state_ptr;
3714 }
3715 
3716 /* For liveness_pass_1, reset the preferences for a given temp to the
3717  * maximal regset for its type.
3718  */
3719 static inline void la_reset_pref(TCGTemp *ts)
3720 {
3721     *la_temp_pref(ts)
3722         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3723 }
3724 
3725 /* liveness analysis: end of function: all temps are dead, and globals
3726    should be in memory. */
3727 static void la_func_end(TCGContext *s, int ng, int nt)
3728 {
3729     int i;
3730 
3731     for (i = 0; i < ng; ++i) {
3732         s->temps[i].state = TS_DEAD | TS_MEM;
3733         la_reset_pref(&s->temps[i]);
3734     }
3735     for (i = ng; i < nt; ++i) {
3736         s->temps[i].state = TS_DEAD;
3737         la_reset_pref(&s->temps[i]);
3738     }
3739 }
3740 
3741 /* liveness analysis: end of basic block: all temps are dead, globals
3742    and local temps should be in memory. */
3743 static void la_bb_end(TCGContext *s, int ng, int nt)
3744 {
3745     int i;
3746 
3747     for (i = 0; i < nt; ++i) {
3748         TCGTemp *ts = &s->temps[i];
3749         int state;
3750 
3751         switch (ts->kind) {
3752         case TEMP_FIXED:
3753         case TEMP_GLOBAL:
3754         case TEMP_TB:
3755             state = TS_DEAD | TS_MEM;
3756             break;
3757         case TEMP_EBB:
3758         case TEMP_CONST:
3759             state = TS_DEAD;
3760             break;
3761         default:
3762             g_assert_not_reached();
3763         }
3764         ts->state = state;
3765         la_reset_pref(ts);
3766     }
3767 }
3768 
3769 /* liveness analysis: sync globals back to memory.  */
3770 static void la_global_sync(TCGContext *s, int ng)
3771 {
3772     int i;
3773 
3774     for (i = 0; i < ng; ++i) {
3775         int state = s->temps[i].state;
3776         s->temps[i].state = state | TS_MEM;
3777         if (state == TS_DEAD) {
3778             /* If the global was previously dead, reset prefs.  */
3779             la_reset_pref(&s->temps[i]);
3780         }
3781     }
3782 }
3783 
3784 /*
3785  * liveness analysis: conditional branch: all temps are dead unless
3786  * explicitly live across the conditional branch; globals and local
3787  * temps should be synced.
3788  */
3789 static void la_bb_sync(TCGContext *s, int ng, int nt)
3790 {
3791     la_global_sync(s, ng);
3792 
3793     for (int i = ng; i < nt; ++i) {
3794         TCGTemp *ts = &s->temps[i];
3795         int state;
3796 
3797         switch (ts->kind) {
3798         case TEMP_TB:
3799             state = ts->state;
3800             ts->state = state | TS_MEM;
3801             if (state != TS_DEAD) {
3802                 continue;
3803             }
3804             break;
3805         case TEMP_EBB:
3806         case TEMP_CONST:
3807             continue;
3808         default:
3809             g_assert_not_reached();
3810         }
3811         la_reset_pref(&s->temps[i]);
3812     }
3813 }
3814 
3815 /* liveness analysis: sync globals back to memory and kill.  */
3816 static void la_global_kill(TCGContext *s, int ng)
3817 {
3818     int i;
3819 
3820     for (i = 0; i < ng; i++) {
3821         s->temps[i].state = TS_DEAD | TS_MEM;
3822         la_reset_pref(&s->temps[i]);
3823     }
3824 }
3825 
3826 /* liveness analysis: remove call-clobbered regs from live temps' prefs.  */
3827 static void la_cross_call(TCGContext *s, int nt)
3828 {
3829     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3830     int i;
3831 
3832     for (i = 0; i < nt; i++) {
3833         TCGTemp *ts = &s->temps[i];
3834         if (!(ts->state & TS_DEAD)) {
3835             TCGRegSet *pset = la_temp_pref(ts);
3836             TCGRegSet set = *pset;
3837 
3838             set &= mask;
3839             /* If the combination is not possible, restart.  */
3840             if (set == 0) {
3841                 set = tcg_target_available_regs[ts->type] & mask;
3842             }
3843             *pset = set;
3844         }
3845     }
3846 }
3847 
3848 /*
3849  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3850  * to TEMP_EBB, if possible.
3851  */
3852 static void __attribute__((noinline))
3853 liveness_pass_0(TCGContext *s)
3854 {
3855     void * const multiple_ebb = (void *)(uintptr_t)-1;
3856     int nb_temps = s->nb_temps;
3857     TCGOp *op, *ebb;
3858 
3859     for (int i = s->nb_globals; i < nb_temps; ++i) {
3860         s->temps[i].state_ptr = NULL;
3861     }
3862 
3863     /*
3864      * Represent each EBB by the op at which it begins.  In the case of
3865      * the first EBB, this is the first op, otherwise it is a label.
3866      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3867      * within a single EBB, else MULTIPLE_EBB.
3868      */
3869     ebb = QTAILQ_FIRST(&s->ops);
3870     QTAILQ_FOREACH(op, &s->ops, link) {
3871         const TCGOpDef *def;
3872         int nb_oargs, nb_iargs;
3873 
3874         switch (op->opc) {
3875         case INDEX_op_set_label:
3876             ebb = op;
3877             continue;
3878         case INDEX_op_discard:
3879             continue;
3880         case INDEX_op_call:
3881             nb_oargs = TCGOP_CALLO(op);
3882             nb_iargs = TCGOP_CALLI(op);
3883             break;
3884         default:
3885             def = &tcg_op_defs[op->opc];
3886             nb_oargs = def->nb_oargs;
3887             nb_iargs = def->nb_iargs;
3888             break;
3889         }
3890 
3891         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3892             TCGTemp *ts = arg_temp(op->args[i]);
3893 
3894             if (ts->kind != TEMP_TB) {
3895                 continue;
3896             }
3897             if (ts->state_ptr == NULL) {
3898                 ts->state_ptr = ebb;
3899             } else if (ts->state_ptr != ebb) {
3900                 ts->state_ptr = multiple_ebb;
3901             }
3902         }
3903     }
3904 
3905     /*
3906      * For TEMP_TB that turned out not to be used beyond one EBB,
3907      * reduce the liveness to TEMP_EBB.
3908      */
3909     for (int i = s->nb_globals; i < nb_temps; ++i) {
3910         TCGTemp *ts = &s->temps[i];
3911         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3912             ts->kind = TEMP_EBB;
3913         }
3914     }
3915 }
3916 
3917 static void assert_carry_dead(TCGContext *s)
3918 {
3919     /*
3920      * Carry operations can be separated by a few insns like mov,
3921      * load or store, but they should always be "close", and
3922      * carry-out operations should always be paired with carry-in.
3923      * At various boundaries, carry must have been consumed.
3924      */
3925     tcg_debug_assert(!s->carry_live);
3926 }
3927 
3928 /* Liveness analysis: update the opc_arg_life array to tell if a
3929    given input argument is dead. Instructions updating dead
3930    temporaries are removed. */
3931 static void __attribute__((noinline))
3932 liveness_pass_1(TCGContext *s)
3933 {
3934     int nb_globals = s->nb_globals;
3935     int nb_temps = s->nb_temps;
3936     TCGOp *op, *op_prev;
3937     TCGRegSet *prefs;
3938 
3939     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3940     for (int i = 0; i < nb_temps; ++i) {
3941         s->temps[i].state_ptr = prefs + i;
3942     }
3943 
3944     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3945     la_func_end(s, nb_globals, nb_temps);
3946 
3947     s->carry_live = false;
3948     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3949         int nb_iargs, nb_oargs;
3950         TCGOpcode opc_new, opc_new2;
3951         TCGLifeData arg_life = 0;
3952         TCGTemp *ts;
3953         TCGOpcode opc = op->opc;
3954         const TCGOpDef *def;
3955         const TCGArgConstraint *args_ct;
3956 
3957         switch (opc) {
3958         case INDEX_op_call:
3959             assert_carry_dead(s);
3960             {
3961                 const TCGHelperInfo *info = tcg_call_info(op);
3962                 int call_flags = tcg_call_flags(op);
3963 
3964                 nb_oargs = TCGOP_CALLO(op);
3965                 nb_iargs = TCGOP_CALLI(op);
3966 
3967                 /* pure functions can be removed if their result is unused */
3968                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3969                     for (int i = 0; i < nb_oargs; i++) {
3970                         ts = arg_temp(op->args[i]);
3971                         if (ts->state != TS_DEAD) {
3972                             goto do_not_remove_call;
3973                         }
3974                     }
3975                     goto do_remove;
3976                 }
3977             do_not_remove_call:
3978 
3979                 /* Output args are dead.  */
3980                 for (int i = 0; i < nb_oargs; i++) {
3981                     ts = arg_temp(op->args[i]);
3982                     if (ts->state & TS_DEAD) {
3983                         arg_life |= DEAD_ARG << i;
3984                     }
3985                     if (ts->state & TS_MEM) {
3986                         arg_life |= SYNC_ARG << i;
3987                     }
3988                     ts->state = TS_DEAD;
3989                     la_reset_pref(ts);
3990                 }
3991 
3992                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3993                 memset(op->output_pref, 0, sizeof(op->output_pref));
3994 
3995                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3996                                     TCG_CALL_NO_READ_GLOBALS))) {
3997                     la_global_kill(s, nb_globals);
3998                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3999                     la_global_sync(s, nb_globals);
4000                 }
4001 
4002                 /* Record arguments that die in this helper.  */
4003                 for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4004                     ts = arg_temp(op->args[i]);
4005                     if (ts->state & TS_DEAD) {
4006                         arg_life |= DEAD_ARG << i;
4007                     }
4008                 }
4009 
4010                 /* For all live registers, remove call-clobbered prefs.  */
4011                 la_cross_call(s, nb_temps);
4012 
4013                 /*
4014                  * Input arguments are live for preceding opcodes.
4015                  *
4016                  * For those arguments that die, and will be allocated in
4017                  * registers, clear the register set for that arg, to be
4018                  * filled in below.  For args that will be on the stack,
4019                  * reset to any available reg.  Process arguments in reverse
4020                  * order so that if a temp is used more than once, the stack
4021                  * reset to max happens before the register reset to 0.
4022                  */
4023                 for (int i = nb_iargs - 1; i >= 0; i--) {
4024                     const TCGCallArgumentLoc *loc = &info->in[i];
4025                     ts = arg_temp(op->args[nb_oargs + i]);
4026 
4027                     if (ts->state & TS_DEAD) {
4028                         switch (loc->kind) {
4029                         case TCG_CALL_ARG_NORMAL:
4030                         case TCG_CALL_ARG_EXTEND_U:
4031                         case TCG_CALL_ARG_EXTEND_S:
4032                             if (arg_slot_reg_p(loc->arg_slot)) {
4033                                 *la_temp_pref(ts) = 0;
4034                                 break;
4035                             }
4036                             /* fall through */
4037                         default:
4038                             *la_temp_pref(ts) =
4039                                 tcg_target_available_regs[ts->type];
4040                             break;
4041                         }
4042                         ts->state &= ~TS_DEAD;
4043                     }
4044                 }
4045 
4046                 /*
4047                  * For each input argument, add its input register to prefs.
4048                  * If a temp is used once, this produces a single set bit;
4049                  * if a temp is used multiple times, this produces a set.
4050                  */
4051                 for (int i = 0; i < nb_iargs; i++) {
4052                     const TCGCallArgumentLoc *loc = &info->in[i];
4053                     ts = arg_temp(op->args[nb_oargs + i]);
4054 
4055                     switch (loc->kind) {
4056                     case TCG_CALL_ARG_NORMAL:
4057                     case TCG_CALL_ARG_EXTEND_U:
4058                     case TCG_CALL_ARG_EXTEND_S:
4059                         if (arg_slot_reg_p(loc->arg_slot)) {
4060                             tcg_regset_set_reg(*la_temp_pref(ts),
4061                                 tcg_target_call_iarg_regs[loc->arg_slot]);
4062                         }
4063                         break;
4064                     default:
4065                         break;
4066                     }
4067                 }
4068             }
4069             break;
4070         case INDEX_op_insn_start:
4071             assert_carry_dead(s);
4072             break;
4073         case INDEX_op_discard:
4074             /* mark the temporary as dead */
4075             ts = arg_temp(op->args[0]);
4076             ts->state = TS_DEAD;
4077             la_reset_pref(ts);
4078             break;
4079 
4080         case INDEX_op_add2_i32:
4081         case INDEX_op_add2_i64:
4082             opc_new = INDEX_op_add;
4083             goto do_addsub2;
4084         case INDEX_op_sub2_i32:
4085         case INDEX_op_sub2_i64:
4086             opc_new = INDEX_op_sub;
4087         do_addsub2:
4088             assert_carry_dead(s);
4089             /* Test if the high part of the operation is dead, but not
4090                the low part.  The result can be optimized to a simple
4091                add or sub.  This happens often for an x86_64 guest when
4092                the cpu mode is set to 32 bit.  */
4093             if (arg_temp(op->args[1])->state == TS_DEAD) {
4094                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4095                     goto do_remove;
4096                 }
4097                 /* Replace the opcode and adjust the args in place,
4098                    leaving 3 unused args at the end.  */
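                      /* add2/sub2 args are (lo_out, hi_out, al, ah, bl, bh);
                         the single-word op keeps lo_out, al and bl.  */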
4099                 op->opc = opc = opc_new;
4100                 op->args[1] = op->args[2];
4101                 op->args[2] = op->args[4];
4102                 /* Fall through and mark the single-word operation live.  */
4103             }
4104             goto do_not_remove;
4105 
4106         case INDEX_op_muls2:
4107             opc_new = INDEX_op_mul;
4108             opc_new2 = INDEX_op_mulsh;
4109             goto do_mul2;
4110         case INDEX_op_mulu2:
4111             opc_new = INDEX_op_mul;
4112             opc_new2 = INDEX_op_muluh;
4113         do_mul2:
4114             assert_carry_dead(s);
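                  /* muls2/mulu2 args are (lo_out, hi_out, a, b).  */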
4115             if (arg_temp(op->args[1])->state == TS_DEAD) {
4116                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4117                     /* Both parts of the operation are dead.  */
4118                     goto do_remove;
4119                 }
4120                 /* The high part of the operation is dead; generate the low. */
4121                 op->opc = opc = opc_new;
4122                 op->args[1] = op->args[2];
4123                 op->args[2] = op->args[3];
4124             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4125                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4126                 /* The low part of the operation is dead; generate the high. */
4127                 op->opc = opc = opc_new2;
4128                 op->args[0] = op->args[1];
4129                 op->args[1] = op->args[2];
4130                 op->args[2] = op->args[3];
4131             } else {
4132                 goto do_not_remove;
4133             }
4134             /* Mark the single-word operation live.  */
4135             goto do_not_remove;
4136 
4137         case INDEX_op_addco:
4138             if (s->carry_live) {
4139                 goto do_not_remove;
4140             }
4141             op->opc = opc = INDEX_op_add;
4142             goto do_default;
4143 
4144         case INDEX_op_addcio:
4145             if (s->carry_live) {
4146                 goto do_not_remove;
4147             }
4148             op->opc = opc = INDEX_op_addci;
4149             goto do_default;
4150 
4151         case INDEX_op_subbo:
4152             if (s->carry_live) {
4153                 goto do_not_remove;
4154             }
4155             /* Lower to sub, but this may also require canonicalization. */
4156             op->opc = opc = INDEX_op_sub;
4157             ts = arg_temp(op->args[2]);
4158             if (ts->kind == TEMP_CONST) {
4159                 ts = tcg_constant_internal(ts->type, -ts->val);
4160                 if (ts->state_ptr == NULL) {
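                          /* A constant temp created mid-pass; set up its state. */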
4161                     tcg_debug_assert(temp_idx(ts) == nb_temps);
4162                     nb_temps++;
4163                     ts->state_ptr = tcg_malloc(sizeof(TCGRegSet));
4164                     ts->state = TS_DEAD;
4165                     la_reset_pref(ts);
4166                 }
4167                 op->args[2] = temp_arg(ts);
4168                 op->opc = opc = INDEX_op_add;
4169             }
4170             goto do_default;
4171 
4172         case INDEX_op_subbio:
4173             if (s->carry_live) {
4174                 goto do_not_remove;
4175             }
4176             op->opc = opc = INDEX_op_subbi;
4177             goto do_default;
4178 
4179         case INDEX_op_addc1o:
4180             if (s->carry_live) {
4181                 goto do_not_remove;
4182             }
4183             /* Lower to two adds: out = a + b, then out += 1. */
4184             op_prev = tcg_op_insert_before(s, op, INDEX_op_add,
4185                                            TCGOP_TYPE(op), 3);
4186             op_prev->args[0] = op->args[0];
4187             op_prev->args[1] = op->args[1];
4188             op_prev->args[2] = op->args[2];
4189             op->opc = opc = INDEX_op_add;
4190             op->args[1] = op->args[0];
4191             ts = arg_temp(op->args[0]);
4192             ts = tcg_constant_internal(ts->type, 1);
4193             op->args[2] = temp_arg(ts);
4194             goto do_default;
4195 
4196         case INDEX_op_subb1o:
4197             if (s->carry_live) {
4198                 goto do_not_remove;
4199             }
4200             /* Lower to sub then add: out = a - b, then out += -1. */
4201             op_prev = tcg_op_insert_before(s, op, INDEX_op_sub,
4202                                            TCGOP_TYPE(op), 3);
4203             op_prev->args[0] = op->args[0];
4204             op_prev->args[1] = op->args[1];
4205             op_prev->args[2] = op->args[2];
4206             op->opc = opc = INDEX_op_add;
4207             op->args[1] = op->args[0];
4208             ts = arg_temp(op->args[0]);
4209             ts = tcg_constant_internal(ts->type, -1);
4210             op->args[2] = temp_arg(ts);
4211             goto do_default;
4212 
4213         default:
4214         do_default:
4215             /*
4216              * Test if the operation can be removed because all
4217              * its outputs are dead. We assume that nb_oargs == 0
4218              * implies side effects.
4219              */
4220             def = &tcg_op_defs[opc];
4221             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) {
4222                 for (int i = def->nb_oargs - 1; i >= 0; i--) {
4223                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4224                         goto do_not_remove;
4225                     }
4226                 }
4227                 goto do_remove;
4228             }
4229             goto do_not_remove;
4230 
4231         do_remove:
4232             tcg_op_remove(s, op);
4233             break;
4234 
4235         do_not_remove:
4236             def = &tcg_op_defs[opc];
4237             nb_iargs = def->nb_iargs;
4238             nb_oargs = def->nb_oargs;
4239 
4240             for (int i = 0; i < nb_oargs; i++) {
4241                 ts = arg_temp(op->args[i]);
4242 
4243                 /* Remember the preference of the uses that followed.  */
4244                 if (i < ARRAY_SIZE(op->output_pref)) {
4245                     op->output_pref[i] = *la_temp_pref(ts);
4246                 }
4247 
4248                 /* Output args are dead.  */
4249                 if (ts->state & TS_DEAD) {
4250                     arg_life |= DEAD_ARG << i;
4251                 }
4252                 if (ts->state & TS_MEM) {
4253                     arg_life |= SYNC_ARG << i;
4254                 }
4255                 ts->state = TS_DEAD;
4256                 la_reset_pref(ts);
4257             }
4258 
4259             /* If end of basic block, update.  */
4260             if (def->flags & TCG_OPF_BB_EXIT) {
4261                 assert_carry_dead(s);
4262                 la_func_end(s, nb_globals, nb_temps);
4263             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4264                 assert_carry_dead(s);
4265                 la_bb_sync(s, nb_globals, nb_temps);
4266             } else if (def->flags & TCG_OPF_BB_END) {
4267                 assert_carry_dead(s);
4268                 la_bb_end(s, nb_globals, nb_temps);
4269             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4270                 assert_carry_dead(s);
4271                 la_global_sync(s, nb_globals);
4272                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4273                     la_cross_call(s, nb_temps);
4274                 }
4275             }
4276 
4277             /* Record arguments that die in this opcode.  */
4278             for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4279                 ts = arg_temp(op->args[i]);
4280                 if (ts->state & TS_DEAD) {
4281                     arg_life |= DEAD_ARG << i;
4282                 }
4283             }
4284             if (def->flags & TCG_OPF_CARRY_OUT) {
4285                 s->carry_live = false;
4286             }
4287 
4288             /* Input arguments are live for preceding opcodes.  */
4289             for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4290                 ts = arg_temp(op->args[i]);
4291                 if (ts->state & TS_DEAD) {
4292                     /* For operands that were dead, initially allow
4293                        all regs for the type.  */
4294                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4295                     ts->state &= ~TS_DEAD;
4296                 }
4297             }
4298             if (def->flags & TCG_OPF_CARRY_IN) {
4299                 s->carry_live = true;
4300             }
4301 
4302             /* Incorporate constraints for this op's input operands.  */
4303             switch (opc) {
4304             case INDEX_op_mov:
4305                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4306                    have proper constraints.  That said, special case
4307                    moves to propagate preferences backward.  */
4308                 if (IS_DEAD_ARG(1)) {
4309                     *la_temp_pref(arg_temp(op->args[0]))
4310                         = *la_temp_pref(arg_temp(op->args[1]));
4311                 }
4312                 break;
4313 
4314             default:
4315                 args_ct = opcode_args_ct(op);
4316                 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4317                     const TCGArgConstraint *ct = &args_ct[i];
4318                     TCGRegSet set, *pset;
4319 
4320                     ts = arg_temp(op->args[i]);
4321                     pset = la_temp_pref(ts);
4322                     set = *pset;
4323 
4324                     set &= ct->regs;
4325                     if (ct->ialias) {
4326                         set &= output_pref(op, ct->alias_index);
4327                     }
4328                     /* If the combination is not possible, restart.  */
4329                     if (set == 0) {
4330                         set = ct->regs;
4331                     }
4332                     *pset = set;
4333                 }
4334                 break;
4335             }
4336             break;
4337         }
4338         op->life = arg_life;
4339     }
4340     assert_carry_dead(s);
4341 }
4342 
4343 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
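     /*
      * Each indirect global gets a direct TEMP_EBB shadow temp; uses are
      * rewritten to the shadow, with explicit loads inserted before uses
      * of dead values and explicit stores inserted where liveness asked
      * for sync.
      */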
4344 static bool __attribute__((noinline))
4345 liveness_pass_2(TCGContext *s)
4346 {
4347     int nb_globals = s->nb_globals;
4348     int nb_temps, i;
4349     bool changes = false;
4350     TCGOp *op, *op_next;
4351 
4352     /* Create a temporary for each indirect global.  */
4353     for (i = 0; i < nb_globals; ++i) {
4354         TCGTemp *its = &s->temps[i];
4355         if (its->indirect_reg) {
4356             TCGTemp *dts = tcg_temp_alloc(s);
4357             dts->type = its->type;
4358             dts->base_type = its->base_type;
4359             dts->temp_subindex = its->temp_subindex;
4360             dts->kind = TEMP_EBB;
4361             its->state_ptr = dts;
4362         } else {
4363             its->state_ptr = NULL;
4364         }
4365         /* All globals begin dead.  */
4366         its->state = TS_DEAD;
4367     }
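         /* i continues from nb_globals: non-global temps get no shadow. */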
4368     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4369         TCGTemp *its = &s->temps[i];
4370         its->state_ptr = NULL;
4371         its->state = TS_DEAD;
4372     }
4373 
4374     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4375         TCGOpcode opc = op->opc;
4376         const TCGOpDef *def = &tcg_op_defs[opc];
4377         TCGLifeData arg_life = op->life;
4378         int nb_iargs, nb_oargs, call_flags;
4379         TCGTemp *arg_ts, *dir_ts;
4380 
4381         if (opc == INDEX_op_call) {
4382             nb_oargs = TCGOP_CALLO(op);
4383             nb_iargs = TCGOP_CALLI(op);
4384             call_flags = tcg_call_flags(op);
4385         } else {
4386             nb_iargs = def->nb_iargs;
4387             nb_oargs = def->nb_oargs;
4388 
4389             /* Set flags similar to those that calls require.  */
4390             if (def->flags & TCG_OPF_COND_BRANCH) {
4391                 /* Like reading globals: sync_globals */
4392                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4393             } else if (def->flags & TCG_OPF_BB_END) {
4394                 /* Like writing globals: save_globals */
4395                 call_flags = 0;
4396             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4397                 /* Like reading globals: sync_globals */
4398                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4399             } else {
4400                 /* No effect on globals.  */
4401                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4402                               TCG_CALL_NO_WRITE_GLOBALS);
4403             }
4404         }
4405 
4406         /* Make sure that input arguments are available.  */
4407         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4408             arg_ts = arg_temp(op->args[i]);
4409             dir_ts = arg_ts->state_ptr;
4410             if (dir_ts && arg_ts->state == TS_DEAD) {
4411                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4412                                   ? INDEX_op_ld_i32
4413                                   : INDEX_op_ld_i64);
4414                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4415                                                   arg_ts->type, 3);
4416 
4417                 lop->args[0] = temp_arg(dir_ts);
4418                 lop->args[1] = temp_arg(arg_ts->mem_base);
4419                 lop->args[2] = arg_ts->mem_offset;
4420 
4421                 /* Loaded, but synced with memory.  */
4422                 arg_ts->state = TS_MEM;
4423             }
4424         }
4425 
4426         /* Perform input replacement, and mark inputs that became dead.
4427            No action is required except keeping temp_state up to date
4428            so that we reload when needed.  */
4429         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4430             arg_ts = arg_temp(op->args[i]);
4431             dir_ts = arg_ts->state_ptr;
4432             if (dir_ts) {
4433                 op->args[i] = temp_arg(dir_ts);
4434                 changes = true;
4435                 if (IS_DEAD_ARG(i)) {
4436                     arg_ts->state = TS_DEAD;
4437                 }
4438             }
4439         }
4440 
4441         /* Liveness analysis should ensure that the following are
4442            all correct, for call sites and basic block end points.  */
4443         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4444             /* Nothing to do */
4445         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4446             for (i = 0; i < nb_globals; ++i) {
4447                 /* Liveness should see that globals are synced back,
4448                    that is, either TS_DEAD or TS_MEM.  */
4449                 arg_ts = &s->temps[i];
4450                 tcg_debug_assert(arg_ts->state_ptr == 0
4451                                  || arg_ts->state != 0);
4452             }
4453         } else {
4454             for (i = 0; i < nb_globals; ++i) {
4455                 /* Liveness should see that globals are saved back,
4456                    that is, TS_DEAD, waiting to be reloaded.  */
4457                 arg_ts = &s->temps[i];
4458                 tcg_debug_assert(arg_ts->state_ptr == 0
4459                                  || arg_ts->state == TS_DEAD);
4460             }
4461         }
4462 
4463         /* Outputs become available.  */
4464         if (opc == INDEX_op_mov) {
4465             arg_ts = arg_temp(op->args[0]);
4466             dir_ts = arg_ts->state_ptr;
4467             if (dir_ts) {
4468                 op->args[0] = temp_arg(dir_ts);
4469                 changes = true;
4470 
4471                 /* The output is now live and modified.  */
4472                 arg_ts->state = 0;
4473 
4474                 if (NEED_SYNC_ARG(0)) {
4475                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4476                                       ? INDEX_op_st_i32
4477                                       : INDEX_op_st_i64);
4478                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4479                                                      arg_ts->type, 3);
4480                     TCGTemp *out_ts = dir_ts;
4481 
4482                     if (IS_DEAD_ARG(0)) {
4483                         out_ts = arg_temp(op->args[1]);
4484                         arg_ts->state = TS_DEAD;
4485                         tcg_op_remove(s, op);
4486                     } else {
4487                         arg_ts->state = TS_MEM;
4488                     }
4489 
4490                     sop->args[0] = temp_arg(out_ts);
4491                     sop->args[1] = temp_arg(arg_ts->mem_base);
4492                     sop->args[2] = arg_ts->mem_offset;
4493                 } else {
4494                     tcg_debug_assert(!IS_DEAD_ARG(0));
4495                 }
4496             }
4497         } else {
4498             for (i = 0; i < nb_oargs; i++) {
4499                 arg_ts = arg_temp(op->args[i]);
4500                 dir_ts = arg_ts->state_ptr;
4501                 if (!dir_ts) {
4502                     continue;
4503                 }
4504                 op->args[i] = temp_arg(dir_ts);
4505                 changes = true;
4506 
4507                 /* The output is now live and modified.  */
4508                 arg_ts->state = 0;
4509 
4510                 /* Sync outputs upon their last write.  */
4511                 if (NEED_SYNC_ARG(i)) {
4512                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4513                                       ? INDEX_op_st_i32
4514                                       : INDEX_op_st_i64);
4515                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4516                                                      arg_ts->type, 3);
4517 
4518                     sop->args[0] = temp_arg(dir_ts);
4519                     sop->args[1] = temp_arg(arg_ts->mem_base);
4520                     sop->args[2] = arg_ts->mem_offset;
4521 
4522                     arg_ts->state = TS_MEM;
4523                 }
4524                 /* Drop outputs that are dead.  */
4525                 if (IS_DEAD_ARG(i)) {
4526                     arg_ts->state = TS_DEAD;
4527                 }
4528             }
4529         }
4530     }
4531 
4532     return changes;
4533 }
4534 
4535 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4536 {
4537     intptr_t off;
4538     int size, align;
4539 
4540     /* When allocating an object, look at the full type. */
4541     size = tcg_type_size(ts->base_type);
4542     switch (ts->base_type) {
4543     case TCG_TYPE_I32:
4544         align = 4;
4545         break;
4546     case TCG_TYPE_I64:
4547     case TCG_TYPE_V64:
4548         align = 8;
4549         break;
4550     case TCG_TYPE_I128:
4551     case TCG_TYPE_V128:
4552     case TCG_TYPE_V256:
4553         /*
4554          * Note that we do not require aligned storage for V256,
4555          * and that we provide alignment for I128 to match V128,
4556          * even if that's above what the host ABI requires.
4557          */
4558         align = 16;
4559         break;
4560     default:
4561         g_assert_not_reached();
4562     }
4563 
4564     /*
4565      * Assume the stack is sufficiently aligned.
4566      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4567      * and do not require 16 byte vector alignment.  This seems slightly
4568      * easier than fully parameterizing the above switch statement.
4569      */
4570     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4571     off = ROUND_UP(s->current_frame_offset, align);
4572 
4573     /* If we've exhausted the stack frame, restart with a smaller TB. */
4574     if (off + size > s->frame_end) {
4575         tcg_raise_tb_overflow(s);
4576     }
4577     s->current_frame_offset = off + size;
4578 #if defined(__sparc__)
4579     off += TCG_TARGET_STACK_BIAS;
4580 #endif
4581 
4582     /* If the object was subdivided, assign memory to all the parts. */
4583     if (ts->base_type != ts->type) {
4584         int part_size = tcg_type_size(ts->type);
4585         int part_count = size / part_size;
4586 
4587         /*
4588          * Each part is allocated sequentially in tcg_temp_new_internal.
4589          * Jump back to the first part by subtracting the current index.
4590          */
4591         ts -= ts->temp_subindex;
4592         for (int i = 0; i < part_count; ++i) {
4593             ts[i].mem_offset = off + i * part_size;
4594             ts[i].mem_base = s->frame_temp;
4595             ts[i].mem_allocated = 1;
4596         }
4597     } else {
4598         ts->mem_offset = off;
4599         ts->mem_base = s->frame_temp;
4600         ts->mem_allocated = 1;
4601     }
4602 }
4603 
4604 /* Assign @reg to @ts, and update reg_to_temp[]. */
4605 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4606 {
4607     if (ts->val_type == TEMP_VAL_REG) {
4608         TCGReg old = ts->reg;
4609         tcg_debug_assert(s->reg_to_temp[old] == ts);
4610         if (old == reg) {
4611             return;
4612         }
4613         s->reg_to_temp[old] = NULL;
4614     }
4615     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4616     s->reg_to_temp[reg] = ts;
4617     ts->val_type = TEMP_VAL_REG;
4618     ts->reg = reg;
4619 }
4620 
4621 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4622 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4623 {
4624     tcg_debug_assert(type != TEMP_VAL_REG);
4625     if (ts->val_type == TEMP_VAL_REG) {
4626         TCGReg reg = ts->reg;
4627         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4628         s->reg_to_temp[reg] = NULL;
4629     }
4630     ts->val_type = type;
4631 }
4632 
4633 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4634 
4635 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4636    mark it free; otherwise mark it dead.  */
4637 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4638 {
4639     TCGTempVal new_type;
4640 
4641     switch (ts->kind) {
4642     case TEMP_FIXED:
4643         return;
4644     case TEMP_GLOBAL:
4645     case TEMP_TB:
4646         new_type = TEMP_VAL_MEM;
4647         break;
4648     case TEMP_EBB:
4649         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4650         break;
4651     case TEMP_CONST:
4652         new_type = TEMP_VAL_CONST;
4653         break;
4654     default:
4655         g_assert_not_reached();
4656     }
4657     set_temp_val_nonreg(s, ts, new_type);
4658 }
4659 
4660 /* Mark a temporary as dead.  */
4661 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4662 {
4663     temp_free_or_dead(s, ts, 1);
4664 }
4665 
4666 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4667    register needs to be allocated to store a constant.  If 'free_or_dead'
4668    is non-zero, subsequently release the temporary; if it is positive, the
4669    temp is dead; if it is negative, the temp is free.  */
4670 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4671                       TCGRegSet preferred_regs, int free_or_dead)
4672 {
4673     if (!temp_readonly(ts) && !ts->mem_coherent) {
4674         if (!ts->mem_allocated) {
4675             temp_allocate_frame(s, ts);
4676         }
4677         switch (ts->val_type) {
4678         case TEMP_VAL_CONST:
4679             /* If we're going to free the temp immediately, then we won't
4680                require it later in a register, so attempt to store the
4681                constant to memory directly.  */
4682             if (free_or_dead
4683                 && tcg_out_sti(s, ts->type, ts->val,
4684                                ts->mem_base->reg, ts->mem_offset)) {
4685                 break;
4686             }
4687             temp_load(s, ts, tcg_target_available_regs[ts->type],
4688                       allocated_regs, preferred_regs);
4689             /* fallthrough */
4690 
4691         case TEMP_VAL_REG:
4692             tcg_out_st(s, ts->type, ts->reg,
4693                        ts->mem_base->reg, ts->mem_offset);
4694             break;
4695 
4696         case TEMP_VAL_MEM:
4697             break;
4698 
4699         case TEMP_VAL_DEAD:
4700         default:
4701             g_assert_not_reached();
4702         }
4703         ts->mem_coherent = 1;
4704     }
4705     if (free_or_dead) {
4706         temp_free_or_dead(s, ts, free_or_dead);
4707     }
4708 }
4709 
4710 /* free register 'reg' by spilling the corresponding temporary if necessary */
4711 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4712 {
4713     TCGTemp *ts = s->reg_to_temp[reg];
4714     if (ts != NULL) {
4715         temp_sync(s, ts, allocated_regs, 0, -1);
4716     }
4717 }
4718 
4719 /**
4720  * tcg_reg_alloc:
4721  * @required_regs: Set of registers in which we must allocate.
4722  * @allocated_regs: Set of registers which must be avoided.
4723  * @preferred_regs: Set of registers we should prefer.
4724  * @rev: True if we search the registers in "indirect" order.
4725  *
4726  * The allocated register must be in @required_regs & ~@allocated_regs,
4727  * but if we can put it in @preferred_regs we may save a move later.
4728  */
4729 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4730                             TCGRegSet allocated_regs,
4731                             TCGRegSet preferred_regs, bool rev)
4732 {
4733     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4734     TCGRegSet reg_ct[2];
4735     const int *order;
4736 
4737     reg_ct[1] = required_regs & ~allocated_regs;
4738     tcg_debug_assert(reg_ct[1] != 0);
4739     reg_ct[0] = reg_ct[1] & preferred_regs;
4740 
4741     /* Skip the preferred_regs option if it cannot be satisfied,
4742        or if the preference made no difference.  */
4743     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
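    /*
     * f is the first index into reg_ct[] to try below: 0 starts with
     * the preferred subset, 1 skips straight to the full set.
     */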
4744 
4745     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4746 
4747     /* Try free registers, preferences first.  */
4748     for (j = f; j < 2; j++) {
4749         TCGRegSet set = reg_ct[j];
4750 
4751         if (tcg_regset_single(set)) {
4752             /* One register in the set.  */
4753             TCGReg reg = tcg_regset_first(set);
4754             if (s->reg_to_temp[reg] == NULL) {
4755                 return reg;
4756             }
4757         } else {
4758             for (i = 0; i < n; i++) {
4759                 TCGReg reg = order[i];
4760                 if (s->reg_to_temp[reg] == NULL &&
4761                     tcg_regset_test_reg(set, reg)) {
4762                     return reg;
4763                 }
4764             }
4765         }
4766     }
4767 
4768     /* We must spill something.  */
4769     for (j = f; j < 2; j++) {
4770         TCGRegSet set = reg_ct[j];
4771 
4772         if (tcg_regset_single(set)) {
4773             /* One register in the set.  */
4774             TCGReg reg = tcg_regset_first(set);
4775             tcg_reg_free(s, reg, allocated_regs);
4776             return reg;
4777         } else {
4778             for (i = 0; i < n; i++) {
4779                 TCGReg reg = order[i];
4780                 if (tcg_regset_test_reg(set, reg)) {
4781                     tcg_reg_free(s, reg, allocated_regs);
4782                     return reg;
4783                 }
4784             }
4785         }
4786     }
4787 
4788     g_assert_not_reached();
4789 }
4790 
4791 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4792                                  TCGRegSet allocated_regs,
4793                                  TCGRegSet preferred_regs, bool rev)
4794 {
4795     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4796     TCGRegSet reg_ct[2];
4797     const int *order;
4798 
4799     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4800     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
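    /*
     * The (allocated_regs >> 1) term clears bit I whenever register
     * I + 1 is allocated, so every register left in the set starts a
     * pair with both halves available.
     */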
4801     tcg_debug_assert(reg_ct[1] != 0);
4802     reg_ct[0] = reg_ct[1] & preferred_regs;
4803 
4804     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4805 
4806     /*
4807      * Skip the preferred_regs option if it cannot be satisfied,
4808      * or if the preference made no difference.
4809      */
4810     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4811 
4812     /*
4813      * Minimize the number of flushes by looking for 2 free registers first,
4814      * then a single flush, then two flushes.
4815      */
4816     for (fmin = 2; fmin >= 0; fmin--) {
4817         for (j = k; j < 2; j++) {
4818             TCGRegSet set = reg_ct[j];
4819 
4820             for (i = 0; i < n; i++) {
4821                 TCGReg reg = order[i];
4822 
4823                 if (tcg_regset_test_reg(set, reg)) {
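                    /* f counts how many of reg and reg + 1 are free (0..2). */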
4824                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4825                     if (f >= fmin) {
4826                         tcg_reg_free(s, reg, allocated_regs);
4827                         tcg_reg_free(s, reg + 1, allocated_regs);
4828                         return reg;
4829                     }
4830                 }
4831             }
4832         }
4833     }
4834     g_assert_not_reached();
4835 }
4836 
4837 /* Make sure the temporary is in a register.  If needed, allocate the register
4838    from DESIRED while avoiding ALLOCATED.  */
4839 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4840                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4841 {
4842     TCGReg reg;
4843 
4844     switch (ts->val_type) {
4845     case TEMP_VAL_REG:
4846         return;
4847     case TEMP_VAL_CONST:
4848         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4849                             preferred_regs, ts->indirect_base);
4850         if (ts->type <= TCG_TYPE_I64) {
4851             tcg_out_movi(s, ts->type, reg, ts->val);
4852         } else {
4853             uint64_t val = ts->val;
4854             MemOp vece = MO_64;
4855 
4856             /*
4857              * Find the minimal vector element that matches the constant.
4858              * The targets will, in general, have to do this search
4859              * anyway; do it generically here.
4860              */
4861             if (val == dup_const(MO_8, val)) {
4862                 vece = MO_8;
4863             } else if (val == dup_const(MO_16, val)) {
4864                 vece = MO_16;
4865             } else if (val == dup_const(MO_32, val)) {
4866                 vece = MO_32;
4867             }
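            /*
             * For example, 0x4242424242424242 collapses to MO_8 and
             * 0x1234123412341234 to MO_16, while 0x0123456789abcdef
             * stays MO_64.
             */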
4868 
4869             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4870         }
4871         ts->mem_coherent = 0;
4872         break;
4873     case TEMP_VAL_MEM:
4874         if (!ts->mem_allocated) {
4875             temp_allocate_frame(s, ts);
4876         }
4877         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4878                             preferred_regs, ts->indirect_base);
4879         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4880         ts->mem_coherent = 1;
4881         break;
4882     case TEMP_VAL_DEAD:
4883     default:
4884         g_assert_not_reached();
4885     }
4886     set_temp_val_reg(s, ts, reg);
4887 }
4888 
4889 /* Save a temporary to memory. 'allocated_regs' is used in case a
4890    temporary register needs to be allocated to store a constant.  */
4891 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4892 {
4893     /* The liveness analysis already ensures that globals are back
4894        in memory. Keep a tcg_debug_assert for safety. */
4895     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4896 }
4897 
4898 /* save globals to their canonical location and assume they can be
4899    modified by the following code. 'allocated_regs' is used in case a
4900    temporary register needs to be allocated to store a constant. */
4901 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4902 {
4903     int i, n;
4904 
4905     for (i = 0, n = s->nb_globals; i < n; i++) {
4906         temp_save(s, &s->temps[i], allocated_regs);
4907     }
4908 }
4909 
4910 /* sync globals to their canonical location and assume they can be
4911    read by the following code. 'allocated_regs' is used in case a
4912    temporary register needs to be allocated to store a constant. */
4913 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4914 {
4915     int i, n;
4916 
4917     for (i = 0, n = s->nb_globals; i < n; i++) {
4918         TCGTemp *ts = &s->temps[i];
4919         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4920                          || ts->kind == TEMP_FIXED
4921                          || ts->mem_coherent);
4922     }
4923 }
4924 
4925 /* at the end of a basic block, we assume all temporaries are dead and
4926    all globals are stored at their canonical location. */
4927 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4928 {
4929     assert_carry_dead(s);
4930     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4931         TCGTemp *ts = &s->temps[i];
4932 
4933         switch (ts->kind) {
4934         case TEMP_TB:
4935             temp_save(s, ts, allocated_regs);
4936             break;
4937         case TEMP_EBB:
4938             /* The liveness analysis already ensures that temps are dead.
4939            Keep a tcg_debug_assert for safety. */
4940             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4941             break;
4942         case TEMP_CONST:
4943             /* Similarly, we should have freed any allocated register. */
4944             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4945             break;
4946         default:
4947             g_assert_not_reached();
4948         }
4949     }
4950 
4951     save_globals(s, allocated_regs);
4952 }
4953 
4954 /*
4955  * At a conditional branch, we assume all temporaries are dead unless
4956  * explicitly live-across-conditional-branch; all globals and local
4957  * temps are synced to their location.
4958  */
4959 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4960 {
4961     assert_carry_dead(s);
4962     sync_globals(s, allocated_regs);
4963 
4964     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4965         TCGTemp *ts = &s->temps[i];
4966         /*
4967          * The liveness analysis already ensures that temps are dead.
4968          * Keep tcg_debug_asserts for safety.
4969          */
4970         switch (ts->kind) {
4971         case TEMP_TB:
4972             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4973             break;
4974         case TEMP_EBB:
4975         case TEMP_CONST:
4976             break;
4977         default:
4978             g_assert_not_reached();
4979         }
4980     }
4981 }
4982 
4983 /*
4984  * Specialized code generation for INDEX_op_mov_* with a constant.
4985  */
4986 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4987                                   tcg_target_ulong val, TCGLifeData arg_life,
4988                                   TCGRegSet preferred_regs)
4989 {
4990     /* ENV should not be modified.  */
4991     tcg_debug_assert(!temp_readonly(ots));
4992 
4993     /* The movi is not explicitly generated here.  */
4994     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4995     ots->val = val;
4996     ots->mem_coherent = 0;
4997     if (NEED_SYNC_ARG(0)) {
4998         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4999     } else if (IS_DEAD_ARG(0)) {
5000         temp_dead(s, ots);
5001     }
5002 }
5003 
5004 /*
5005  * Specialized code generation for INDEX_op_mov_*.
5006  */
5007 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
5008 {
5009     const TCGLifeData arg_life = op->life;
5010     TCGRegSet allocated_regs, preferred_regs;
5011     TCGTemp *ts, *ots;
5012     TCGType otype, itype;
5013     TCGReg oreg, ireg;
5014 
5015     allocated_regs = s->reserved_regs;
5016     preferred_regs = output_pref(op, 0);
5017     ots = arg_temp(op->args[0]);
5018     ts = arg_temp(op->args[1]);
5019 
5020     /* ENV should not be modified.  */
5021     tcg_debug_assert(!temp_readonly(ots));
5022 
5023     /* Note that otype != itype for no-op truncation.  */
5024     otype = ots->type;
5025     itype = ts->type;
5026 
5027     if (ts->val_type == TEMP_VAL_CONST) {
5028         /* propagate constant or generate sti */
5029         tcg_target_ulong val = ts->val;
5030         if (IS_DEAD_ARG(1)) {
5031             temp_dead(s, ts);
5032         }
5033         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
5034         return;
5035     }
5036 
5037     /* If the source value is in memory, we're going to be forced
5038        to have it in a register in order to perform the copy.  Copy
5039        the SOURCE value into its own register first, so that we
5040        don't have to reload SOURCE the next time it is used. */
5041     if (ts->val_type == TEMP_VAL_MEM) {
5042         temp_load(s, ts, tcg_target_available_regs[itype],
5043                   allocated_regs, preferred_regs);
5044     }
5045     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
5046     ireg = ts->reg;
5047 
5048     if (IS_DEAD_ARG(0)) {
5049         /* mov to a non-saved dead register makes no sense (even with
5050            liveness analysis disabled). */
5051         tcg_debug_assert(NEED_SYNC_ARG(0));
5052         if (!ots->mem_allocated) {
5053             temp_allocate_frame(s, ots);
5054         }
5055         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
5056         if (IS_DEAD_ARG(1)) {
5057             temp_dead(s, ts);
5058         }
5059         temp_dead(s, ots);
5060         return;
5061     }
5062 
5063     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
5064         /*
5065          * The mov can be suppressed.  Kill input first, so that it
5066          * is unlinked from reg_to_temp, then set the output to the
5067          * reg that we saved from the input.
5068          */
5069         temp_dead(s, ts);
5070         oreg = ireg;
5071     } else {
5072         if (ots->val_type == TEMP_VAL_REG) {
5073             oreg = ots->reg;
5074         } else {
5075             /* Make sure to not spill the input register during allocation. */
5076             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
5077                                  allocated_regs | ((TCGRegSet)1 << ireg),
5078                                  preferred_regs, ots->indirect_base);
5079         }
5080         if (!tcg_out_mov(s, otype, oreg, ireg)) {
5081             /*
5082              * Cross register class move not supported.
5083              * Store the source register into the destination slot
5084              * and leave the destination temp as TEMP_VAL_MEM.
5085              */
5086             assert(!temp_readonly(ots));
5087             if (!ots->mem_allocated) {
5088                 temp_allocate_frame(s, ots);
5089             }
5090             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
5091             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
5092             ots->mem_coherent = 1;
5093             return;
5094         }
5095     }
5096     set_temp_val_reg(s, ots, oreg);
5097     ots->mem_coherent = 0;
5098 
5099     if (NEED_SYNC_ARG(0)) {
5100         temp_sync(s, ots, allocated_regs, 0, 0);
5101     }
5102 }
5103 
5104 /*
5105  * Specialized code generation for INDEX_op_dup_vec.
5106  */
5107 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
5108 {
5109     const TCGLifeData arg_life = op->life;
5110     TCGRegSet dup_out_regs, dup_in_regs;
5111     const TCGArgConstraint *dup_args_ct;
5112     TCGTemp *its, *ots;
5113     TCGType itype, vtype;
5114     unsigned vece;
5115     int lowpart_ofs;
5116     bool ok;
5117 
5118     ots = arg_temp(op->args[0]);
5119     its = arg_temp(op->args[1]);
5120 
5121     /* ENV should not be modified.  */
5122     tcg_debug_assert(!temp_readonly(ots));
5123 
5124     itype = its->type;
5125     vece = TCGOP_VECE(op);
5126     vtype = TCGOP_TYPE(op);
5127 
5128     if (its->val_type == TEMP_VAL_CONST) {
5129         /* Propagate constant via movi -> dupi.  */
5130         tcg_target_ulong val = its->val;
5131         if (IS_DEAD_ARG(1)) {
5132             temp_dead(s, its);
5133         }
5134         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
5135         return;
5136     }
5137 
5138     dup_args_ct = opcode_args_ct(op);
5139     dup_out_regs = dup_args_ct[0].regs;
5140     dup_in_regs = dup_args_ct[1].regs;
5141 
5142     /* Allocate the output register now.  */
5143     if (ots->val_type != TEMP_VAL_REG) {
5144         TCGRegSet allocated_regs = s->reserved_regs;
5145         TCGReg oreg;
5146 
5147         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5148             /* Make sure to not spill the input register. */
5149             tcg_regset_set_reg(allocated_regs, its->reg);
5150         }
5151         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5152                              output_pref(op, 0), ots->indirect_base);
5153         set_temp_val_reg(s, ots, oreg);
5154     }
5155 
5156     switch (its->val_type) {
5157     case TEMP_VAL_REG:
5158         /*
5159          * The dup constraints must be broad, covering all possible VECE.
5160          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5161          * to fail, indicating that extra moves are required for that case.
5162          */
5163         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5164             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5165                 goto done;
5166             }
5167             /* Try again from memory or a vector input register.  */
5168         }
5169         if (!its->mem_coherent) {
5170             /*
5171              * The input register is not synced, and so an extra store
5172              * would be required to use memory.  Attempt an integer-vector
5173              * register move first.  We do not have a TCGRegSet for this.
5174              */
5175             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5176                 break;
5177             }
5178             /* Sync the temp back to its slot and load from there.  */
5179             temp_sync(s, its, s->reserved_regs, 0, 0);
5180         }
5181         /* fall through */
5182 
5183     case TEMP_VAL_MEM:
5184         lowpart_ofs = 0;
5185         if (HOST_BIG_ENDIAN) {
5186             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5187         }
5188         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5189                              its->mem_offset + lowpart_ofs)) {
5190             goto done;
5191         }
5192         /* Load the input into the destination vector register. */
5193         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5194         break;
5195 
5196     default:
5197         g_assert_not_reached();
5198     }
5199 
5200     /* We now have a vector input register, so dup must succeed. */
5201     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5202     tcg_debug_assert(ok);
5203 
5204  done:
5205     ots->mem_coherent = 0;
5206     if (IS_DEAD_ARG(1)) {
5207         temp_dead(s, its);
5208     }
5209     if (NEED_SYNC_ARG(0)) {
5210         temp_sync(s, ots, s->reserved_regs, 0, 0);
5211     }
5212     if (IS_DEAD_ARG(0)) {
5213         temp_dead(s, ots);
5214     }
5215 }
5216 
5217 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5218 {
5219     const TCGLifeData arg_life = op->life;
5220     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5221     TCGRegSet i_allocated_regs;
5222     TCGRegSet o_allocated_regs;
5223     int i, k, nb_iargs, nb_oargs;
5224     TCGReg reg;
5225     TCGArg arg;
5226     const TCGArgConstraint *args_ct;
5227     const TCGArgConstraint *arg_ct;
5228     TCGTemp *ts;
5229     TCGArg new_args[TCG_MAX_OP_ARGS];
5230     int const_args[TCG_MAX_OP_ARGS];
5231     TCGCond op_cond;
5232 
5233     if (def->flags & TCG_OPF_CARRY_IN) {
5234         tcg_debug_assert(s->carry_live);
5235     }
5236 
5237     nb_oargs = def->nb_oargs;
5238     nb_iargs = def->nb_iargs;
5239 
5240     /* copy constants */
5241     memcpy(new_args + nb_oargs + nb_iargs,
5242            op->args + nb_oargs + nb_iargs,
5243            sizeof(TCGArg) * def->nb_cargs);
5244 
5245     i_allocated_regs = s->reserved_regs;
5246     o_allocated_regs = s->reserved_regs;
5247 
5248     switch (op->opc) {
5249     case INDEX_op_brcond:
5250         op_cond = op->args[2];
5251         break;
5252     case INDEX_op_setcond:
5253     case INDEX_op_negsetcond:
5254     case INDEX_op_cmp_vec:
5255         op_cond = op->args[3];
5256         break;
5257     case INDEX_op_brcond2_i32:
5258         op_cond = op->args[4];
5259         break;
5260     case INDEX_op_movcond:
5261     case INDEX_op_setcond2_i32:
5262     case INDEX_op_cmpsel_vec:
5263         op_cond = op->args[5];
5264         break;
5265     default:
5266         /* No condition within opcode. */
5267         op_cond = TCG_COND_ALWAYS;
5268         break;
5269     }
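    /*
     * op_cond is passed to tcg_target_const_match() below, since the
     * immediates a backend accepts may depend on the comparison.
     */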
5270 
5271     args_ct = opcode_args_ct(op);
5272 
5273     /* satisfy input constraints */
5274     for (k = 0; k < nb_iargs; k++) {
5275         TCGRegSet i_preferred_regs, i_required_regs;
5276         bool allocate_new_reg, copyto_new_reg;
5277         TCGTemp *ts2;
5278         int i1, i2;
5279 
5280         i = args_ct[nb_oargs + k].sort_index;
5281         arg = op->args[i];
5282         arg_ct = &args_ct[i];
5283         ts = arg_temp(arg);
5284 
5285         if (ts->val_type == TEMP_VAL_CONST) {
5286 #ifdef TCG_REG_ZERO
5287             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5288                 /* Hardware zero register: indicate register via non-const. */
5289                 const_args[i] = 0;
5290                 new_args[i] = TCG_REG_ZERO;
5291                 continue;
5292             }
5293 #endif
5294 
5295             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5296                                        op_cond, TCGOP_VECE(op))) {
5297                 /* constant is OK for instruction */
5298                 const_args[i] = 1;
5299                 new_args[i] = ts->val;
5300                 continue;
5301             }
5302         }
5303 
5304         reg = ts->reg;
5305         i_preferred_regs = 0;
5306         i_required_regs = arg_ct->regs;
5307         allocate_new_reg = false;
5308         copyto_new_reg = false;
5309 
5310         switch (arg_ct->pair) {
5311         case 0: /* not paired */
5312             if (arg_ct->ialias) {
5313                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5314 
5315                 /*
5316                  * If the input is readonly, then it cannot also be an
5317                  * output and aliased to itself.  If the input is not
5318                  * dead after the instruction, we must allocate a new
5319                  * register and move it.
5320                  */
5321                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5322                     || args_ct[arg_ct->alias_index].newreg) {
5323                     allocate_new_reg = true;
5324                 } else if (ts->val_type == TEMP_VAL_REG) {
5325                     /*
5326                      * Check if the current register has already been
5327                      * allocated for another input.
5328                      */
5329                     allocate_new_reg =
5330                         tcg_regset_test_reg(i_allocated_regs, reg);
5331                 }
5332             }
5333             if (!allocate_new_reg) {
5334                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5335                           i_preferred_regs);
5336                 reg = ts->reg;
5337                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5338             }
5339             if (allocate_new_reg) {
5340                 /*
5341                  * Allocate a new register matching the constraint
5342                  * and move the temporary register into it.
5343                  */
5344                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5345                           i_allocated_regs, 0);
5346                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5347                                     i_preferred_regs, ts->indirect_base);
5348                 copyto_new_reg = true;
5349             }
5350             break;
5351 
5352         case 1:
5353             /* First of an input pair; if i1 == i2, the second is an output. */
5354             i1 = i;
5355             i2 = arg_ct->pair_index;
5356             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5357 
5358             /*
5359              * It is easier to default to allocating a new pair
5360              * and to identify a few cases where it's not required.
5361              */
5362             if (arg_ct->ialias) {
5363                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
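                /*
                 * Reuse the current registers only if both halves die
                 * here, the inputs already occupy an adjacent reg/reg+1
                 * pair (or reg + 1 is free when the second half is an
                 * output), and neither register is claimed elsewhere.
                 */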
5364                 if (IS_DEAD_ARG(i1) &&
5365                     IS_DEAD_ARG(i2) &&
5366                     !temp_readonly(ts) &&
5367                     ts->val_type == TEMP_VAL_REG &&
5368                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5369                     tcg_regset_test_reg(i_required_regs, reg) &&
5370                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5371                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5372                     (ts2
5373                      ? ts2->val_type == TEMP_VAL_REG &&
5374                        ts2->reg == reg + 1 &&
5375                        !temp_readonly(ts2)
5376                      : s->reg_to_temp[reg + 1] == NULL)) {
5377                     break;
5378                 }
5379             } else {
5380                 /* Without aliasing, the pair must also be an input. */
5381                 tcg_debug_assert(ts2);
5382                 if (ts->val_type == TEMP_VAL_REG &&
5383                     ts2->val_type == TEMP_VAL_REG &&
5384                     ts2->reg == reg + 1 &&
5385                     tcg_regset_test_reg(i_required_regs, reg)) {
5386                     break;
5387                 }
5388             }
5389             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5390                                      0, ts->indirect_base);
5391             goto do_pair;
5392 
5393         case 2: /* pair second */
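            /* The "pair first" operand chose the pair; just take reg + 1. */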
5394             reg = new_args[arg_ct->pair_index] + 1;
5395             goto do_pair;
5396 
5397         case 3: /* ialias with second output, no first input */
5398             tcg_debug_assert(arg_ct->ialias);
5399             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5400 
5401             if (IS_DEAD_ARG(i) &&
5402                 !temp_readonly(ts) &&
5403                 ts->val_type == TEMP_VAL_REG &&
5404                 reg > 0 &&
5405                 s->reg_to_temp[reg - 1] == NULL &&
5406                 tcg_regset_test_reg(i_required_regs, reg) &&
5407                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5408                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5409                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5410                 break;
5411             }
5412             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5413                                      i_allocated_regs, 0,
5414                                      ts->indirect_base);
5415             tcg_regset_set_reg(i_allocated_regs, reg);
5416             reg += 1;
5417             goto do_pair;
5418 
5419         do_pair:
5420             /*
5421              * If an aliased input is not dead after the instruction,
5422              * we must allocate a new register and move it.
5423              */
5424             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5425                 TCGRegSet t_allocated_regs = i_allocated_regs;
5426 
5427                 /*
5428                  * Because of the alias, and the continued life, make sure
5429                  * that the temp is somewhere *other* than the reg pair,
5430                  * and we get a copy in reg.
5431                  */
5432                 tcg_regset_set_reg(t_allocated_regs, reg);
5433                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5434                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5435                     /* If ts was already in reg, copy it somewhere else. */
5436                     TCGReg nr;
5437                     bool ok;
5438 
5439                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5440                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5441                                        t_allocated_regs, 0, ts->indirect_base);
5442                     ok = tcg_out_mov(s, ts->type, nr, reg);
5443                     tcg_debug_assert(ok);
5444 
5445                     set_temp_val_reg(s, ts, nr);
5446                 } else {
5447                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5448                               t_allocated_regs, 0);
5449                     copyto_new_reg = true;
5450                 }
5451             } else {
5452                 /* Preferably allocate to reg, otherwise copy. */
5453                 i_required_regs = (TCGRegSet)1 << reg;
5454                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5455                           i_preferred_regs);
5456                 copyto_new_reg = ts->reg != reg;
5457             }
5458             break;
5459 
5460         default:
5461             g_assert_not_reached();
5462         }
5463 
5464         if (copyto_new_reg) {
5465             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5466                 /*
5467                  * Cross register class move not supported.  Sync the
5468                  * temp back to its slot and load from there.
5469                  */
5470                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5471                 tcg_out_ld(s, ts->type, reg,
5472                            ts->mem_base->reg, ts->mem_offset);
5473             }
5474         }
5475         new_args[i] = reg;
5476         const_args[i] = 0;
5477         tcg_regset_set_reg(i_allocated_regs, reg);
5478     }
5479 
5480     /* mark dead temporaries and free the associated registers */
5481     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5482         if (IS_DEAD_ARG(i)) {
5483             temp_dead(s, arg_temp(op->args[i]));
5484         }
5485     }
5486 
5487     if (def->flags & TCG_OPF_COND_BRANCH) {
5488         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5489     } else if (def->flags & TCG_OPF_BB_END) {
5490         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5491     } else {
5492         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5493             assert_carry_dead(s);
5494             /* XXX: permit generic clobber register list ? */
5495             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5496                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5497                     tcg_reg_free(s, i, i_allocated_regs);
5498                 }
5499             }
5500         }
5501         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5502             /* sync globals if the op has side effects and might trigger
5503                an exception. */
5504             sync_globals(s, i_allocated_regs);
5505         }
5506 
5507         /* satisfy the output constraints */
5508         for (k = 0; k < nb_oargs; k++) {
5509             i = args_ct[k].sort_index;
5510             arg = op->args[i];
5511             arg_ct = &args_ct[i];
5512             ts = arg_temp(arg);
5513 
5514             /* ENV should not be modified.  */
5515             tcg_debug_assert(!temp_readonly(ts));
5516 
5517             switch (arg_ct->pair) {
5518             case 0: /* not paired */
5519                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5520                     reg = new_args[arg_ct->alias_index];
5521                 } else if (arg_ct->newreg) {
5522                     reg = tcg_reg_alloc(s, arg_ct->regs,
5523                                         i_allocated_regs | o_allocated_regs,
5524                                         output_pref(op, k), ts->indirect_base);
5525                 } else {
5526                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5527                                         output_pref(op, k), ts->indirect_base);
5528                 }
5529                 break;
5530 
5531             case 1: /* first of pair */
5532                 if (arg_ct->oalias) {
5533                     reg = new_args[arg_ct->alias_index];
5534                 } else if (arg_ct->newreg) {
5535                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5536                                              i_allocated_regs | o_allocated_regs,
5537                                              output_pref(op, k),
5538                                              ts->indirect_base);
5539                 } else {
5540                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5541                                              output_pref(op, k),
5542                                              ts->indirect_base);
5543                 }
5544                 break;
5545 
5546             case 2: /* second of pair */
5547                 if (arg_ct->oalias) {
5548                     reg = new_args[arg_ct->alias_index];
5549                 } else {
5550                     reg = new_args[arg_ct->pair_index] + 1;
5551                 }
5552                 break;
5553 
5554             case 3: /* first of pair, aliasing with a second input */
5555                 tcg_debug_assert(!arg_ct->newreg);
5556                 reg = new_args[arg_ct->pair_index] - 1;
5557                 break;
5558 
5559             default:
5560                 g_assert_not_reached();
5561             }
5562             tcg_regset_set_reg(o_allocated_regs, reg);
5563             set_temp_val_reg(s, ts, reg);
5564             ts->mem_coherent = 0;
5565             new_args[i] = reg;
5566         }
5567     }
5568 
5569     /* emit instruction */
5570     TCGType type = TCGOP_TYPE(op);
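    /*
     * All operands now satisfy their constraints; dispatch on the
     * opcode using new_args[] and const_args[].
     */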
5571     switch (op->opc) {
5572     case INDEX_op_add:
5573     case INDEX_op_and:
5574     case INDEX_op_andc:
5575     case INDEX_op_clz:
5576     case INDEX_op_ctz:
5577     case INDEX_op_divs:
5578     case INDEX_op_divu:
5579     case INDEX_op_eqv:
5580     case INDEX_op_mul:
5581     case INDEX_op_mulsh:
5582     case INDEX_op_muluh:
5583     case INDEX_op_nand:
5584     case INDEX_op_nor:
5585     case INDEX_op_or:
5586     case INDEX_op_orc:
5587     case INDEX_op_rems:
5588     case INDEX_op_remu:
5589     case INDEX_op_rotl:
5590     case INDEX_op_rotr:
5591     case INDEX_op_sar:
5592     case INDEX_op_shl:
5593     case INDEX_op_shr:
5594     case INDEX_op_xor:
5595         {
5596             const TCGOutOpBinary *out =
5597                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5598 
5599             /* Constants should never appear in the first source operand. */
5600             tcg_debug_assert(!const_args[1]);
5601             if (const_args[2]) {
5602                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5603             } else {
5604                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5605             }
5606         }
5607         break;
5608 
5609     case INDEX_op_sub:
5610         {
5611             const TCGOutOpSubtract *out =
5612                 container_of(all_outop[op->opc], TCGOutOpSubtract, base);
5613 
5614             /*
5615              * Constants should never appear in the second source operand.
5616              * These are folded to an add with a negative constant.
5617              */
5618             tcg_debug_assert(!const_args[2]);
5619             if (const_args[1]) {
5620                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5621             } else {
5622                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5623             }
5624         }
5625         break;
5626 
5627     case INDEX_op_addco:
5628     case INDEX_op_subbo:
5629     case INDEX_op_addci:
5630     case INDEX_op_subbi:
5631     case INDEX_op_addcio:
5632     case INDEX_op_subbio:
5633     case INDEX_op_addc1o:
5634     case INDEX_op_subb1o:
5635         g_assert_not_reached();
5636 
5637     case INDEX_op_bswap64:
5638     case INDEX_op_ext_i32_i64:
5639     case INDEX_op_extu_i32_i64:
5640     case INDEX_op_extrl_i64_i32:
5641     case INDEX_op_extrh_i64_i32:
5642         assert(TCG_TARGET_REG_BITS == 64);
5643         /* fall through */
5644     case INDEX_op_ctpop:
5645     case INDEX_op_neg:
5646     case INDEX_op_not:
5647         {
5648             const TCGOutOpUnary *out =
5649                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5650 
5651             /* Constants should have been folded. */
5652             tcg_debug_assert(!const_args[1]);
5653             out->out_rr(s, type, new_args[0], new_args[1]);
5654         }
5655         break;
5656 
5657     case INDEX_op_bswap16:
5658     case INDEX_op_bswap32:
5659         {
5660             const TCGOutOpBswap *out =
5661                 container_of(all_outop[op->opc], TCGOutOpBswap, base);
5662 
5663             tcg_debug_assert(!const_args[1]);
5664             out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
5665         }
5666         break;
5667 
5668     case INDEX_op_deposit:
5669         {
5670             const TCGOutOpDeposit *out = &outop_deposit;
5671 
5672             if (const_args[2]) {
5673                 tcg_debug_assert(!const_args[1]);
5674                 out->out_rri(s, type, new_args[0], new_args[1],
5675                              new_args[2], new_args[3], new_args[4]);
5676             } else if (const_args[1]) {
5677                 tcg_debug_assert(new_args[1] == 0);
5678                 tcg_debug_assert(!const_args[2]);
5679                 out->out_rzr(s, type, new_args[0], new_args[2],
5680                              new_args[3], new_args[4]);
5681             } else {
5682                 out->out_rrr(s, type, new_args[0], new_args[1],
5683                              new_args[2], new_args[3], new_args[4]);
5684             }
5685         }
5686         break;
5687 
5688     case INDEX_op_divs2:
5689     case INDEX_op_divu2:
5690         {
5691             const TCGOutOpDivRem *out =
5692                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5693 
5694             /* Only used by x86 and s390x, which use matching constraints. */
5695             tcg_debug_assert(new_args[0] == new_args[2]);
5696             tcg_debug_assert(new_args[1] == new_args[3]);
5697             tcg_debug_assert(!const_args[4]);
5698             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5699         }
5700         break;
5701 
5702     case INDEX_op_extract:
5703     case INDEX_op_sextract:
5704         {
5705             const TCGOutOpExtract *out =
5706                 container_of(all_outop[op->opc], TCGOutOpExtract, base);
5707 
5708             tcg_debug_assert(!const_args[1]);
5709             out->out_rr(s, type, new_args[0], new_args[1],
5710                         new_args[2], new_args[3]);
5711         }
5712         break;
5713 
5714     case INDEX_op_extract2:
5715         {
5716             const TCGOutOpExtract2 *out = &outop_extract2;
5717 
5718             tcg_debug_assert(!const_args[1]);
5719             tcg_debug_assert(!const_args[2]);
5720             out->out_rrr(s, type, new_args[0], new_args[1],
5721                          new_args[2], new_args[3]);
5722         }
5723         break;
5724 
5725     case INDEX_op_muls2:
5726     case INDEX_op_mulu2:
5727         {
5728             const TCGOutOpMul2 *out =
5729                 container_of(all_outop[op->opc], TCGOutOpMul2, base);
5730 
5731             tcg_debug_assert(!const_args[2]);
5732             tcg_debug_assert(!const_args[3]);
5733             out->out_rrrr(s, type, new_args[0], new_args[1],
5734                           new_args[2], new_args[3]);
5735         }
5736         break;
5737 
5738     case INDEX_op_brcond:
5739         {
5740             const TCGOutOpBrcond *out = &outop_brcond;
5741             TCGCond cond = new_args[2];
5742             TCGLabel *label = arg_label(new_args[3]);
5743 
5744             tcg_debug_assert(!const_args[0]);
5745             if (const_args[1]) {
5746                 out->out_ri(s, type, cond, new_args[0], new_args[1], label);
5747             } else {
5748                 out->out_rr(s, type, cond, new_args[0], new_args[1], label);
5749             }
5750         }
5751         break;
5752 
5753     case INDEX_op_movcond:
5754         {
5755             const TCGOutOpMovcond *out = &outop_movcond;
5756             TCGCond cond = new_args[5];
5757 
5758             tcg_debug_assert(!const_args[1]);
5759             out->out(s, type, cond, new_args[0],
5760                      new_args[1], new_args[2], const_args[2],
5761                      new_args[3], const_args[3],
5762                      new_args[4], const_args[4]);
5763         }
5764         break;
5765 
5766     case INDEX_op_setcond:
5767     case INDEX_op_negsetcond:
5768         {
5769             const TCGOutOpSetcond *out =
5770                 container_of(all_outop[op->opc], TCGOutOpSetcond, base);
5771             TCGCond cond = new_args[3];
5772 
5773             tcg_debug_assert(!const_args[1]);
5774             if (const_args[2]) {
5775                 out->out_rri(s, type, cond,
5776                              new_args[0], new_args[1], new_args[2]);
5777             } else {
5778                 out->out_rrr(s, type, cond,
5779                              new_args[0], new_args[1], new_args[2]);
5780             }
5781         }
5782         break;
5783 
5784 #if TCG_TARGET_REG_BITS == 32
5785     case INDEX_op_brcond2_i32:
5786         {
5787             const TCGOutOpBrcond2 *out = &outop_brcond2;
5788             TCGCond cond = new_args[4];
5789             TCGLabel *label = arg_label(new_args[5]);
5790 
5791             tcg_debug_assert(!const_args[0]);
5792             tcg_debug_assert(!const_args[1]);
5793             out->out(s, cond, new_args[0], new_args[1],
5794                      new_args[2], const_args[2],
5795                      new_args[3], const_args[3], label);
5796         }
5797         break;
5798     case INDEX_op_setcond2_i32:
5799         {
5800             const TCGOutOpSetcond2 *out = &outop_setcond2;
5801             TCGCond cond = new_args[5];
5802 
5803             tcg_debug_assert(!const_args[1]);
5804             tcg_debug_assert(!const_args[2]);
5805             out->out(s, cond, new_args[0], new_args[1], new_args[2],
5806                      new_args[3], const_args[3], new_args[4], const_args[4]);
5807         }
5808         break;
5809 #else
5810     case INDEX_op_brcond2_i32:
5811     case INDEX_op_setcond2_i32:
5812         g_assert_not_reached();
5813 #endif
5814 
5815     default:
5816         if (def->flags & TCG_OPF_VECTOR) {
5817             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5818                            TCGOP_VECE(op), new_args, const_args);
5819         } else {
5820             tcg_out_op(s, op->opc, type, new_args, const_args);
5821         }
5822         break;
5823     }
5824 
5825     if (def->flags & TCG_OPF_CARRY_IN) {
5826         s->carry_live = false;
5827     }
5828     if (def->flags & TCG_OPF_CARRY_OUT) {
5829         s->carry_live = true;
5830     }
5831 
5832     /* Move the outputs into the correct registers if needed. */
5833     for (i = 0; i < nb_oargs; i++) {
5834         ts = arg_temp(op->args[i]);
5835 
5836         /* ENV should not be modified.  */
5837         tcg_debug_assert(!temp_readonly(ts));
5838 
5839         if (NEED_SYNC_ARG(i)) {
5840             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5841         } else if (IS_DEAD_ARG(i)) {
5842             temp_dead(s, ts);
5843         }
5844     }
5845 }
5846 
5847 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5848 {
5849     const TCGLifeData arg_life = op->life;
5850     TCGTemp *ots, *itsl, *itsh;
5851     TCGType vtype = TCGOP_TYPE(op);
5852 
5853     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5854     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5855     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5856 
5857     ots = arg_temp(op->args[0]);
5858     itsl = arg_temp(op->args[1]);
5859     itsh = arg_temp(op->args[2]);
5860 
5861     /* ENV should not be modified.  */
5862     tcg_debug_assert(!temp_readonly(ots));
5863 
5864     /* Allocate the output register now.  */
5865     if (ots->val_type != TEMP_VAL_REG) {
5866         TCGRegSet allocated_regs = s->reserved_regs;
5867         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5868         TCGReg oreg;
5869 
5870         /* Make sure to not spill the input registers. */
5871         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5872             tcg_regset_set_reg(allocated_regs, itsl->reg);
5873         }
5874         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5875             tcg_regset_set_reg(allocated_regs, itsh->reg);
5876         }
5877 
5878         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5879                              output_pref(op, 0), ots->indirect_base);
5880         set_temp_val_reg(s, ots, oreg);
5881     }
5882 
5883     /* Promote dup2 of immediates to dupi_vec. */
5884     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5885         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5886         MemOp vece = MO_64;
5887 
5888         if (val == dup_const(MO_8, val)) {
5889             vece = MO_8;
5890         } else if (val == dup_const(MO_16, val)) {
5891             vece = MO_16;
5892         } else if (val == dup_const(MO_32, val)) {
5893             vece = MO_32;
5894         }
5895 
5896         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5897         goto done;
5898     }
5899 
5900     /* If the two inputs form one 64-bit value, try dupm_vec. */
5901     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5902         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5903         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5904         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
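        /* its is the part with temp_subindex 0, the base of the pair. */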
5905 
5906         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5907         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5908 
5909         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5910                              its->mem_base->reg, its->mem_offset)) {
5911             goto done;
5912         }
5913     }
5914 
5915     /* Fall back to generic expansion. */
5916     return false;
5917 
5918  done:
5919     ots->mem_coherent = 0;
5920     if (IS_DEAD_ARG(1)) {
5921         temp_dead(s, itsl);
5922     }
5923     if (IS_DEAD_ARG(2)) {
5924         temp_dead(s, itsh);
5925     }
5926     if (NEED_SYNC_ARG(0)) {
5927         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5928     } else if (IS_DEAD_ARG(0)) {
5929         temp_dead(s, ots);
5930     }
5931     return true;
5932 }
5933 
5934 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5935                          TCGRegSet allocated_regs)
5936 {
5937     if (ts->val_type == TEMP_VAL_REG) {
5938         if (ts->reg != reg) {
5939             tcg_reg_free(s, reg, allocated_regs);
5940             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5941                 /*
5942                  * Cross register class move not supported.  Sync the
5943                  * temp back to its slot and load from there.
5944                  */
5945                 temp_sync(s, ts, allocated_regs, 0, 0);
5946                 tcg_out_ld(s, ts->type, reg,
5947                            ts->mem_base->reg, ts->mem_offset);
5948             }
5949         }
5950     } else {
5951         TCGRegSet arg_set = 0;
5952 
5953         tcg_reg_free(s, reg, allocated_regs);
5954         tcg_regset_set_reg(arg_set, reg);
5955         temp_load(s, ts, arg_set, allocated_regs, 0);
5956     }
5957 }
5958 
5959 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5960                          TCGRegSet allocated_regs)
5961 {
5962     /*
5963      * When the destination is on the stack, load up the temp and store.
5964      * If there are many call-saved registers, the temp might live to
5965      * see another use; otherwise it'll be discarded.
5966      */
5967     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5968     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5969                arg_slot_stk_ofs(arg_slot));
5970 }
5971 
5972 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5973                             TCGTemp *ts, TCGRegSet *allocated_regs)
5974 {
5975     if (arg_slot_reg_p(l->arg_slot)) {
5976         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5977         load_arg_reg(s, reg, ts, *allocated_regs);
5978         tcg_regset_set_reg(*allocated_regs, reg);
5979     } else {
5980         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5981     }
5982 }
5983 
5984 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5985                          intptr_t ref_off, TCGRegSet *allocated_regs)
5986 {
5987     TCGReg reg;
5988 
5989     if (arg_slot_reg_p(arg_slot)) {
5990         reg = tcg_target_call_iarg_regs[arg_slot];
5991         tcg_reg_free(s, reg, *allocated_regs);
5992         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5993         tcg_regset_set_reg(*allocated_regs, reg);
5994     } else {
5995         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5996                             *allocated_regs, 0, false);
5997         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5998         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5999                    arg_slot_stk_ofs(arg_slot));
6000     }
6001 }
6002 
6003 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
6004 {
6005     const int nb_oargs = TCGOP_CALLO(op);
6006     const int nb_iargs = TCGOP_CALLI(op);
6007     const TCGLifeData arg_life = op->life;
6008     const TCGHelperInfo *info = tcg_call_info(op);
6009     TCGRegSet allocated_regs = s->reserved_regs;
6010     int i;
6011 
6012     /*
6013      * Move inputs into place in reverse order, so that stacked arguments
6014      * (which may need a scratch register to load) are handled first.
6015      */
6016     for (i = nb_iargs - 1; i >= 0; --i) {
6017         const TCGCallArgumentLoc *loc = &info->in[i];
6018         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
6019 
6020         switch (loc->kind) {
6021         case TCG_CALL_ARG_NORMAL:
6022         case TCG_CALL_ARG_EXTEND_U:
6023         case TCG_CALL_ARG_EXTEND_S:
6024             load_arg_normal(s, loc, ts, &allocated_regs);
6025             break;
6026         case TCG_CALL_ARG_BY_REF:
6027             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
6028             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
6029                          arg_slot_stk_ofs(loc->ref_slot),
6030                          &allocated_regs);
6031             break;
6032         case TCG_CALL_ARG_BY_REF_N:
6033             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
6034             break;
6035         default:
6036             g_assert_not_reached();
6037         }
6038     }
6039 
6040     /* Mark dead temporaries and free the associated registers.  */
6041     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
6042         if (IS_DEAD_ARG(i)) {
6043             temp_dead(s, arg_temp(op->args[i]));
6044         }
6045     }
6046 
6047     /* Clobber call registers.  */
6048     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
6049         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
6050             tcg_reg_free(s, i, allocated_regs);
6051         }
6052     }
6053 
6054     /*
6055      * Save globals if they might be written by the helper,
6056      * sync them if they might be read.
6057      */
6058     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
6059         /* Nothing to do */
6060     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
6061         sync_globals(s, allocated_regs);
6062     } else {
6063         save_globals(s, allocated_regs);
6064     }
6065 
6066     /*
6067      * If the ABI passes a pointer to the returned struct as the first
6068      * argument, load that now.  Pass a pointer to the output home slot.
6069      */
6070     if (info->out_kind == TCG_CALL_RET_BY_REF) {
6071         TCGTemp *ts = arg_temp(op->args[0]);
6072 
6073         if (!ts->mem_allocated) {
6074             temp_allocate_frame(s, ts);
6075         }
6076         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
6077     }
6078 
6079     tcg_out_call(s, tcg_call_func(op), info);
6080 
6081     /* Assign output registers and emit moves if needed.  */
6082     switch (info->out_kind) {
6083     case TCG_CALL_RET_NORMAL:
6084         for (i = 0; i < nb_oargs; i++) {
6085             TCGTemp *ts = arg_temp(op->args[i]);
6086             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
6087 
6088             /* ENV should not be modified.  */
6089             tcg_debug_assert(!temp_readonly(ts));
6090 
6091             set_temp_val_reg(s, ts, reg);
6092             ts->mem_coherent = 0;
6093         }
6094         break;
6095 
6096     case TCG_CALL_RET_BY_VEC:
6097         {
6098             TCGTemp *ts = arg_temp(op->args[0]);
6099 
6100             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
6101             tcg_debug_assert(ts->temp_subindex == 0);
6102             if (!ts->mem_allocated) {
6103                 temp_allocate_frame(s, ts);
6104             }
6105             tcg_out_st(s, TCG_TYPE_V128,
6106                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6107                        ts->mem_base->reg, ts->mem_offset);
6108         }
6109         /* fall through to mark all parts in memory */
6110 
6111     case TCG_CALL_RET_BY_REF:
6112         /* The callee has performed a write through the reference. */
6113         for (i = 0; i < nb_oargs; i++) {
6114             TCGTemp *ts = arg_temp(op->args[i]);
6115             ts->val_type = TEMP_VAL_MEM;
6116         }
6117         break;
6118 
6119     default:
6120         g_assert_not_reached();
6121     }
6122 
6123     /* Flush or discard output registers as needed. */
6124     for (i = 0; i < nb_oargs; i++) {
6125         TCGTemp *ts = arg_temp(op->args[i]);
6126         if (NEED_SYNC_ARG(i)) {
6127             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
6128         } else if (IS_DEAD_ARG(i)) {
6129             temp_dead(s, ts);
6130         }
6131     }
6132 }
6133 
6134 /**
6135  * atom_and_align_for_opc:
6136  * @s: tcg context
6137  * @opc: memory operation code
6138  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
6139  * @allow_two_ops: true if we are prepared to issue two operations
6140  *
6141  * Return the alignment and atomicity to use for the inline fast path
6142  * for the given memory operation.  The alignment may be larger than
6143  * that specified in @opc, and the correct alignment will be diagnosed
6144  * by the slow path helper.
6145  *
6146  * If @allow_two_ops, the host is prepared to test for 2x alignment,
6147  * and issue two loads or stores for subalignment.
6148  */
6149 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
6150                                            MemOp host_atom, bool allow_two_ops)
6151 {
6152     MemOp align = memop_alignment_bits(opc);
6153     MemOp size = opc & MO_SIZE;
6154     MemOp half = size ? size - 1 : 0;
6155     MemOp atom = opc & MO_ATOM_MASK;
6156     MemOp atmax;
6157 
6158     switch (atom) {
6159     case MO_ATOM_NONE:
6160         /* The operation requires no specific atomicity. */
6161         atmax = MO_8;
6162         break;
6163 
6164     case MO_ATOM_IFALIGN:
6165         atmax = size;
6166         break;
6167 
6168     case MO_ATOM_IFALIGN_PAIR:
6169         atmax = half;
6170         break;
6171 
6172     case MO_ATOM_WITHIN16:
6173         atmax = size;
6174         if (size == MO_128) {
6175             /* Misalignment implies !within16, and therefore no atomicity. */
6176         } else if (host_atom != MO_ATOM_WITHIN16) {
6177             /* The host does not implement within16, so require alignment. */
6178             align = MAX(align, size);
6179         }
6180         break;
6181 
6182     case MO_ATOM_WITHIN16_PAIR:
6183         atmax = size;
6184         /*
6185          * Misalignment implies !within16, and therefore half atomicity.
6186          * Any host prepared for two operations can implement this with
6187          * half alignment.
6188          */
6189         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
6190             align = MAX(align, half);
6191         }
6192         break;
6193 
6194     case MO_ATOM_SUBALIGN:
6195         atmax = size;
6196         if (host_atom != MO_ATOM_SUBALIGN) {
6197             /* If unaligned but not odd, there are subobjects up to half. */
6198             if (allow_two_ops) {
6199                 align = MAX(align, half);
6200             } else {
6201                 align = MAX(align, size);
6202             }
6203         }
6204         break;
6205 
6206     default:
6207         g_assert_not_reached();
6208     }
6209 
6210     return (TCGAtomAlign){ .atom = atmax, .align = align };
6211 }
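
/*
 * Example: MO_ATOM_SUBALIGN on a host without native subalignment
 * support.  A host prepared for two operations only needs alignment
 * to half the access size, since subobjects up to half are then
 * handled atomically; a single-operation host must require full
 * alignment and leave the rest to the slow path.
 */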
6212 
6213 /*
6214  * Similarly for qemu_ld/st slow path helpers.
6215  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
6216  * using only the provided backend tcg_out_* functions.
6217  */
6218 
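/*
 * In sketch form, a backend's qemu_ld slow path built on these
 * routines looks roughly like the following; ldst_helper_param, the
 * helper table and the branch/call emitters are per-backend, so the
 * names here are illustrative only:
 *
 *     static const TCGLdstHelperParam ldst_helper_param = {
 *         .ntmp = 1, .tmp = { TCG_REG_TMP }
 *     };
 *
 *     static bool tcg_out_qemu_ld_slow_path(TCGContext *s,
 *                                           TCGLabelQemuLdst *lb)
 *     {
 *         MemOp opc = get_memop(lb->oi);
 *
 *         tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
 *         tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
 *         tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
 *         tcg_out_goto(s, lb->raddr);
 *         return true;
 *     }
 */
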
6219 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
6220 {
6221     int ofs = arg_slot_stk_ofs(slot);
6222 
6223     /*
6224      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
6225      * require extension to uint64_t, adjust the address for uint32_t.
6226      */
6227     if (HOST_BIG_ENDIAN &&
6228         TCG_TARGET_REG_BITS == 64 &&
6229         type == TCG_TYPE_I32) {
6230         ofs += 4;
6231     }
6232     return ofs;
6233 }
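
/*
 * E.g. on a big-endian 64-bit host, a 32-bit argument lives in the
 * high-addressed half of its 8-byte slot, hence the +4 above; on a
 * little-endian host the value is at the slot base either way.
 */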
6234 
6235 static void tcg_out_helper_load_slots(TCGContext *s,
6236                                       unsigned nmov, TCGMovExtend *mov,
6237                                       const TCGLdstHelperParam *parm)
6238 {
6239     unsigned i;
6240     TCGReg dst3;
6241 
6242     /*
6243      * Start from the end, storing to the stack first.
6244      * This frees those registers, so we need not consider overlap.
6245      */
6246     for (i = nmov; i-- > 0; ) {
6247         unsigned slot = mov[i].dst;
6248 
6249         if (arg_slot_reg_p(slot)) {
6250             goto found_reg;
6251         }
6252 
6253         TCGReg src = mov[i].src;
6254         TCGType dst_type = mov[i].dst_type;
6255         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6256 
6257         /* The argument is going onto the stack; extend into scratch. */
6258         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6259             tcg_debug_assert(parm->ntmp != 0);
6260             mov[i].dst = src = parm->tmp[0];
6261             tcg_out_movext1(s, &mov[i]);
6262         }
6263 
6264         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6265                    tcg_out_helper_stk_ofs(dst_type, slot));
6266     }
6267     return;
6268 
6269  found_reg:
6270     /*
6271      * The remaining arguments are in registers.
6272      * Convert slot numbers to argument registers.
6273      */
6274     nmov = i + 1;
6275     for (i = 0; i < nmov; ++i) {
6276         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6277     }
6278 
6279     switch (nmov) {
6280     case 4:
6281         /* The backend must have provided enough temps for the worst case. */
6282         tcg_debug_assert(parm->ntmp >= 2);
6283 
6284         dst3 = mov[3].dst;
6285         for (unsigned j = 0; j < 3; ++j) {
6286             if (dst3 == mov[j].src) {
6287                 /*
6288                  * Conflict. Copy the source to a temporary, perform the
6289                  * remaining moves, then the extension from our scratch
6290                  * on the way out.
6291                  */
6292                 TCGReg scratch = parm->tmp[1];
6293 
6294                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6295                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6296                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6297                 return;
6298             }
6299         }
6300 
6301         /* No conflicts: perform this move and continue. */
6302         tcg_out_movext1(s, &mov[3]);
6303         /* fall through */
6304 
6305     case 3:
6306         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6307                         parm->ntmp ? parm->tmp[0] : -1);
6308         break;
6309     case 2:
6310         tcg_out_movext2(s, mov, mov + 1,
6311                         parm->ntmp ? parm->tmp[0] : -1);
6312         break;
6313     case 1:
6314         tcg_out_movext1(s, mov);
6315         break;
6316     default:
6317         g_assert_not_reached();
6318     }
6319 }
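
/*
 * The register cases above form a small parallel-move problem: an
 * argument register already loaded may be the source of a later move.
 * The two- and three-move orderings are delegated to tcg_out_movext2/3,
 * with parm->tmp[0] available to break cycles, so the four-move case
 * only needs to pre-copy a conflicting mov[3].src into parm->tmp[1].
 */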
6320 
6321 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6322                                     TCGType type, tcg_target_long imm,
6323                                     const TCGLdstHelperParam *parm)
6324 {
6325     if (arg_slot_reg_p(slot)) {
6326         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6327     } else {
6328         int ofs = tcg_out_helper_stk_ofs(type, slot);
6329         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6330             tcg_debug_assert(parm->ntmp != 0);
6331             tcg_out_movi(s, type, parm->tmp[0], imm);
6332             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6333         }
6334     }
6335 }
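
/*
 * tcg_out_sti returns false for immediates that the backend cannot
 * store directly, in which case the constant is materialized in a
 * scratch register and spilled; stack-bound immediates therefore
 * require parm->ntmp != 0.
 */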
6336 
6337 static void tcg_out_helper_load_common_args(TCGContext *s,
6338                                             const TCGLabelQemuLdst *ldst,
6339                                             const TCGLdstHelperParam *parm,
6340                                             const TCGHelperInfo *info,
6341                                             unsigned next_arg)
6342 {
6343     TCGMovExtend ptr_mov = {
6344         .dst_type = TCG_TYPE_PTR,
6345         .src_type = TCG_TYPE_PTR,
6346         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6347     };
6348     const TCGCallArgumentLoc *loc = &info->in[0];
6349     TCGType type;
6350     unsigned slot;
6351     tcg_target_ulong imm;
6352 
6353     /*
6354      * Handle env, which is always first.
6355      */
6356     ptr_mov.dst = loc->arg_slot;
6357     ptr_mov.src = TCG_AREG0;
6358     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6359 
6360     /*
6361      * Handle oi.
6362      */
6363     imm = ldst->oi;
6364     loc = &info->in[next_arg];
6365     type = TCG_TYPE_I32;
6366     switch (loc->kind) {
6367     case TCG_CALL_ARG_NORMAL:
6368         break;
6369     case TCG_CALL_ARG_EXTEND_U:
6370     case TCG_CALL_ARG_EXTEND_S:
6371         /* No extension required for MemOpIdx. */
6372         tcg_debug_assert(imm <= INT32_MAX);
6373         type = TCG_TYPE_REG;
6374         break;
6375     default:
6376         g_assert_not_reached();
6377     }
6378     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6379     next_arg++;
6380 
6381     /*
6382      * Handle ra.
6383      */
6384     loc = &info->in[next_arg];
6385     slot = loc->arg_slot;
6386     if (parm->ra_gen) {
6387         int arg_reg = -1;
6388         TCGReg ra_reg;
6389 
6390         if (arg_slot_reg_p(slot)) {
6391             arg_reg = tcg_target_call_iarg_regs[slot];
6392         }
6393         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6394 
6395         ptr_mov.dst = slot;
6396         ptr_mov.src = ra_reg;
6397         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6398     } else {
6399         imm = (uintptr_t)ldst->raddr;
6400         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6401     }
6402 }
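
/*
 * All of the slow path helpers thus share one calling pattern:
 * env always first, then the address and/or data filled in by our
 * callers below, then the MemOpIdx and the return address used for
 * unwinding, both handled here.
 */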
6403 
6404 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6405                                        const TCGCallArgumentLoc *loc,
6406                                        TCGType dst_type, TCGType src_type,
6407                                        TCGReg lo, TCGReg hi)
6408 {
6409     MemOp reg_mo;
6410 
6411     if (dst_type <= TCG_TYPE_REG) {
6412         MemOp src_ext;
6413 
6414         switch (loc->kind) {
6415         case TCG_CALL_ARG_NORMAL:
6416             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6417             break;
6418         case TCG_CALL_ARG_EXTEND_U:
6419             dst_type = TCG_TYPE_REG;
6420             src_ext = MO_UL;
6421             break;
6422         case TCG_CALL_ARG_EXTEND_S:
6423             dst_type = TCG_TYPE_REG;
6424             src_ext = MO_SL;
6425             break;
6426         default:
6427             g_assert_not_reached();
6428         }
6429 
6430         mov[0].dst = loc->arg_slot;
6431         mov[0].dst_type = dst_type;
6432         mov[0].src = lo;
6433         mov[0].src_type = src_type;
6434         mov[0].src_ext = src_ext;
6435         return 1;
6436     }
6437 
6438     if (TCG_TARGET_REG_BITS == 32) {
6439         assert(dst_type == TCG_TYPE_I64);
6440         reg_mo = MO_32;
6441     } else {
6442         assert(dst_type == TCG_TYPE_I128);
6443         reg_mo = MO_64;
6444     }
6445 
6446     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6447     mov[0].src = lo;
6448     mov[0].dst_type = TCG_TYPE_REG;
6449     mov[0].src_type = TCG_TYPE_REG;
6450     mov[0].src_ext = reg_mo;
6451 
6452     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6453     mov[1].src = hi;
6454     mov[1].dst_type = TCG_TYPE_REG;
6455     mov[1].src_type = TCG_TYPE_REG;
6456     mov[1].src_ext = reg_mo;
6457 
6458     return 2;
6459 }
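
/*
 * E.g. an I64 argument on a 32-bit host expands into two
 * register-sized moves, with lo/hi matched to loc[0]/loc[1]
 * according to HOST_BIG_ENDIAN so that the pair lands in the
 * ABI's expected slot order.
 */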
6460 
6461 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6462                                    const TCGLdstHelperParam *parm)
6463 {
6464     const TCGHelperInfo *info;
6465     const TCGCallArgumentLoc *loc;
6466     TCGMovExtend mov[2];
6467     unsigned next_arg, nmov;
6468     MemOp mop = get_memop(ldst->oi);
6469 
6470     switch (mop & MO_SIZE) {
6471     case MO_8:
6472     case MO_16:
6473     case MO_32:
6474         info = &info_helper_ld32_mmu;
6475         break;
6476     case MO_64:
6477         info = &info_helper_ld64_mmu;
6478         break;
6479     case MO_128:
6480         info = &info_helper_ld128_mmu;
6481         break;
6482     default:
6483         g_assert_not_reached();
6484     }
6485 
6486     /* Defer env argument. */
6487     next_arg = 1;
6488 
6489     loc = &info->in[next_arg];
6490     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6491         /*
6492          * 32-bit host with 32-bit guest: zero-extend the guest address
6493          * to 64-bits for the helper by storing the low part, then
6494          * load a zero for the high part.
6495          */
6496         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6497                                TCG_TYPE_I32, TCG_TYPE_I32,
6498                                ldst->addr_reg, -1);
6499         tcg_out_helper_load_slots(s, 1, mov, parm);
6500 
6501         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6502                                 TCG_TYPE_I32, 0, parm);
6503         next_arg += 2;
6504     } else {
6505         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6506                                       ldst->addr_reg, -1);
6507         tcg_out_helper_load_slots(s, nmov, mov, parm);
6508         next_arg += nmov;
6509     }
6510 
6511     switch (info->out_kind) {
6512     case TCG_CALL_RET_NORMAL:
6513     case TCG_CALL_RET_BY_VEC:
6514         break;
6515     case TCG_CALL_RET_BY_REF:
6516         /*
6517          * The return reference is in the first argument slot.
6518          * We need memory in which to return: re-use the top of stack.
6519          */
6520         {
6521             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6522 
6523             if (arg_slot_reg_p(0)) {
6524                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6525                                  TCG_REG_CALL_STACK, ofs_slot0);
6526             } else {
6527                 tcg_debug_assert(parm->ntmp != 0);
6528                 tcg_out_addi_ptr(s, parm->tmp[0],
6529                                  TCG_REG_CALL_STACK, ofs_slot0);
6530                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6531                            TCG_REG_CALL_STACK, ofs_slot0);
6532             }
6533         }
6534         break;
6535     default:
6536         g_assert_not_reached();
6537     }
6538 
6539     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6540 }
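
/*
 * Whatever the access size, the call built above passes the same
 * argument tuple (env, guest address, MemOpIdx, return address);
 * the ld32/ld64/ld128 helpers differ only in how the data comes back.
 */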
6541 
6542 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6543                                   bool load_sign,
6544                                   const TCGLdstHelperParam *parm)
6545 {
6546     MemOp mop = get_memop(ldst->oi);
6547     TCGMovExtend mov[2];
6548     int ofs_slot0;
6549 
6550     switch (ldst->type) {
6551     case TCG_TYPE_I64:
6552         if (TCG_TARGET_REG_BITS == 32) {
6553             break;
6554         }
6555         /* fall through */
6556 
6557     case TCG_TYPE_I32:
6558         mov[0].dst = ldst->datalo_reg;
6559         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6560         mov[0].dst_type = ldst->type;
6561         mov[0].src_type = TCG_TYPE_REG;
6562 
6563         /*
6564          * If load_sign, then we allowed the helper to perform the
6565          * appropriate sign extension to tcg_target_ulong, and all
6566          * we need now is a plain move.
6567          *
6568          * If not, then we expect the relevant extension
6569          * instruction to be no more expensive than a move, and
6570          * we thus save the icache etc by only using one of two
6571          * helper functions.
6572          */
6573         if (load_sign || !(mop & MO_SIGN)) {
6574             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6575                 mov[0].src_ext = MO_32;
6576             } else {
6577                 mov[0].src_ext = MO_64;
6578             }
6579         } else {
6580             mov[0].src_ext = mop & MO_SSIZE;
6581         }
6582         tcg_out_movext1(s, mov);
6583         return;
6584 
6585     case TCG_TYPE_I128:
6586         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6587         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6588         switch (TCG_TARGET_CALL_RET_I128) {
6589         case TCG_CALL_RET_NORMAL:
6590             break;
6591         case TCG_CALL_RET_BY_VEC:
6592             tcg_out_st(s, TCG_TYPE_V128,
6593                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6594                        TCG_REG_CALL_STACK, ofs_slot0);
6595             /* fall through */
6596         case TCG_CALL_RET_BY_REF:
6597             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6598                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6599             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6600                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6601             return;
6602         default:
6603             g_assert_not_reached();
6604         }
6605         break;
6606 
6607     default:
6608         g_assert_not_reached();
6609     }
6610 
6611     mov[0].dst = ldst->datalo_reg;
6612     mov[0].src =
6613         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6614     mov[0].dst_type = TCG_TYPE_REG;
6615     mov[0].src_type = TCG_TYPE_REG;
6616     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6617 
6618     mov[1].dst = ldst->datahi_reg;
6619     mov[1].src =
6620         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6621     mov[1].dst_type = TCG_TYPE_REG;
6622     mov[1].src_type = TCG_TYPE_REG;
6623     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6624 
6625     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6626 }
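
/*
 * In the final two-register case the ABI's output pair may be
 * criss-crossed with datalo/datahi, so tcg_out_movext2 orders the
 * moves and falls back on parm->tmp[0] should they form a cycle.
 */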
6627 
6628 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6629                                    const TCGLdstHelperParam *parm)
6630 {
6631     const TCGHelperInfo *info;
6632     const TCGCallArgumentLoc *loc;
6633     TCGMovExtend mov[4];
6634     TCGType data_type;
6635     unsigned next_arg, nmov, n;
6636     MemOp mop = get_memop(ldst->oi);
6637 
6638     switch (mop & MO_SIZE) {
6639     case MO_8:
6640     case MO_16:
6641     case MO_32:
6642         info = &info_helper_st32_mmu;
6643         data_type = TCG_TYPE_I32;
6644         break;
6645     case MO_64:
6646         info = &info_helper_st64_mmu;
6647         data_type = TCG_TYPE_I64;
6648         break;
6649     case MO_128:
6650         info = &info_helper_st128_mmu;
6651         data_type = TCG_TYPE_I128;
6652         break;
6653     default:
6654         g_assert_not_reached();
6655     }
6656 
6657     /* Defer env argument. */
6658     next_arg = 1;
6659     nmov = 0;
6660 
6661     /* Handle addr argument. */
6662     loc = &info->in[next_arg];
6663     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6664     if (TCG_TARGET_REG_BITS == 32) {
6665         /*
6666          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6667          * to 64-bits for the helper by storing the low part.  Later,
6668          * after we have processed the register inputs, we will load a
6669          * zero for the high part.
6670          */
6671         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6672                                TCG_TYPE_I32, TCG_TYPE_I32,
6673                                ldst->addr_reg, -1);
6674         next_arg += 2;
6675         nmov += 1;
6676     } else {
6677         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6678                                    ldst->addr_reg, -1);
6679         next_arg += n;
6680         nmov += n;
6681     }
6682 
6683     /* Handle data argument. */
6684     loc = &info->in[next_arg];
6685     switch (loc->kind) {
6686     case TCG_CALL_ARG_NORMAL:
6687     case TCG_CALL_ARG_EXTEND_U:
6688     case TCG_CALL_ARG_EXTEND_S:
6689         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6690                                    ldst->datalo_reg, ldst->datahi_reg);
6691         next_arg += n;
6692         nmov += n;
6693         tcg_out_helper_load_slots(s, nmov, mov, parm);
6694         break;
6695 
6696     case TCG_CALL_ARG_BY_REF:
6697         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6698         tcg_debug_assert(data_type == TCG_TYPE_I128);
6699         tcg_out_st(s, TCG_TYPE_I64,
6700                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6701                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6702         tcg_out_st(s, TCG_TYPE_I64,
6703                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6704                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6705 
6706         tcg_out_helper_load_slots(s, nmov, mov, parm);
6707 
6708         if (arg_slot_reg_p(loc->arg_slot)) {
6709             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6710                              TCG_REG_CALL_STACK,
6711                              arg_slot_stk_ofs(loc->ref_slot));
6712         } else {
6713             tcg_debug_assert(parm->ntmp != 0);
6714             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6715                              arg_slot_stk_ofs(loc->ref_slot));
6716             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6717                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6718         }
6719         next_arg += 2;
6720         break;
6721 
6722     default:
6723         g_assert_not_reached();
6724     }
6725 
6726     if (TCG_TARGET_REG_BITS == 32) {
6727         /* Zero extend the address by loading a zero for the high part. */
6728         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6729         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6730     }
6731 
6732     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6733 }
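
/*
 * Deferring the zero for the address high part on 32-bit hosts is
 * deliberate: immediates are loaded only after every register input
 * has been placed, so they can never clobber a still-needed
 * argument register.
 */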
6734 
6735 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6736 {
6737     int i, start_words, num_insns;
6738     TCGOp *op;
6739 
6740     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6741                  && qemu_log_in_addr_range(pc_start))) {
6742         FILE *logfile = qemu_log_trylock();
6743         if (logfile) {
6744             fprintf(logfile, "OP:\n");
6745             tcg_dump_ops(s, logfile, false);
6746             fprintf(logfile, "\n");
6747             qemu_log_unlock(logfile);
6748         }
6749     }
6750 
6751 #ifdef CONFIG_DEBUG_TCG
6752     /* Ensure all labels referenced have been emitted.  */
6753     {
6754         TCGLabel *l;
6755         bool error = false;
6756 
6757         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6758             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6759                 qemu_log_mask(CPU_LOG_TB_OP,
6760                               "$L%d referenced but not present.\n", l->id);
6761                 error = true;
6762             }
6763         }
6764         assert(!error);
6765     }
6766 #endif
6767 
6768     /* Do not reuse any EBB that may be allocated within the TB. */
6769     tcg_temp_ebb_reset_freed(s);
6770 
6771     tcg_optimize(s);
6772 
6773     reachable_code_pass(s);
6774     liveness_pass_0(s);
6775     liveness_pass_1(s);
6776 
6777     if (s->nb_indirects > 0) {
6778         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6779                      && qemu_log_in_addr_range(pc_start))) {
6780             FILE *logfile = qemu_log_trylock();
6781             if (logfile) {
6782                 fprintf(logfile, "OP before indirect lowering:\n");
6783                 tcg_dump_ops(s, logfile, false);
6784                 fprintf(logfile, "\n");
6785                 qemu_log_unlock(logfile);
6786             }
6787         }
6788 
6789         /* Replace indirect temps with direct temps.  */
6790         if (liveness_pass_2(s)) {
6791             /* If changes were made, re-run liveness.  */
6792             liveness_pass_1(s);
6793         }
6794     }
6795 
6796     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6797                  && qemu_log_in_addr_range(pc_start))) {
6798         FILE *logfile = qemu_log_trylock();
6799         if (logfile) {
6800             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6801             tcg_dump_ops(s, logfile, true);
6802             fprintf(logfile, "\n");
6803             qemu_log_unlock(logfile);
6804         }
6805     }
6806 
6807     /* Initialize goto_tb jump offsets. */
6808     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6809     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6810     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6811     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6812 
6813     tcg_reg_alloc_start(s);
6814 
6815     /*
6816      * Reset the buffer pointers when restarting after overflow.
6817      * TODO: Move this into translate-all.c with the rest of the
6818      * buffer management.  Having only this done here is confusing.
6819      */
6820     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6821     s->code_ptr = s->code_buf;
6822     s->data_gen_ptr = NULL;
6823 
6824     QSIMPLEQ_INIT(&s->ldst_labels);
6825     s->pool_labels = NULL;
6826 
6827     start_words = s->insn_start_words;
6828     s->gen_insn_data =
6829         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6830 
6831     tcg_out_tb_start(s);
6832 
6833     num_insns = -1;
6834     s->carry_live = false;
6835     QTAILQ_FOREACH(op, &s->ops, link) {
6836         TCGOpcode opc = op->opc;
6837 
6838         switch (opc) {
6839         case INDEX_op_extrl_i64_i32:
6840             assert(TCG_TARGET_REG_BITS == 64);
6841             /*
6842              * If TCG_TYPE_I32 is represented in some canonical form,
6843              * e.g. zero or sign-extended, then emit as a unary op.
6844              * Otherwise we can treat this as a plain move.
6845              * If the output dies, treat this as a plain move, because
6846              * this will be implemented with a store.
6847              */
6848             if (TCG_TARGET_HAS_extr_i64_i32) {
6849                 TCGLifeData arg_life = op->life;
6850                 if (!IS_DEAD_ARG(0)) {
6851                     goto do_default;
6852                 }
6853             }
6854             /* fall through */
6855         case INDEX_op_mov:
6856         case INDEX_op_mov_vec:
6857             tcg_reg_alloc_mov(s, op);
6858             break;
6859         case INDEX_op_dup_vec:
6860             tcg_reg_alloc_dup(s, op);
6861             break;
6862         case INDEX_op_insn_start:
6863             assert_carry_dead(s);
6864             if (num_insns >= 0) {
6865                 size_t off = tcg_current_code_size(s);
6866                 s->gen_insn_end_off[num_insns] = off;
6867                 /* Assert that we do not overflow our stored offset.  */
6868                 assert(s->gen_insn_end_off[num_insns] == off);
6869             }
6870             num_insns++;
6871             for (i = 0; i < start_words; ++i) {
6872                 s->gen_insn_data[num_insns * start_words + i] =
6873                     tcg_get_insn_start_param(op, i);
6874             }
6875             break;
6876         case INDEX_op_discard:
6877             temp_dead(s, arg_temp(op->args[0]));
6878             break;
6879         case INDEX_op_set_label:
6880             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6881             tcg_out_label(s, arg_label(op->args[0]));
6882             break;
6883         case INDEX_op_call:
6884             assert_carry_dead(s);
6885             tcg_reg_alloc_call(s, op);
6886             break;
6887         case INDEX_op_exit_tb:
6888             tcg_out_exit_tb(s, op->args[0]);
6889             break;
6890         case INDEX_op_goto_tb:
6891             tcg_out_goto_tb(s, op->args[0]);
6892             break;
6893         case INDEX_op_dup2_vec:
6894             if (tcg_reg_alloc_dup2(s, op)) {
6895                 break;
6896             }
6897             /* fall through */
6898         default:
6899         do_default:
6900             /* Sanity check that we've not introduced any unhandled opcodes. */
6901             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6902                                               TCGOP_FLAGS(op)));
6903             /* Note: in order to speed up the code, it would be much
6904                faster to have specialized register allocator functions for
6905                some common argument patterns.  */
6906             tcg_reg_alloc_op(s, op);
6907             break;
6908         }
6909         /* Test for (pending) buffer overflow.  The assumption is that any
6910            one operation beginning below the high water mark cannot overrun
6911            the buffer completely.  Thus we can test for overflow after
6912            generating code without having to check during generation.  */
6913         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6914             return -1;
6915         }
6916         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6917         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6918             return -2;
6919         }
6920     }
6921     assert_carry_dead(s);
6922 
6923     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6924     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6925 
6926     /* Generate TB finalization at the end of block */
6927     i = tcg_out_ldst_finalize(s);
6928     if (i < 0) {
6929         return i;
6930     }
6931     i = tcg_out_pool_finalize(s);
6932     if (i < 0) {
6933         return i;
6934     }
6935     if (!tcg_resolve_relocs(s)) {
6936         return -2;
6937     }
6938 
6939 #ifndef CONFIG_TCG_INTERPRETER
6940     /* flush instruction cache */
6941     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6942                         (uintptr_t)s->code_buf,
6943                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6944 #endif
6945 
6946     return tcg_current_code_size(s);
6947 }
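
/*
 * A negative return asks the caller to restart code generation:
 * -1 when the code buffer high-water mark was crossed, -2 when the
 * TB outgrew the 16-bit offsets stored in gen_insn_end_off or when
 * relocations could not be resolved.
 */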
6948 
6949 #ifdef ELF_HOST_MACHINE
6950 /* In order to use this feature, the backend needs to do three things:
6951 
6952    (1) Define ELF_HOST_MACHINE to indicate both what value to
6953        put into the ELF image and to indicate support for the feature.
6954 
6955    (2) Define tcg_register_jit.  This should create a buffer containing
6956        the contents of a .debug_frame section that describes the post-
6957        prologue unwind info for the tcg machine.
6958 
6959    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6960 */
6961 
6962 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6963 typedef enum {
6964     JIT_NOACTION = 0,
6965     JIT_REGISTER_FN,
6966     JIT_UNREGISTER_FN
6967 } jit_actions_t;
6968 
6969 struct jit_code_entry {
6970     struct jit_code_entry *next_entry;
6971     struct jit_code_entry *prev_entry;
6972     const void *symfile_addr;
6973     uint64_t symfile_size;
6974 };
6975 
6976 struct jit_descriptor {
6977     uint32_t version;
6978     uint32_t action_flag;
6979     struct jit_code_entry *relevant_entry;
6980     struct jit_code_entry *first_entry;
6981 };
6982 
6983 void __jit_debug_register_code(void) __attribute__((noinline));
6984 void __jit_debug_register_code(void)
6985 {
6986     asm("");
6987 }
6988 
6989 /* Must statically initialize the version, because GDB may check
6990    the version before we can set it.  */
6991 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
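
/*
 * The protocol is breakpoint based: GDB plants an internal breakpoint
 * in __jit_debug_register_code and, each time it fires, re-reads
 * __jit_debug_descriptor to locate the in-memory ELF image described
 * by the relevant entry.
 */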
6992 
6993 /* End GDB interface.  */
6994 
6995 static int find_string(const char *strtab, const char *str)
6996 {
6997     const char *p = strtab + 1;
6998 
6999     while (1) {
7000         if (strcmp(p, str) == 0) {
7001             return p - strtab;
7002         }
7003         p += strlen(p) + 1;
7004     }
7005 }
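
/*
 * Note that the search has no failure case: callers may only ask
 * for strings known to be present in the fixed img->str table below.
 */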
7006 
7007 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
7008                                  const void *debug_frame,
7009                                  size_t debug_frame_size)
7010 {
7011     struct __attribute__((packed)) DebugInfo {
7012         uint32_t  len;
7013         uint16_t  version;
7014         uint32_t  abbrev;
7015         uint8_t   ptr_size;
7016         uint8_t   cu_die;
7017         uint16_t  cu_lang;
7018         uintptr_t cu_low_pc;
7019         uintptr_t cu_high_pc;
7020         uint8_t   fn_die;
7021         char      fn_name[16];
7022         uintptr_t fn_low_pc;
7023         uintptr_t fn_high_pc;
7024         uint8_t   cu_eoc;
7025     };
7026 
7027     struct ElfImage {
7028         ElfW(Ehdr) ehdr;
7029         ElfW(Phdr) phdr;
7030         ElfW(Shdr) shdr[7];
7031         ElfW(Sym)  sym[2];
7032         struct DebugInfo di;
7033         uint8_t    da[24];
7034         char       str[80];
7035     };
7036 
7037     struct ElfImage *img;
7038 
7039     static const struct ElfImage img_template = {
7040         .ehdr = {
7041             .e_ident[EI_MAG0] = ELFMAG0,
7042             .e_ident[EI_MAG1] = ELFMAG1,
7043             .e_ident[EI_MAG2] = ELFMAG2,
7044             .e_ident[EI_MAG3] = ELFMAG3,
7045             .e_ident[EI_CLASS] = ELF_CLASS,
7046             .e_ident[EI_DATA] = ELF_DATA,
7047             .e_ident[EI_VERSION] = EV_CURRENT,
7048             .e_type = ET_EXEC,
7049             .e_machine = ELF_HOST_MACHINE,
7050             .e_version = EV_CURRENT,
7051             .e_phoff = offsetof(struct ElfImage, phdr),
7052             .e_shoff = offsetof(struct ElfImage, shdr),
7053             .e_ehsize = sizeof(ElfW(Ehdr)),
7054             .e_phentsize = sizeof(ElfW(Phdr)),
7055             .e_phnum = 1,
7056             .e_shentsize = sizeof(ElfW(Shdr)),
7057             .e_shnum = ARRAY_SIZE(img->shdr),
7058             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
7059 #ifdef ELF_HOST_FLAGS
7060             .e_flags = ELF_HOST_FLAGS,
7061 #endif
7062 #ifdef ELF_OSABI
7063             .e_ident[EI_OSABI] = ELF_OSABI,
7064 #endif
7065         },
7066         .phdr = {
7067             .p_type = PT_LOAD,
7068             .p_flags = PF_X,
7069         },
7070         .shdr = {
7071             [0] = { .sh_type = SHT_NULL },
7072             /* Trick: The contents of code_gen_buffer are not present in
7073                this fake ELF file; that got allocated elsewhere.  Therefore
7074                we mark .text as SHT_NOBITS (similar to .bss) so that readers
7075                will not look for contents.  We can record any address.  */
7076             [1] = { /* .text */
7077                 .sh_type = SHT_NOBITS,
7078                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
7079             },
7080             [2] = { /* .debug_info */
7081                 .sh_type = SHT_PROGBITS,
7082                 .sh_offset = offsetof(struct ElfImage, di),
7083                 .sh_size = sizeof(struct DebugInfo),
7084             },
7085             [3] = { /* .debug_abbrev */
7086                 .sh_type = SHT_PROGBITS,
7087                 .sh_offset = offsetof(struct ElfImage, da),
7088                 .sh_size = sizeof(img->da),
7089             },
7090             [4] = { /* .debug_frame */
7091                 .sh_type = SHT_PROGBITS,
7092                 .sh_offset = sizeof(struct ElfImage),
7093             },
7094             [5] = { /* .symtab */
7095                 .sh_type = SHT_SYMTAB,
7096                 .sh_offset = offsetof(struct ElfImage, sym),
7097                 .sh_size = sizeof(img->sym),
7098                 .sh_info = 1,
7099                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
7100                 .sh_entsize = sizeof(ElfW(Sym)),
7101             },
7102             [6] = { /* .strtab */
7103                 .sh_type = SHT_STRTAB,
7104                 .sh_offset = offsetof(struct ElfImage, str),
7105                 .sh_size = sizeof(img->str),
7106             }
7107         },
7108         .sym = {
7109             [1] = { /* code_gen_buffer */
7110                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
7111                 .st_shndx = 1,
7112             }
7113         },
7114         .di = {
7115             .len = sizeof(struct DebugInfo) - 4,
7116             .version = 2,
7117             .ptr_size = sizeof(void *),
7118             .cu_die = 1,
7119             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
7120             .fn_die = 2,
7121             .fn_name = "code_gen_buffer"
7122         },
7123         .da = {
7124             1,          /* abbrev number (the cu) */
7125             0x11, 1,    /* DW_TAG_compile_unit, has children */
7126             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
7127             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7128             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7129             0, 0,       /* end of abbrev */
7130             2,          /* abbrev number (the fn) */
7131             0x2e, 0,    /* DW_TAG_subprogram, no children */
7132             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
7133             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7134             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7135             0, 0,       /* end of abbrev */
7136             0           /* no more abbrev */
7137         },
7138         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
7139                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
7140     };
7141 
7142     /* We only need a single jit entry; statically allocate it.  */
7143     static struct jit_code_entry one_entry;
7144 
7145     uintptr_t buf = (uintptr_t)buf_ptr;
7146     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
7147     DebugFrameHeader *dfh;
7148 
7149     img = g_malloc(img_size);
7150     *img = img_template;
7151 
7152     img->phdr.p_vaddr = buf;
7153     img->phdr.p_paddr = buf;
7154     img->phdr.p_memsz = buf_size;
7155 
7156     img->shdr[1].sh_name = find_string(img->str, ".text");
7157     img->shdr[1].sh_addr = buf;
7158     img->shdr[1].sh_size = buf_size;
7159 
7160     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
7161     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
7162 
7163     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
7164     img->shdr[4].sh_size = debug_frame_size;
7165 
7166     img->shdr[5].sh_name = find_string(img->str, ".symtab");
7167     img->shdr[6].sh_name = find_string(img->str, ".strtab");
7168 
7169     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
7170     img->sym[1].st_value = buf;
7171     img->sym[1].st_size = buf_size;
7172 
7173     img->di.cu_low_pc = buf;
7174     img->di.cu_high_pc = buf + buf_size;
7175     img->di.fn_low_pc = buf;
7176     img->di.fn_high_pc = buf + buf_size;
7177 
7178     dfh = (DebugFrameHeader *)(img + 1);
7179     memcpy(dfh, debug_frame, debug_frame_size);
7180     dfh->fde.func_start = buf;
7181     dfh->fde.func_len = buf_size;
7182 
7183 #ifdef DEBUG_JIT
7184     /* Enable this block to be able to debug the ELF image file creation.
7185        One can use readelf, objdump, or other inspection utilities.  */
7186     {
7187         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
7188         FILE *f = fopen(jit, "w+b");
7189         if (f) {
7190             if (fwrite(img, img_size, 1, f) != 1) {
7191                 /* Avoid stupid unused return value warning for fwrite.  */
7192             }
7193             fclose(f);
7194         }
7195     }
7196 #endif
7197 
7198     one_entry.symfile_addr = img;
7199     one_entry.symfile_size = img_size;
7200 
7201     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
7202     __jit_debug_descriptor.relevant_entry = &one_entry;
7203     __jit_debug_descriptor.first_entry = &one_entry;
7204     __jit_debug_register_code();
7205 }
7206 #else
7207 /* No support for the feature.  Provide the entry point expected by exec.c,
7208    and implement the internal function we declared earlier.  */
7209 
7210 static void tcg_register_jit_int(const void *buf, size_t size,
7211                                  const void *debug_frame,
7212                                  size_t debug_frame_size)
7213 {
7214 }
7215 
7216 void tcg_register_jit(const void *buf, size_t buf_size)
7217 {
7218 }
7219 #endif /* ELF_HOST_MACHINE */
7220 
7221 #if !TCG_TARGET_MAYBE_vec
7222 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
7223 {
7224     g_assert_not_reached();
7225 }
7226 #endif
7227