/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

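/*
 * One such label is queued per qemu_ld/st whose fast path can miss:
 * the backend records its conditional branch(es) in label_ptr[],
 * tcg_out_ldst_finalize() below emits the out-of-line helper call,
 * and the slow path returns to raddr in the main code stream.
 */
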
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

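/*
 * Both tables are indexed by the size (and, for loads, sign) bits of
 * the MemOp, e.g. qemu_ld_helpers[opc & MO_SSIZE] and
 * qemu_st_helpers[opc & MO_SIZE]; the exact usage is a per-backend
 * detail in tcg-target.c.inc.
 */
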
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
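
/*
 * Note the pattern above: a value exactly one insn unit wide is stored
 * directly, anything wider goes through memcpy and advances code_ptr
 * by bytes / TCG_TARGET_INSN_UNIT_SIZE units; e.g. emitting a 4-byte
 * value with 2-byte insn units advances code_ptr by two units.
 */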

/* label relocation processing */
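
/*
 * A branch to a label that has not yet been emitted is generated with
 * a placeholder and recorded via tcg_out_reloc().  Once tcg_out_label()
 * binds the label to a code address, tcg_resolve_relocs() walks the
 * recorded list and has the backend's patch_reloc() rewrite each
 * placeholder.
 */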

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

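/*
 * For example (a sketch, with hypothetical registers d and a):
 * tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SW, a) emits a
 * 16-bit sign-extension of a into the 64-bit register d, via the
 * MO_SW case above.
 */
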
/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

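/*
 * In other words: if i1->dst overlaps i2's source, i2 is emitted
 * first; if the two moves form a cycle (each destination is the other
 * move's source), the sources are either swapped with tcg_out_xchg
 * when the backend provides it, or src1 is parked in @scratch.
 */
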
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

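/*
 * Keep the pool sorted by descending nlong, then by data, so that
 * duplicate constants become adjacent and tcg_out_pool_finalize can
 * emit each distinct value only once, and so that the widest (most
 * aligned) entries come first.
 */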
static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

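/*
 * For example, C_O1_I2(r, r, r) in tcg-target-con-set.h expands to the
 * enumerator c_o1_i2_r_r_r here, to { 1, 2, { "r", "r", "r" } } in
 * constraint_sets[] below, and back to c_o1_i2_r_r_r when returned
 * from tcg_target_op_def().
 */
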
typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
};

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

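/*
 * Note that only the oversized, single-use allocations are freed here;
 * the fixed-size chunks stay chained from pool_first for reuse by the
 * next translation.
 */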
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

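/*
 * A sketch of the encoding: each slot of .typemask holds a 3-bit
 * dh_typecode, slot 0 describing the return value and slot n + 1
 * describing argument n, with dh_typemask(t, n) shifting the code
 * into place.  init_ffi_layout() and init_call_layout() below decode
 * the mask 3 bits at a time.
 */
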
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

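/*
 * E.g. on a host with 6 integer argument registers, arg slots 0-5 are
 * those registers, and slot 6 is the first stack word of the
 * outgoing-argument area.
 */
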
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

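/*
 * Pad to an even slot index, for ABIs (TCG_CALL_ARG_EVEN) that pass
 * 64-bit values in aligned register pairs; e.g. slot 1 advances to 2.
 */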
static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

1321      * The callee is allowed to clobber memory associated with
1322      * structure pass by-reference.  Therefore we must make copies.
1323      * Allocate space from "ref_slot", which will be adjusted to
1324      * follow the parameters on the stack.
1325      */
1326     loc[0].ref_slot = cum->ref_slot;
1327 
1328     /*
1329      * Subsequent words also go into the reference slot, but
1330      * do not accumulate into the regular arguments.
1331      */
1332     for (int i = 1; i < n; ++i) {
1333         loc[i] = (TCGCallArgumentLoc){
1334             .kind = TCG_CALL_ARG_BY_REF_N,
1335             .arg_idx = cum->arg_idx,
1336             .tmp_subindex = i,
1337             .ref_slot = cum->ref_slot + i,
1338         };
1339     }
1340     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1341     cum->ref_slot += n;
1342 }
1343 
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

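/*
 * A worked example (a sketch, assuming a 64-bit host with at least
 * five integer argument registers and TCG_CALL_ARG_NORMAL): for
 * helper_stq_mmu(env, addr, data, oi, ra), each argument occupies
 * one slot, giving nr_in = 5 with arg slots 0-4 all in registers,
 * and nr_out = 0 for the void return.
 */
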
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_constraint_sets(void);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
1565     /*
1566      * In user-mode we simply share the init context among threads, since we
1567      * use a single region. See the documentation of tcg_region_init() for
1568      * the reasoning behind this.
1569      * In system-mode we will have at most max_threads TCG threads.
1570      */
1571 #ifdef CONFIG_USER_ONLY
1572     tcg_ctxs = &tcg_ctx;
1573     tcg_cur_ctxs = 1;
1574     tcg_max_ctxs = 1;
1575 #else
1576     tcg_max_ctxs = max_threads;
1577     tcg_ctxs = g_new0(TCGContext *, max_threads);
1578 #endif
1579 
1580     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1581     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1582     tcg_env = temp_tcgv_ptr(ts);
1583 }
1584 
1585 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1586 {
1587     tcg_context_init(max_threads);
1588     tcg_region_init(tb_size, splitwx, max_threads);
1589 }
1590 
1591 /*
1592  * Allocate TBs right before their corresponding translated code, making
1593  * sure that TBs and code are on different cache lines.
1594  */
1595 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1596 {
1597     uintptr_t align = qemu_icache_linesize;
1598     TranslationBlock *tb;
1599     void *next;
1600 
1601  retry:
1602     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1603     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1604 
1605     if (unlikely(next > s->code_gen_highwater)) {
1606         if (tcg_region_alloc(s)) {
1607             return NULL;
1608         }
1609         goto retry;
1610     }
1611     qatomic_set(&s->code_gen_ptr, next);
1612     return tb;
1613 }
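
/*
 * Illustration (values are examples only): with a 64-byte icache line
 * and code_gen_ptr at ...0x40f0, the TB struct is placed at ...0x4100
 * and code_gen_ptr advances to the next line boundary past the struct,
 * so the TB metadata and the code it describes never share a line.
 */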
1614 
1615 void tcg_prologue_init(void)
1616 {
1617     TCGContext *s = tcg_ctx;
1618     size_t prologue_size;
1619 
1620     s->code_ptr = s->code_gen_ptr;
1621     s->code_buf = s->code_gen_ptr;
1622     s->data_gen_ptr = NULL;
1623 
1624 #ifndef CONFIG_TCG_INTERPRETER
1625     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1626 #endif
1627 
1628     s->pool_labels = NULL;
1629 
1630     qemu_thread_jit_write();
1631     /* Generate the prologue.  */
1632     tcg_target_qemu_prologue(s);
1633 
1634     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1635     {
1636         int result = tcg_out_pool_finalize(s);
1637         tcg_debug_assert(result == 0);
1638     }
1639 
1640     prologue_size = tcg_current_code_size(s);
1641     perf_report_prologue(s->code_gen_ptr, prologue_size);
1642 
1643 #ifndef CONFIG_TCG_INTERPRETER
1644     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1645                         (uintptr_t)s->code_buf, prologue_size);
1646 #endif
1647 
1648     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1649         FILE *logfile = qemu_log_trylock();
1650         if (logfile) {
1651             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1652             if (s->data_gen_ptr) {
1653                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1654                 size_t data_size = prologue_size - code_size;
1655                 size_t i;
1656 
1657                 disas(logfile, s->code_gen_ptr, code_size);
1658 
1659                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1660                     if (sizeof(tcg_target_ulong) == 8) {
1661                         fprintf(logfile,
1662                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1663                                 (uintptr_t)s->data_gen_ptr + i,
1664                                 *(uint64_t *)(s->data_gen_ptr + i));
1665                     } else {
1666                         fprintf(logfile,
1667                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1668                                 (uintptr_t)s->data_gen_ptr + i,
1669                                 *(uint32_t *)(s->data_gen_ptr + i));
1670                     }
1671                 }
1672             } else {
1673                 disas(logfile, s->code_gen_ptr, prologue_size);
1674             }
1675             fprintf(logfile, "\n");
1676             qemu_log_unlock(logfile);
1677         }
1678     }
1679 
1680 #ifndef CONFIG_TCG_INTERPRETER
1681     /*
1682      * Assert that goto_ptr is implemented completely, setting an epilogue.
1683      * For tci, we use NULL as the signal to return from the interpreter,
1684      * so skip this check.
1685      */
1686     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1687 #endif
1688 
1689     tcg_region_prologue_set(s);
1690 }
1691 
1692 void tcg_func_start(TCGContext *s)
1693 {
1694     tcg_pool_reset(s);
1695     s->nb_temps = s->nb_globals;
1696 
1697     /* No freed EBB temps carry over from a previous translation.  */
1698     tcg_temp_ebb_reset_freed(s);
1699 
1700     /* No constant temps have been previously allocated. */
1701     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1702         if (s->const_table[i]) {
1703             g_hash_table_remove_all(s->const_table[i]);
1704         }
1705     }
1706 
1707     s->nb_ops = 0;
1708     s->nb_labels = 0;
1709     s->current_frame_offset = s->frame_start;
1710 
1711 #ifdef CONFIG_DEBUG_TCG
1712     s->goto_tb_issue_mask = 0;
1713 #endif
1714 
1715     QTAILQ_INIT(&s->ops);
1716     QTAILQ_INIT(&s->free_ops);
1717     s->emit_before_op = NULL;
1718     QSIMPLEQ_INIT(&s->labels);
1719 
1720     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1721     tcg_debug_assert(s->insn_start_words > 0);
1722 }
1723 
1724 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1725 {
1726     int n = s->nb_temps++;
1727 
1728     if (n >= TCG_MAX_TEMPS) {
1729         tcg_raise_tb_overflow(s);
1730     }
1731     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1732 }
1733 
1734 static TCGTemp *tcg_global_alloc(TCGContext *s)
1735 {
1736     TCGTemp *ts;
1737 
1738     tcg_debug_assert(s->nb_globals == s->nb_temps);
1739     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1740     s->nb_globals++;
1741     ts = tcg_temp_alloc(s);
1742     ts->kind = TEMP_GLOBAL;
1743 
1744     return ts;
1745 }
1746 
1747 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1748                                             TCGReg reg, const char *name)
1749 {
1750     TCGTemp *ts;
1751 
1752     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1753 
1754     ts = tcg_global_alloc(s);
1755     ts->base_type = type;
1756     ts->type = type;
1757     ts->kind = TEMP_FIXED;
1758     ts->reg = reg;
1759     ts->name = name;
1760     tcg_regset_set_reg(s->reserved_regs, reg);
1761 
1762     return ts;
1763 }
1764 
1765 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1766 {
1767     s->frame_start = start;
1768     s->frame_end = start + size;
1769     s->frame_temp
1770         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1771 }
1772 
1773 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1774                                             const char *name, TCGType type)
1775 {
1776     TCGContext *s = tcg_ctx;
1777     TCGTemp *base_ts = tcgv_ptr_temp(base);
1778     TCGTemp *ts = tcg_global_alloc(s);
1779     int indirect_reg = 0;
1780 
1781     switch (base_ts->kind) {
1782     case TEMP_FIXED:
1783         break;
1784     case TEMP_GLOBAL:
1785         /* We do not support double-indirect registers.  */
1786         tcg_debug_assert(!base_ts->indirect_reg);
1787         base_ts->indirect_base = 1;
1788         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1789                             ? 2 : 1);
1790         indirect_reg = 1;
1791         break;
1792     default:
1793         g_assert_not_reached();
1794     }
1795 
1796     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1797         TCGTemp *ts2 = tcg_global_alloc(s);
1798         char buf[64];
1799 
1800         ts->base_type = TCG_TYPE_I64;
1801         ts->type = TCG_TYPE_I32;
1802         ts->indirect_reg = indirect_reg;
1803         ts->mem_allocated = 1;
1804         ts->mem_base = base_ts;
1805         ts->mem_offset = offset;
1806         pstrcpy(buf, sizeof(buf), name);
1807         pstrcat(buf, sizeof(buf), "_0");
1808         ts->name = strdup(buf);
1809 
1810         tcg_debug_assert(ts2 == ts + 1);
1811         ts2->base_type = TCG_TYPE_I64;
1812         ts2->type = TCG_TYPE_I32;
1813         ts2->indirect_reg = indirect_reg;
1814         ts2->mem_allocated = 1;
1815         ts2->mem_base = base_ts;
1816         ts2->mem_offset = offset + 4;
1817         ts2->temp_subindex = 1;
1818         pstrcpy(buf, sizeof(buf), name);
1819         pstrcat(buf, sizeof(buf), "_1");
1820         ts2->name = strdup(buf);
1821     } else {
1822         ts->base_type = type;
1823         ts->type = type;
1824         ts->indirect_reg = indirect_reg;
1825         ts->mem_allocated = 1;
1826         ts->mem_base = base_ts;
1827         ts->mem_offset = offset;
1828         ts->name = name;
1829     }
1830     return ts;
1831 }
1832 
1833 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1834 {
1835     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1836     return temp_tcgv_i32(ts);
1837 }
1838 
1839 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1840 {
1841     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1842     return temp_tcgv_i64(ts);
1843 }
1844 
1845 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1846 {
1847     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1848     return temp_tcgv_ptr(ts);
1849 }
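
/*
 * Typical front-end usage, as a sketch -- CPUFooState and its "pc"
 * field are hypothetical:
 *
 *     cpu_pc = tcg_global_mem_new_i32(tcg_env,
 *                                     offsetof(CPUFooState, pc), "pc");
 *
 * Thereafter, reads and writes of cpu_pc in generated code implicitly
 * access env->pc.
 */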
1850 
1851 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1852 {
1853     TCGContext *s = tcg_ctx;
1854     TCGTemp *ts;
1855     int n;
1856 
1857     if (kind == TEMP_EBB) {
1858         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1859 
1860         if (idx < TCG_MAX_TEMPS) {
1861             /* There is already an available temp with the right type.  */
1862             clear_bit(idx, s->free_temps[type].l);
1863 
1864             ts = &s->temps[idx];
1865             ts->temp_allocated = 1;
1866             tcg_debug_assert(ts->base_type == type);
1867             tcg_debug_assert(ts->kind == kind);
1868             return ts;
1869         }
1870     } else {
1871         tcg_debug_assert(kind == TEMP_TB);
1872     }
1873 
1874     switch (type) {
1875     case TCG_TYPE_I32:
1876     case TCG_TYPE_V64:
1877     case TCG_TYPE_V128:
1878     case TCG_TYPE_V256:
1879         n = 1;
1880         break;
1881     case TCG_TYPE_I64:
1882         n = 64 / TCG_TARGET_REG_BITS;
1883         break;
1884     case TCG_TYPE_I128:
1885         n = 128 / TCG_TARGET_REG_BITS;
1886         break;
1887     default:
1888         g_assert_not_reached();
1889     }
1890 
1891     ts = tcg_temp_alloc(s);
1892     ts->base_type = type;
1893     ts->temp_allocated = 1;
1894     ts->kind = kind;
1895 
1896     if (n == 1) {
1897         ts->type = type;
1898     } else {
1899         ts->type = TCG_TYPE_REG;
1900 
1901         for (int i = 1; i < n; ++i) {
1902             TCGTemp *ts2 = tcg_temp_alloc(s);
1903 
1904             tcg_debug_assert(ts2 == ts + i);
1905             ts2->base_type = type;
1906             ts2->type = TCG_TYPE_REG;
1907             ts2->temp_allocated = 1;
1908             ts2->temp_subindex = i;
1909             ts2->kind = kind;
1910         }
1911     }
1912     return ts;
1913 }
1914 
1915 TCGv_i32 tcg_temp_new_i32(void)
1916 {
1917     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1918 }
1919 
1920 TCGv_i32 tcg_temp_ebb_new_i32(void)
1921 {
1922     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1923 }
1924 
1925 TCGv_i64 tcg_temp_new_i64(void)
1926 {
1927     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1928 }
1929 
1930 TCGv_i64 tcg_temp_ebb_new_i64(void)
1931 {
1932     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1933 }
1934 
1935 TCGv_ptr tcg_temp_new_ptr(void)
1936 {
1937     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1938 }
1939 
1940 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1941 {
1942     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1943 }
1944 
1945 TCGv_i128 tcg_temp_new_i128(void)
1946 {
1947     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1948 }
1949 
1950 TCGv_i128 tcg_temp_ebb_new_i128(void)
1951 {
1952     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1953 }
1954 
1955 TCGv_vec tcg_temp_new_vec(TCGType type)
1956 {
1957     TCGTemp *t;
1958 
1959 #ifdef CONFIG_DEBUG_TCG
1960     switch (type) {
1961     case TCG_TYPE_V64:
1962         assert(TCG_TARGET_HAS_v64);
1963         break;
1964     case TCG_TYPE_V128:
1965         assert(TCG_TARGET_HAS_v128);
1966         break;
1967     case TCG_TYPE_V256:
1968         assert(TCG_TARGET_HAS_v256);
1969         break;
1970     default:
1971         g_assert_not_reached();
1972     }
1973 #endif
1974 
1975     t = tcg_temp_new_internal(type, TEMP_EBB);
1976     return temp_tcgv_vec(t);
1977 }
1978 
1979 /* Create a new temp of the same type as an existing temp.  */
1980 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1981 {
1982     TCGTemp *t = tcgv_vec_temp(match);
1983 
1984     tcg_debug_assert(t->temp_allocated != 0);
1985 
1986     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1987     return temp_tcgv_vec(t);
1988 }
1989 
1990 void tcg_temp_free_internal(TCGTemp *ts)
1991 {
1992     TCGContext *s = tcg_ctx;
1993 
1994     switch (ts->kind) {
1995     case TEMP_CONST:
1996     case TEMP_TB:
1997         /* Silently ignore free. */
1998         break;
1999     case TEMP_EBB:
2000         tcg_debug_assert(ts->temp_allocated != 0);
2001         ts->temp_allocated = 0;
2002         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2003         break;
2004     default:
2005         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2006         g_assert_not_reached();
2007     }
2008 }
2009 
2010 void tcg_temp_free_i32(TCGv_i32 arg)
2011 {
2012     tcg_temp_free_internal(tcgv_i32_temp(arg));
2013 }
2014 
2015 void tcg_temp_free_i64(TCGv_i64 arg)
2016 {
2017     tcg_temp_free_internal(tcgv_i64_temp(arg));
2018 }
2019 
2020 void tcg_temp_free_i128(TCGv_i128 arg)
2021 {
2022     tcg_temp_free_internal(tcgv_i128_temp(arg));
2023 }
2024 
2025 void tcg_temp_free_ptr(TCGv_ptr arg)
2026 {
2027     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2028 }
2029 
2030 void tcg_temp_free_vec(TCGv_vec arg)
2031 {
2032     tcg_temp_free_internal(tcgv_vec_temp(arg));
2033 }
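
/*
 * A minimal sketch of temporary lifetimes: TEMP_EBB temps are recycled
 * via free_temps when released, while freeing a TEMP_TB temp is a
 * silent no-op and the temp lives to the end of the translation block.
 *
 *     TCGv_i32 t = tcg_temp_ebb_new_i32();
 *     tcg_gen_add_i32(t, a, b);        // a, b: existing TCGv_i32
 *     ...
 *     tcg_temp_free_i32(t);            // returns t to free_temps
 */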
2034 
2035 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2036 {
2037     TCGContext *s = tcg_ctx;
2038     GHashTable *h = s->const_table[type];
2039     TCGTemp *ts;
2040 
2041     if (h == NULL) {
2042         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2043         s->const_table[type] = h;
2044     }
2045 
2046     ts = g_hash_table_lookup(h, &val);
2047     if (ts == NULL) {
2048         int64_t *val_ptr;
2049 
2050         ts = tcg_temp_alloc(s);
2051 
2052         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2053             TCGTemp *ts2 = tcg_temp_alloc(s);
2054 
2055             tcg_debug_assert(ts2 == ts + 1);
2056 
2057             ts->base_type = TCG_TYPE_I64;
2058             ts->type = TCG_TYPE_I32;
2059             ts->kind = TEMP_CONST;
2060             ts->temp_allocated = 1;
2061 
2062             ts2->base_type = TCG_TYPE_I64;
2063             ts2->type = TCG_TYPE_I32;
2064             ts2->kind = TEMP_CONST;
2065             ts2->temp_allocated = 1;
2066             ts2->temp_subindex = 1;
2067 
2068             /*
2069              * Retain the full value of the 64-bit constant in the low
2070              * part, so that the hash table works.  Actual uses will
2071              * truncate the value to the low part.
2072              */
2073             ts[HOST_BIG_ENDIAN].val = val;
2074             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2075             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2076         } else {
2077             ts->base_type = type;
2078             ts->type = type;
2079             ts->kind = TEMP_CONST;
2080             ts->temp_allocated = 1;
2081             ts->val = val;
2082             val_ptr = &ts->val;
2083         }
2084         g_hash_table_insert(h, val_ptr, ts);
2085     }
2086 
2087     return ts;
2088 }
2089 
2090 TCGv_i32 tcg_constant_i32(int32_t val)
2091 {
2092     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2093 }
2094 
2095 TCGv_i64 tcg_constant_i64(int64_t val)
2096 {
2097     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2098 }
2099 
2100 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2101 {
2102     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2103 }
2104 
2105 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2106 {
2107     val = dup_const(vece, val);
2108     return temp_tcgv_vec(tcg_constant_internal(type, val));
2109 }
2110 
2111 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2112 {
2113     TCGTemp *t = tcgv_vec_temp(match);
2114 
2115     tcg_debug_assert(t->temp_allocated != 0);
2116     return tcg_constant_vec(t->base_type, vece, val);
2117 }
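
/*
 * Constants are interned: requesting the same (type, value) pair twice
 * returns the same TEMP_CONST temp, and tcg_temp_free_internal above
 * silently ignores attempts to free one.  Sketch:
 *
 *     TCGv_i32 a = tcg_constant_i32(1);
 *     TCGv_i32 b = tcg_constant_i32(1);    // same temp as a
 */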
2118 
2119 #ifdef CONFIG_DEBUG_TCG
2120 size_t temp_idx(TCGTemp *ts)
2121 {
2122     ptrdiff_t n = ts - tcg_ctx->temps;
2123     assert(n >= 0 && n < tcg_ctx->nb_temps);
2124     return n;
2125 }
2126 
2127 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2128 {
2129     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2130 
2131     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2132     assert(o % sizeof(TCGTemp) == 0);
2133 
2134     return (void *)tcg_ctx + (uintptr_t)v;
2135 }
2136 #endif /* CONFIG_DEBUG_TCG */
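
/*
 * The decode above relies on the TCGv_* encoding: a TCGv_i32 is not a
 * real pointer but the byte offset of its TCGTemp from the start of
 * TCGContext, which is why adding it to tcg_ctx recovers the TCGTemp
 * and why, after subtracting offsetof(TCGContext, temps), it must be
 * a multiple of sizeof(TCGTemp).
 */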
2137 
2138 /*
2139  * Return true if OP may appear in the opcode stream with TYPE.
2140  * Test the runtime variable that controls each opcode.
2141  */
2142 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2143 {
2144     bool has_type;
2145 
2146     switch (type) {
2147     case TCG_TYPE_I32:
2148         has_type = true;
2149         break;
2150     case TCG_TYPE_I64:
2151         has_type = TCG_TARGET_REG_BITS == 64;
2152         break;
2153     case TCG_TYPE_V64:
2154         has_type = TCG_TARGET_HAS_v64;
2155         break;
2156     case TCG_TYPE_V128:
2157         has_type = TCG_TARGET_HAS_v128;
2158         break;
2159     case TCG_TYPE_V256:
2160         has_type = TCG_TARGET_HAS_v256;
2161         break;
2162     default:
2163         has_type = false;
2164         break;
2165     }
2166 
2167     switch (op) {
2168     case INDEX_op_discard:
2169     case INDEX_op_set_label:
2170     case INDEX_op_call:
2171     case INDEX_op_br:
2172     case INDEX_op_mb:
2173     case INDEX_op_insn_start:
2174     case INDEX_op_exit_tb:
2175     case INDEX_op_goto_tb:
2176     case INDEX_op_goto_ptr:
2177     case INDEX_op_qemu_ld_i32:
2178     case INDEX_op_qemu_st_i32:
2179     case INDEX_op_qemu_ld_i64:
2180     case INDEX_op_qemu_st_i64:
2181         return true;
2182 
2183     case INDEX_op_qemu_st8_i32:
2184         return TCG_TARGET_HAS_qemu_st8_i32;
2185 
2186     case INDEX_op_qemu_ld_i128:
2187     case INDEX_op_qemu_st_i128:
2188         return TCG_TARGET_HAS_qemu_ldst_i128;
2189 
2190     case INDEX_op_mov_i32:
2191     case INDEX_op_setcond_i32:
2192     case INDEX_op_brcond_i32:
2193     case INDEX_op_movcond_i32:
2194     case INDEX_op_ld8u_i32:
2195     case INDEX_op_ld8s_i32:
2196     case INDEX_op_ld16u_i32:
2197     case INDEX_op_ld16s_i32:
2198     case INDEX_op_ld_i32:
2199     case INDEX_op_st8_i32:
2200     case INDEX_op_st16_i32:
2201     case INDEX_op_st_i32:
2202     case INDEX_op_add_i32:
2203     case INDEX_op_sub_i32:
2204     case INDEX_op_neg_i32:
2205     case INDEX_op_mul_i32:
2206     case INDEX_op_and_i32:
2207     case INDEX_op_or_i32:
2208     case INDEX_op_xor_i32:
2209     case INDEX_op_shl_i32:
2210     case INDEX_op_shr_i32:
2211     case INDEX_op_sar_i32:
2212     case INDEX_op_extract_i32:
2213     case INDEX_op_sextract_i32:
2214     case INDEX_op_deposit_i32:
2215         return true;
2216 
2217     case INDEX_op_negsetcond_i32:
2218         return TCG_TARGET_HAS_negsetcond_i32;
2219     case INDEX_op_div_i32:
2220     case INDEX_op_divu_i32:
2221         return TCG_TARGET_HAS_div_i32;
2222     case INDEX_op_rem_i32:
2223     case INDEX_op_remu_i32:
2224         return TCG_TARGET_HAS_rem_i32;
2225     case INDEX_op_div2_i32:
2226     case INDEX_op_divu2_i32:
2227         return TCG_TARGET_HAS_div2_i32;
2228     case INDEX_op_rotl_i32:
2229     case INDEX_op_rotr_i32:
2230         return TCG_TARGET_HAS_rot_i32;
2231     case INDEX_op_extract2_i32:
2232         return TCG_TARGET_HAS_extract2_i32;
2233     case INDEX_op_add2_i32:
2234         return TCG_TARGET_HAS_add2_i32;
2235     case INDEX_op_sub2_i32:
2236         return TCG_TARGET_HAS_sub2_i32;
2237     case INDEX_op_mulu2_i32:
2238         return TCG_TARGET_HAS_mulu2_i32;
2239     case INDEX_op_muls2_i32:
2240         return TCG_TARGET_HAS_muls2_i32;
2241     case INDEX_op_muluh_i32:
2242         return TCG_TARGET_HAS_muluh_i32;
2243     case INDEX_op_mulsh_i32:
2244         return TCG_TARGET_HAS_mulsh_i32;
2245     case INDEX_op_bswap16_i32:
2246         return TCG_TARGET_HAS_bswap16_i32;
2247     case INDEX_op_bswap32_i32:
2248         return TCG_TARGET_HAS_bswap32_i32;
2249     case INDEX_op_not_i32:
2250         return TCG_TARGET_HAS_not_i32;
2251     case INDEX_op_andc_i32:
2252         return TCG_TARGET_HAS_andc_i32;
2253     case INDEX_op_orc_i32:
2254         return TCG_TARGET_HAS_orc_i32;
2255     case INDEX_op_eqv_i32:
2256         return TCG_TARGET_HAS_eqv_i32;
2257     case INDEX_op_nand_i32:
2258         return TCG_TARGET_HAS_nand_i32;
2259     case INDEX_op_nor_i32:
2260         return TCG_TARGET_HAS_nor_i32;
2261     case INDEX_op_clz_i32:
2262         return TCG_TARGET_HAS_clz_i32;
2263     case INDEX_op_ctz_i32:
2264         return TCG_TARGET_HAS_ctz_i32;
2265     case INDEX_op_ctpop_i32:
2266         return TCG_TARGET_HAS_ctpop_i32;
2267 
2268     case INDEX_op_brcond2_i32:
2269     case INDEX_op_setcond2_i32:
2270         return TCG_TARGET_REG_BITS == 32;
2271 
2272     case INDEX_op_mov_i64:
2273     case INDEX_op_setcond_i64:
2274     case INDEX_op_brcond_i64:
2275     case INDEX_op_movcond_i64:
2276     case INDEX_op_ld8u_i64:
2277     case INDEX_op_ld8s_i64:
2278     case INDEX_op_ld16u_i64:
2279     case INDEX_op_ld16s_i64:
2280     case INDEX_op_ld32u_i64:
2281     case INDEX_op_ld32s_i64:
2282     case INDEX_op_ld_i64:
2283     case INDEX_op_st8_i64:
2284     case INDEX_op_st16_i64:
2285     case INDEX_op_st32_i64:
2286     case INDEX_op_st_i64:
2287     case INDEX_op_add_i64:
2288     case INDEX_op_sub_i64:
2289     case INDEX_op_neg_i64:
2290     case INDEX_op_mul_i64:
2291     case INDEX_op_and_i64:
2292     case INDEX_op_or_i64:
2293     case INDEX_op_xor_i64:
2294     case INDEX_op_shl_i64:
2295     case INDEX_op_shr_i64:
2296     case INDEX_op_sar_i64:
2297     case INDEX_op_ext_i32_i64:
2298     case INDEX_op_extu_i32_i64:
2299     case INDEX_op_extract_i64:
2300     case INDEX_op_sextract_i64:
2301     case INDEX_op_deposit_i64:
2302         return TCG_TARGET_REG_BITS == 64;
2303 
2304     case INDEX_op_negsetcond_i64:
2305         return TCG_TARGET_HAS_negsetcond_i64;
2306     case INDEX_op_div_i64:
2307     case INDEX_op_divu_i64:
2308         return TCG_TARGET_HAS_div_i64;
2309     case INDEX_op_rem_i64:
2310     case INDEX_op_remu_i64:
2311         return TCG_TARGET_HAS_rem_i64;
2312     case INDEX_op_div2_i64:
2313     case INDEX_op_divu2_i64:
2314         return TCG_TARGET_HAS_div2_i64;
2315     case INDEX_op_rotl_i64:
2316     case INDEX_op_rotr_i64:
2317         return TCG_TARGET_HAS_rot_i64;
2318     case INDEX_op_extract2_i64:
2319         return TCG_TARGET_HAS_extract2_i64;
2320     case INDEX_op_extrl_i64_i32:
2321     case INDEX_op_extrh_i64_i32:
2322         return TCG_TARGET_HAS_extr_i64_i32;
2323     case INDEX_op_bswap16_i64:
2324         return TCG_TARGET_HAS_bswap16_i64;
2325     case INDEX_op_bswap32_i64:
2326         return TCG_TARGET_HAS_bswap32_i64;
2327     case INDEX_op_bswap64_i64:
2328         return TCG_TARGET_HAS_bswap64_i64;
2329     case INDEX_op_not_i64:
2330         return TCG_TARGET_HAS_not_i64;
2331     case INDEX_op_andc_i64:
2332         return TCG_TARGET_HAS_andc_i64;
2333     case INDEX_op_orc_i64:
2334         return TCG_TARGET_HAS_orc_i64;
2335     case INDEX_op_eqv_i64:
2336         return TCG_TARGET_HAS_eqv_i64;
2337     case INDEX_op_nand_i64:
2338         return TCG_TARGET_HAS_nand_i64;
2339     case INDEX_op_nor_i64:
2340         return TCG_TARGET_HAS_nor_i64;
2341     case INDEX_op_clz_i64:
2342         return TCG_TARGET_HAS_clz_i64;
2343     case INDEX_op_ctz_i64:
2344         return TCG_TARGET_HAS_ctz_i64;
2345     case INDEX_op_ctpop_i64:
2346         return TCG_TARGET_HAS_ctpop_i64;
2347     case INDEX_op_add2_i64:
2348         return TCG_TARGET_HAS_add2_i64;
2349     case INDEX_op_sub2_i64:
2350         return TCG_TARGET_HAS_sub2_i64;
2351     case INDEX_op_mulu2_i64:
2352         return TCG_TARGET_HAS_mulu2_i64;
2353     case INDEX_op_muls2_i64:
2354         return TCG_TARGET_HAS_muls2_i64;
2355     case INDEX_op_muluh_i64:
2356         return TCG_TARGET_HAS_muluh_i64;
2357     case INDEX_op_mulsh_i64:
2358         return TCG_TARGET_HAS_mulsh_i64;
2359 
2360     case INDEX_op_mov_vec:
2361     case INDEX_op_dup_vec:
2362     case INDEX_op_dupm_vec:
2363     case INDEX_op_ld_vec:
2364     case INDEX_op_st_vec:
2365     case INDEX_op_add_vec:
2366     case INDEX_op_sub_vec:
2367     case INDEX_op_and_vec:
2368     case INDEX_op_or_vec:
2369     case INDEX_op_xor_vec:
2370     case INDEX_op_cmp_vec:
2371         return has_type;
2372     case INDEX_op_dup2_vec:
2373         return has_type && TCG_TARGET_REG_BITS == 32;
2374     case INDEX_op_not_vec:
2375         return has_type && TCG_TARGET_HAS_not_vec;
2376     case INDEX_op_neg_vec:
2377         return has_type && TCG_TARGET_HAS_neg_vec;
2378     case INDEX_op_abs_vec:
2379         return has_type && TCG_TARGET_HAS_abs_vec;
2380     case INDEX_op_andc_vec:
2381         return has_type && TCG_TARGET_HAS_andc_vec;
2382     case INDEX_op_orc_vec:
2383         return has_type && TCG_TARGET_HAS_orc_vec;
2384     case INDEX_op_nand_vec:
2385         return has_type && TCG_TARGET_HAS_nand_vec;
2386     case INDEX_op_nor_vec:
2387         return has_type && TCG_TARGET_HAS_nor_vec;
2388     case INDEX_op_eqv_vec:
2389         return has_type && TCG_TARGET_HAS_eqv_vec;
2390     case INDEX_op_mul_vec:
2391         return has_type && TCG_TARGET_HAS_mul_vec;
2392     case INDEX_op_shli_vec:
2393     case INDEX_op_shri_vec:
2394     case INDEX_op_sari_vec:
2395         return has_type && TCG_TARGET_HAS_shi_vec;
2396     case INDEX_op_shls_vec:
2397     case INDEX_op_shrs_vec:
2398     case INDEX_op_sars_vec:
2399         return has_type && TCG_TARGET_HAS_shs_vec;
2400     case INDEX_op_shlv_vec:
2401     case INDEX_op_shrv_vec:
2402     case INDEX_op_sarv_vec:
2403         return has_type && TCG_TARGET_HAS_shv_vec;
2404     case INDEX_op_rotli_vec:
2405         return has_type && TCG_TARGET_HAS_roti_vec;
2406     case INDEX_op_rotls_vec:
2407         return has_type && TCG_TARGET_HAS_rots_vec;
2408     case INDEX_op_rotlv_vec:
2409     case INDEX_op_rotrv_vec:
2410         return has_type && TCG_TARGET_HAS_rotv_vec;
2411     case INDEX_op_ssadd_vec:
2412     case INDEX_op_usadd_vec:
2413     case INDEX_op_sssub_vec:
2414     case INDEX_op_ussub_vec:
2415         return has_type && TCG_TARGET_HAS_sat_vec;
2416     case INDEX_op_smin_vec:
2417     case INDEX_op_umin_vec:
2418     case INDEX_op_smax_vec:
2419     case INDEX_op_umax_vec:
2420         return has_type && TCG_TARGET_HAS_minmax_vec;
2421     case INDEX_op_bitsel_vec:
2422         return has_type && TCG_TARGET_HAS_bitsel_vec;
2423     case INDEX_op_cmpsel_vec:
2424         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2425 
2426     default:
2427         if (op < INDEX_op_last_generic) {
2428             const TCGOutOp *outop;
2429             TCGConstraintSetIndex con_set;
2430 
2431             if (!has_type) {
2432                 return false;
2433             }
2434 
2435             outop = all_outop[op];
2436             tcg_debug_assert(outop != NULL);
2437 
2438             con_set = outop->static_constraint;
2439             if (con_set == C_Dynamic) {
2440                 con_set = outop->dynamic_constraint(type, flags);
2441             }
2442             if (con_set >= 0) {
2443                 return true;
2444             }
2445             tcg_debug_assert(con_set == C_NotImplemented);
2446             return false;
2447         }
2448         tcg_debug_assert(op < NB_OPS);
2449         return true;
2450 
2451     case INDEX_op_last_generic:
2452         g_assert_not_reached();
2453     }
2454 }
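
/*
 * Example use, as a sketch: expansion code can probe the backend
 * before choosing between a native op and a fallback sequence.
 *
 *     if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) {
 *         // emit the rotate directly
 *     } else {
 *         // expand via shl/shr/or
 *     }
 */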
2455 
2456 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2457 {
2458     unsigned width;
2459 
2460     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2461     width = (type == TCG_TYPE_I32 ? 32 : 64);
2462 
2463     tcg_debug_assert(ofs < width);
2464     tcg_debug_assert(len > 0);
2465     tcg_debug_assert(len <= width - ofs);
2466 
2467     return TCG_TARGET_deposit_valid(type, ofs, len);
2468 }
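
/*
 * For reference, a deposit of LEN bits at offset OFS computes
 *
 *     mask = MAKE_64BIT_MASK(ofs, len);
 *     dest = (arg1 & ~mask) | ((arg2 << ofs) & mask);
 *
 * e.g. ofs = 8, len = 8 on TCG_TYPE_I32 replaces the second byte of
 * arg1 with the low byte of arg2.  The asserts above only check that
 * the field lies within the operation width.
 */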
2469 
2470 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2471 
2472 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2473                           TCGTemp *ret, TCGTemp **args)
2474 {
2475     TCGv_i64 extend_free[MAX_CALL_IARGS];
2476     int n_extend = 0;
2477     TCGOp *op;
2478     int i, n, pi = 0, total_args;
2479 
2480     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2481         init_call_layout(info);
2482         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2483     }
2484 
2485     total_args = info->nr_out + info->nr_in + 2;
2486     op = tcg_op_alloc(INDEX_op_call, total_args);
2487 
2488 #ifdef CONFIG_PLUGIN
2489     /* Flag helpers that may affect guest state */
2490     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2491         tcg_ctx->plugin_insn->calls_helpers = true;
2492     }
2493 #endif
2494 
2495     TCGOP_CALLO(op) = n = info->nr_out;
2496     switch (n) {
2497     case 0:
2498         tcg_debug_assert(ret == NULL);
2499         break;
2500     case 1:
2501         tcg_debug_assert(ret != NULL);
2502         op->args[pi++] = temp_arg(ret);
2503         break;
2504     case 2:
2505     case 4:
2506         tcg_debug_assert(ret != NULL);
2507         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2508         tcg_debug_assert(ret->temp_subindex == 0);
2509         for (i = 0; i < n; ++i) {
2510             op->args[pi++] = temp_arg(ret + i);
2511         }
2512         break;
2513     default:
2514         g_assert_not_reached();
2515     }
2516 
2517     TCGOP_CALLI(op) = n = info->nr_in;
2518     for (i = 0; i < n; i++) {
2519         const TCGCallArgumentLoc *loc = &info->in[i];
2520         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2521 
2522         switch (loc->kind) {
2523         case TCG_CALL_ARG_NORMAL:
2524         case TCG_CALL_ARG_BY_REF:
2525         case TCG_CALL_ARG_BY_REF_N:
2526             op->args[pi++] = temp_arg(ts);
2527             break;
2528 
2529         case TCG_CALL_ARG_EXTEND_U:
2530         case TCG_CALL_ARG_EXTEND_S:
2531             {
2532                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2533                 TCGv_i32 orig = temp_tcgv_i32(ts);
2534 
2535                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2536                     tcg_gen_ext_i32_i64(temp, orig);
2537                 } else {
2538                     tcg_gen_extu_i32_i64(temp, orig);
2539                 }
2540                 op->args[pi++] = tcgv_i64_arg(temp);
2541                 extend_free[n_extend++] = temp;
2542             }
2543             break;
2544 
2545         default:
2546             g_assert_not_reached();
2547         }
2548     }
2549     op->args[pi++] = (uintptr_t)func;
2550     op->args[pi++] = (uintptr_t)info;
2551     tcg_debug_assert(pi == total_args);
2552 
2553     if (tcg_ctx->emit_before_op) {
2554         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2555     } else {
2556         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2557     }
2558 
2559     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2560     for (i = 0; i < n_extend; ++i) {
2561         tcg_temp_free_i64(extend_free[i]);
2562     }
2563 }
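
/*
 * Note on TCG_CALL_ARG_EXTEND_U/_S above: for ABIs that pass 32-bit
 * values widened to a full register (TCG_TARGET_CALL_ARG_I32 ==
 * TCG_CALL_ARG_EXTEND), the i32 argument is copied into a scratch i64
 * via tcg_gen_ext_i32_i64 or tcg_gen_extu_i32_i64, the call consumes
 * the i64, and the scratch is released once the op has been emitted.
 */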
2564 
2565 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2566 {
2567     tcg_gen_callN(func, info, ret, NULL);
2568 }
2569 
2570 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2571 {
2572     tcg_gen_callN(func, info, ret, &t1);
2573 }
2574 
2575 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2576                    TCGTemp *t1, TCGTemp *t2)
2577 {
2578     TCGTemp *args[2] = { t1, t2 };
2579     tcg_gen_callN(func, info, ret, args);
2580 }
2581 
2582 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2583                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2584 {
2585     TCGTemp *args[3] = { t1, t2, t3 };
2586     tcg_gen_callN(func, info, ret, args);
2587 }
2588 
2589 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2590                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2591 {
2592     TCGTemp *args[4] = { t1, t2, t3, t4 };
2593     tcg_gen_callN(func, info, ret, args);
2594 }
2595 
2596 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2597                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2598 {
2599     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2600     tcg_gen_callN(func, info, ret, args);
2601 }
2602 
2603 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2604                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2605                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2606 {
2607     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2608     tcg_gen_callN(func, info, ret, args);
2609 }
2610 
2611 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2612                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2613                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2614 {
2615     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2616     tcg_gen_callN(func, info, ret, args);
2617 }
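
/*
 * Front ends normally do not call these directly: the gen_helper_*()
 * wrappers generated from each target's helper.h expand into one of
 * the tcg_gen_callN variants above with the matching TCGHelperInfo.
 */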
2618 
2619 static void tcg_reg_alloc_start(TCGContext *s)
2620 {
2621     int i, n;
2622 
2623     for (i = 0, n = s->nb_temps; i < n; i++) {
2624         TCGTemp *ts = &s->temps[i];
2625         TCGTempVal val = TEMP_VAL_MEM;
2626 
2627         switch (ts->kind) {
2628         case TEMP_CONST:
2629             val = TEMP_VAL_CONST;
2630             break;
2631         case TEMP_FIXED:
2632             val = TEMP_VAL_REG;
2633             break;
2634         case TEMP_GLOBAL:
2635             break;
2636         case TEMP_EBB:
2637             val = TEMP_VAL_DEAD;
2638             /* fall through */
2639         case TEMP_TB:
2640             ts->mem_allocated = 0;
2641             break;
2642         default:
2643             g_assert_not_reached();
2644         }
2645         ts->val_type = val;
2646     }
2647 
2648     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2649 }
2650 
2651 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2652                                  TCGTemp *ts)
2653 {
2654     int idx = temp_idx(ts);
2655 
2656     switch (ts->kind) {
2657     case TEMP_FIXED:
2658     case TEMP_GLOBAL:
2659         pstrcpy(buf, buf_size, ts->name);
2660         break;
2661     case TEMP_TB:
2662         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2663         break;
2664     case TEMP_EBB:
2665         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2666         break;
2667     case TEMP_CONST:
2668         switch (ts->type) {
2669         case TCG_TYPE_I32:
2670             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2671             break;
2672 #if TCG_TARGET_REG_BITS > 32
2673         case TCG_TYPE_I64:
2674             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2675             break;
2676 #endif
2677         case TCG_TYPE_V64:
2678         case TCG_TYPE_V128:
2679         case TCG_TYPE_V256:
2680             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2681                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2682             break;
2683         default:
2684             g_assert_not_reached();
2685         }
2686         break;
2687     }
2688     return buf;
2689 }
2690 
2691 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2692                              int buf_size, TCGArg arg)
2693 {
2694     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2695 }
2696 
2697 static const char * const cond_name[] =
2698 {
2699     [TCG_COND_NEVER] = "never",
2700     [TCG_COND_ALWAYS] = "always",
2701     [TCG_COND_EQ] = "eq",
2702     [TCG_COND_NE] = "ne",
2703     [TCG_COND_LT] = "lt",
2704     [TCG_COND_GE] = "ge",
2705     [TCG_COND_LE] = "le",
2706     [TCG_COND_GT] = "gt",
2707     [TCG_COND_LTU] = "ltu",
2708     [TCG_COND_GEU] = "geu",
2709     [TCG_COND_LEU] = "leu",
2710     [TCG_COND_GTU] = "gtu",
2711     [TCG_COND_TSTEQ] = "tsteq",
2712     [TCG_COND_TSTNE] = "tstne",
2713 };
2714 
2715 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2716 {
2717     [MO_UB]   = "ub",
2718     [MO_SB]   = "sb",
2719     [MO_LEUW] = "leuw",
2720     [MO_LESW] = "lesw",
2721     [MO_LEUL] = "leul",
2722     [MO_LESL] = "lesl",
2723     [MO_LEUQ] = "leq",
2724     [MO_BEUW] = "beuw",
2725     [MO_BESW] = "besw",
2726     [MO_BEUL] = "beul",
2727     [MO_BESL] = "besl",
2728     [MO_BEUQ] = "beq",
2729     [MO_128 + MO_BE] = "beo",
2730     [MO_128 + MO_LE] = "leo",
2731 };
2732 
2733 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2734     [MO_UNALN >> MO_ASHIFT]    = "un+",
2735     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2736     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2737     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2738     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2739     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2740     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2741     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2742 };
2743 
2744 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2745     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2746     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2747     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2748     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2749     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2750     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2751 };
2752 
2753 static const char bswap_flag_name[][6] = {
2754     [TCG_BSWAP_IZ] = "iz",
2755     [TCG_BSWAP_OZ] = "oz",
2756     [TCG_BSWAP_OS] = "os",
2757     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2758     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2759 };
2760 
2761 #ifdef CONFIG_PLUGIN
2762 static const char * const plugin_from_name[] = {
2763     "from-tb",
2764     "from-insn",
2765     "after-insn",
2766     "after-tb",
2767 };
2768 #endif
2769 
2770 static inline bool tcg_regset_single(TCGRegSet d)
2771 {
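    /* True for zero or one registers: d & (d - 1) clears the lowest set bit. */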
2772     return (d & (d - 1)) == 0;
2773 }
2774 
2775 static inline TCGReg tcg_regset_first(TCGRegSet d)
2776 {
2777     if (TCG_TARGET_NB_REGS <= 32) {
2778         return ctz32(d);
2779     } else {
2780         return ctz64(d);
2781     }
2782 }
2783 
2784 /* Return only the number of characters output -- no error return. */
2785 #define ne_fprintf(...) \
2786     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2787 
2788 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2789 {
2790     char buf[128];
2791     TCGOp *op;
2792 
2793     QTAILQ_FOREACH(op, &s->ops, link) {
2794         int i, k, nb_oargs, nb_iargs, nb_cargs;
2795         const TCGOpDef *def;
2796         TCGOpcode c;
2797         int col = 0;
2798 
2799         c = op->opc;
2800         def = &tcg_op_defs[c];
2801 
2802         if (c == INDEX_op_insn_start) {
2803             nb_oargs = 0;
2804             col += ne_fprintf(f, "\n ----");
2805 
2806             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2807                 col += ne_fprintf(f, " %016" PRIx64,
2808                                   tcg_get_insn_start_param(op, i));
2809             }
2810         } else if (c == INDEX_op_call) {
2811             const TCGHelperInfo *info = tcg_call_info(op);
2812             void *func = tcg_call_func(op);
2813 
2814             /* variable number of arguments */
2815             nb_oargs = TCGOP_CALLO(op);
2816             nb_iargs = TCGOP_CALLI(op);
2817             nb_cargs = def->nb_cargs;
2818 
2819             col += ne_fprintf(f, " %s ", def->name);
2820 
2821             /*
2822              * Print the function name from TCGHelperInfo, if available.
2823              * Note that plugins have a template function for the info,
2824              * but the actual function pointer comes from the plugin.
2825              */
2826             if (func == info->func) {
2827                 col += ne_fprintf(f, "%s", info->name);
2828             } else {
2829                 col += ne_fprintf(f, "plugin(%p)", func);
2830             }
2831 
2832             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2833             for (i = 0; i < nb_oargs; i++) {
2834                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2835                                                             op->args[i]));
2836             }
2837             for (i = 0; i < nb_iargs; i++) {
2838                 TCGArg arg = op->args[nb_oargs + i];
2839                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2840                 col += ne_fprintf(f, ",%s", t);
2841             }
2842         } else {
2843             col += ne_fprintf(f, " %s ", def->name);
2844 
2845             nb_oargs = def->nb_oargs;
2846             nb_iargs = def->nb_iargs;
2847             nb_cargs = def->nb_cargs;
2848 
2849             if (def->flags & TCG_OPF_VECTOR) {
2850                 col += ne_fprintf(f, "v%d,e%d,",
2851                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2852                                   8 << TCGOP_VECE(op));
2853             }
2854 
2855             k = 0;
2856             for (i = 0; i < nb_oargs; i++) {
2857                 const char *sep = k ? "," : "";
2858                 col += ne_fprintf(f, "%s%s", sep,
2859                                   tcg_get_arg_str(s, buf, sizeof(buf),
2860                                                   op->args[k++]));
2861             }
2862             for (i = 0; i < nb_iargs; i++) {
2863                 const char *sep = k ? "," : "";
2864                 col += ne_fprintf(f, "%s%s", sep,
2865                                   tcg_get_arg_str(s, buf, sizeof(buf),
2866                                                   op->args[k++]));
2867             }
2868             switch (c) {
2869             case INDEX_op_brcond_i32:
2870             case INDEX_op_setcond_i32:
2871             case INDEX_op_negsetcond_i32:
2872             case INDEX_op_movcond_i32:
2873             case INDEX_op_brcond2_i32:
2874             case INDEX_op_setcond2_i32:
2875             case INDEX_op_brcond_i64:
2876             case INDEX_op_setcond_i64:
2877             case INDEX_op_negsetcond_i64:
2878             case INDEX_op_movcond_i64:
2879             case INDEX_op_cmp_vec:
2880             case INDEX_op_cmpsel_vec:
2881                 if (op->args[k] < ARRAY_SIZE(cond_name)
2882                     && cond_name[op->args[k]]) {
2883                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2884                 } else {
2885                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2886                 }
2887                 i = 1;
2888                 break;
2889             case INDEX_op_qemu_ld_i32:
2890             case INDEX_op_qemu_st_i32:
2891             case INDEX_op_qemu_st8_i32:
2892             case INDEX_op_qemu_ld_i64:
2893             case INDEX_op_qemu_st_i64:
2894             case INDEX_op_qemu_ld_i128:
2895             case INDEX_op_qemu_st_i128:
2896                 {
2897                     const char *s_al, *s_op, *s_at;
2898                     MemOpIdx oi = op->args[k++];
2899                     MemOp mop = get_memop(oi);
2900                     unsigned ix = get_mmuidx(oi);
2901 
2902                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2903                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2904                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2905                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2906 
2907                     /* If all fields are accounted for, print symbolically. */
2908                     if (!mop && s_al && s_op && s_at) {
2909                         col += ne_fprintf(f, ",%s%s%s,%u",
2910                                           s_at, s_al, s_op, ix);
2911                     } else {
2912                         mop = get_memop(oi);
2913                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2914                     }
2915                     i = 1;
2916                 }
2917                 break;
2918             case INDEX_op_bswap16_i32:
2919             case INDEX_op_bswap16_i64:
2920             case INDEX_op_bswap32_i32:
2921             case INDEX_op_bswap32_i64:
2922             case INDEX_op_bswap64_i64:
2923                 {
2924                     TCGArg flags = op->args[k];
2925                     const char *name = NULL;
2926 
2927                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2928                         name = bswap_flag_name[flags];
2929                     }
2930                     if (name) {
2931                         col += ne_fprintf(f, ",%s", name);
2932                     } else {
2933                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2934                     }
2935                     i = k = 1;
2936                 }
2937                 break;
2938 #ifdef CONFIG_PLUGIN
2939             case INDEX_op_plugin_cb:
2940                 {
2941                     TCGArg from = op->args[k++];
2942                     const char *name = NULL;
2943 
2944                     if (from < ARRAY_SIZE(plugin_from_name)) {
2945                         name = plugin_from_name[from];
2946                     }
2947                     if (name) {
2948                         col += ne_fprintf(f, "%s", name);
2949                     } else {
2950                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2951                     }
2952                     i = 1;
2953                 }
2954                 break;
2955 #endif
2956             default:
2957                 i = 0;
2958                 break;
2959             }
2960             switch (c) {
2961             case INDEX_op_set_label:
2962             case INDEX_op_br:
2963             case INDEX_op_brcond_i32:
2964             case INDEX_op_brcond_i64:
2965             case INDEX_op_brcond2_i32:
2966                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2967                                   arg_label(op->args[k])->id);
2968                 i++, k++;
2969                 break;
2970             case INDEX_op_mb:
2971                 {
2972                     TCGBar membar = op->args[k];
2973                     const char *b_op, *m_op;
2974 
2975                     switch (membar & TCG_BAR_SC) {
2976                     case 0:
2977                         b_op = "none";
2978                         break;
2979                     case TCG_BAR_LDAQ:
2980                         b_op = "acq";
2981                         break;
2982                     case TCG_BAR_STRL:
2983                         b_op = "rel";
2984                         break;
2985                     case TCG_BAR_SC:
2986                         b_op = "seq";
2987                         break;
2988                     default:
2989                         g_assert_not_reached();
2990                     }
2991 
2992                     switch (membar & TCG_MO_ALL) {
2993                     case 0:
2994                         m_op = "none";
2995                         break;
2996                     case TCG_MO_LD_LD:
2997                         m_op = "rr";
2998                         break;
2999                     case TCG_MO_LD_ST:
3000                         m_op = "rw";
3001                         break;
3002                     case TCG_MO_ST_LD:
3003                         m_op = "wr";
3004                         break;
3005                     case TCG_MO_ST_ST:
3006                         m_op = "ww";
3007                         break;
3008                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3009                         m_op = "rr+rw";
3010                         break;
3011                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3012                         m_op = "rr+wr";
3013                         break;
3014                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3015                         m_op = "rr+ww";
3016                         break;
3017                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3018                         m_op = "rw+wr";
3019                         break;
3020                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3021                         m_op = "rw+ww";
3022                         break;
3023                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3024                         m_op = "wr+ww";
3025                         break;
3026                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3027                         m_op = "rr+rw+wr";
3028                         break;
3029                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3030                         m_op = "rr+rw+ww";
3031                         break;
3032                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3033                         m_op = "rr+wr+ww";
3034                         break;
3035                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3036                         m_op = "rw+wr+ww";
3037                         break;
3038                     case TCG_MO_ALL:
3039                         m_op = "all";
3040                         break;
3041                     default:
3042                         g_assert_not_reached();
3043                     }
3044 
3045                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3046                     i++, k++;
3047                 }
3048                 break;
3049             default:
3050                 break;
3051             }
3052             for (; i < nb_cargs; i++, k++) {
3053                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3054                                   op->args[k]);
3055             }
3056         }
3057 
3058         if (have_prefs || op->life) {
3059             for (; col < 40; ++col) {
3060                 putc(' ', f);
3061             }
3062         }
3063 
3064         if (op->life) {
3065             unsigned life = op->life;
3066 
3067             if (life & (SYNC_ARG * 3)) {
3068                 ne_fprintf(f, "  sync:");
3069                 for (i = 0; i < 2; ++i) {
3070                     if (life & (SYNC_ARG << i)) {
3071                         ne_fprintf(f, " %d", i);
3072                     }
3073                 }
3074             }
3075             life /= DEAD_ARG;
3076             if (life) {
3077                 ne_fprintf(f, "  dead:");
3078                 for (i = 0; life; ++i, life >>= 1) {
3079                     if (life & 1) {
3080                         ne_fprintf(f, " %d", i);
3081                     }
3082                 }
3083             }
3084         }
3085 
3086         if (have_prefs) {
3087             for (i = 0; i < nb_oargs; ++i) {
3088                 TCGRegSet set = output_pref(op, i);
3089 
3090                 if (i == 0) {
3091                     ne_fprintf(f, "  pref=");
3092                 } else {
3093                     ne_fprintf(f, ",");
3094                 }
3095                 if (set == 0) {
3096                     ne_fprintf(f, "none");
3097                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3098                     ne_fprintf(f, "all");
3099 #ifdef CONFIG_DEBUG_TCG
3100                 } else if (tcg_regset_single(set)) {
3101                     TCGReg reg = tcg_regset_first(set);
3102                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3103 #endif
3104                 } else if (TCG_TARGET_NB_REGS <= 32) {
3105                     ne_fprintf(f, "0x%x", (uint32_t)set);
3106                 } else {
3107                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3108                 }
3109             }
3110         }
3111 
3112         putc('\n', f);
3113     }
3114 }
3115 
3116 /* We give more priority to constraints with fewer registers. */
3117 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3118 {
3119     int n;
3120 
3121     arg_ct += k;
3122     n = ctpop64(arg_ct->regs);
3123 
3124     /*
3125      * Sort constraints of a single register first, which includes output
3126      * aliases (which must exactly match the input already allocated).
3127      */
3128     if (n == 1 || arg_ct->oalias) {
3129         return INT_MAX;
3130     }
3131 
3132     /*
3133      * Sort register pairs next, first then second immediately after.
3134      * Arbitrarily sort multiple pairs by the index of the first reg;
3135      * there shouldn't be many pairs.
3136      */
3137     switch (arg_ct->pair) {
3138     case 1:
3139     case 3:
3140         return (k + 1) * 2;
3141     case 2:
3142         return (arg_ct->pair_index + 1) * 2 - 1;
3143     }
3144 
3145     /* Finally, sort by decreasing register count. */
3146     assert(n > 1);
3147     return -n;
3148 }
3149 
3150 /* sort from highest priority to lowest */
3151 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3152 {
3153     int i, j;
3154 
3155     for (i = 0; i < n; i++) {
3156         a[start + i].sort_index = start + i;
3157     }
3158     if (n <= 1) {
3159         return;
3160     }
3161     for (i = 0; i < n - 1; i++) {
3162         for (j = i + 1; j < n; j++) {
3163             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3164             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3165             if (p1 < p2) {
3166                 int tmp = a[start + i].sort_index;
3167                 a[start + i].sort_index = a[start + j].sort_index;
3168                 a[start + j].sort_index = tmp;
3169             }
3170         }
3171     }
3172 }
3173 
3174 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3175 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3176 
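/*
 * Constraint-string syntax parsed below, as a sketch: a set such as
 * C_O1_I2(r, r, ri) yields args_ct_str = { "r", "r", "ri" } -- one
 * output in any register and two inputs, the second of which may be a
 * constant.  A digit constraint such as "0" aliases an input to the
 * numbered output, '&' marks a newreg output, and 'p'/'m' tie an
 * argument to the register after/before the previous one for
 * register pairs.
 */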
3177 static void process_constraint_sets(void)
3178 {
3179     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3180         const TCGConstraintSet *tdefs = &constraint_sets[c];
3181         TCGArgConstraint *args_ct = all_cts[c];
3182         int nb_oargs = tdefs->nb_oargs;
3183         int nb_iargs = tdefs->nb_iargs;
3184         int nb_args = nb_oargs + nb_iargs;
3185         bool saw_alias_pair = false;
3186 
3187         for (int i = 0; i < nb_args; i++) {
3188             const char *ct_str = tdefs->args_ct_str[i];
3189             bool input_p = i >= nb_oargs;
3190             int o;
3191 
3192             switch (*ct_str) {
3193             case '0' ... '9':
3194                 o = *ct_str - '0';
3195                 tcg_debug_assert(input_p);
3196                 tcg_debug_assert(o < nb_oargs);
3197                 tcg_debug_assert(args_ct[o].regs != 0);
3198                 tcg_debug_assert(!args_ct[o].oalias);
3199                 args_ct[i] = args_ct[o];
3200                 /* The output sets oalias.  */
3201                 args_ct[o].oalias = 1;
3202                 args_ct[o].alias_index = i;
3203                 /* The input sets ialias. */
3204                 args_ct[i].ialias = 1;
3205                 args_ct[i].alias_index = o;
3206                 if (args_ct[i].pair) {
3207                     saw_alias_pair = true;
3208                 }
3209                 tcg_debug_assert(ct_str[1] == '\0');
3210                 continue;
3211 
3212             case '&':
3213                 tcg_debug_assert(!input_p);
3214                 args_ct[i].newreg = true;
3215                 ct_str++;
3216                 break;
3217 
3218             case 'p': /* plus */
3219                 /* Allocate to the register after the previous. */
3220                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3221                 o = i - 1;
3222                 tcg_debug_assert(!args_ct[o].pair);
3223                 tcg_debug_assert(!args_ct[o].ct);
3224                 args_ct[i] = (TCGArgConstraint){
3225                     .pair = 2,
3226                     .pair_index = o,
3227                     .regs = args_ct[o].regs << 1,
3228                     .newreg = args_ct[o].newreg,
3229                 };
3230                 args_ct[o].pair = 1;
3231                 args_ct[o].pair_index = i;
3232                 tcg_debug_assert(ct_str[1] == '\0');
3233                 continue;
3234 
3235             case 'm': /* minus */
3236                 /* Allocate to the register before the previous. */
3237                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3238                 o = i - 1;
3239                 tcg_debug_assert(!args_ct[o].pair);
3240                 tcg_debug_assert(!args_ct[o].ct);
3241                 args_ct[i] = (TCGArgConstraint){
3242                     .pair = 1,
3243                     .pair_index = o,
3244                     .regs = args_ct[o].regs >> 1,
3245                     .newreg = args_ct[o].newreg,
3246                 };
3247                 args_ct[o].pair = 2;
3248                 args_ct[o].pair_index = i;
3249                 tcg_debug_assert(ct_str[1] == '\0');
3250                 continue;
3251             }
3252 
3253             do {
3254                 switch (*ct_str) {
3255                 case 'i':
3256                     args_ct[i].ct |= TCG_CT_CONST;
3257                     break;
3258 #ifdef TCG_REG_ZERO
3259                 case 'z':
3260                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3261                     break;
3262 #endif
3263 
3264                 /* Include all of the target-specific constraints. */
3265 
3266 #undef CONST
3267 #define CONST(CASE, MASK) \
3268     case CASE: args_ct[i].ct |= MASK; break;
3269 #define REGS(CASE, MASK) \
3270     case CASE: args_ct[i].regs |= MASK; break;
3271 
3272 #include "tcg-target-con-str.h"
3273 
3274 #undef REGS
3275 #undef CONST
3276                 default:
3277                 case '0' ... '9':
3278                 case '&':
3279                 case 'p':
3280                 case 'm':
3281                     /* Typo in TCGConstraintSet constraint. */
3282                     g_assert_not_reached();
3283                 }
3284             } while (*++ct_str != '\0');
3285         }
3286 
3287         /*
3288          * Fix up output pairs that are aliased with inputs.
3289          * When we created the alias, we copied pair from the output.
3290          * There are three cases:
3291          *    (1a) Pairs of inputs alias pairs of outputs.
3292          *    (1b) One input aliases the first of a pair of outputs.
3293          *    (2)  One input aliases the second of a pair of outputs.
3294          *
3295          * Case 1a is handled by making sure that the pair_index'es are
3296          * properly updated so that they appear the same as a pair of inputs.
3297          *
3298          * Case 1b is handled by setting the pair_index of the input to
3299          * itself, simply so it doesn't point to an unrelated argument.
3300          * Since we don't encounter the "second" during the input allocation
3301          * phase, nothing happens with the second half of the input pair.
3302          *
3303          * Case 2 is handled by setting the second input to pair=3, the
3304          * first output to pair=3, and the pair_index'es to match.
3305          */
3306         if (saw_alias_pair) {
3307             for (int i = nb_oargs; i < nb_args; i++) {
3308                 int o, o2, i2;
3309 
3310                 /*
3311                  * Since [0-9pm] must be alone in the constraint string,
3312                  * the only way they can both be set is if the pair comes
3313                  * from the output alias.
3314                  */
3315                 if (!args_ct[i].ialias) {
3316                     continue;
3317                 }
3318                 switch (args_ct[i].pair) {
3319                 case 0:
3320                     break;
3321                 case 1:
3322                     o = args_ct[i].alias_index;
3323                     o2 = args_ct[o].pair_index;
3324                     tcg_debug_assert(args_ct[o].pair == 1);
3325                     tcg_debug_assert(args_ct[o2].pair == 2);
3326                     if (args_ct[o2].oalias) {
3327                         /* Case 1a */
3328                         i2 = args_ct[o2].alias_index;
3329                         tcg_debug_assert(args_ct[i2].pair == 2);
3330                         args_ct[i2].pair_index = i;
3331                         args_ct[i].pair_index = i2;
3332                     } else {
3333                         /* Case 1b */
3334                         args_ct[i].pair_index = i;
3335                     }
3336                     break;
3337                 case 2:
3338                     o = args_ct[i].alias_index;
3339                     o2 = args_ct[o].pair_index;
3340                     tcg_debug_assert(args_ct[o].pair == 2);
3341                     tcg_debug_assert(args_ct[o2].pair == 1);
3342                     if (args_ct[o2].oalias) {
3343                         /* Case 1a */
3344                         i2 = args_ct[o2].alias_index;
3345                         tcg_debug_assert(args_ct[i2].pair == 1);
3346                         args_ct[i2].pair_index = i;
3347                         args_ct[i].pair_index = i2;
3348                     } else {
3349                         /* Case 2 */
3350                         args_ct[i].pair = 3;
3351                         args_ct[o2].pair = 3;
3352                         args_ct[i].pair_index = o2;
3353                         args_ct[o2].pair_index = i;
3354                     }
3355                     break;
3356                 default:
3357                     g_assert_not_reached();
3358                 }
3359             }
3360         }
3361 
3362         /* sort the constraints (XXX: this is just a heuristic) */
3363         sort_constraints(args_ct, 0, nb_oargs);
3364         sort_constraints(args_ct, nb_oargs, nb_iargs);
3365     }
3366 }
3367 
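/*
 * Illustrative parse (a sketch): a register pair written as { "r", "p" }
 * leaves the first argument with pair = 1 and the second with pair = 2
 * and regs = first->regs << 1, so the allocator can only assign adjacent
 * registers I and I+1 to the two halves ("m" is the mirror image).  An
 * input "0" instead copies the constraint of output 0 and links the two
 * arguments through the oalias / ialias and alias_index fields.
 */
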
3368 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3369 {
3370     TCGOpcode opc = op->opc;
3371     TCGType type = TCGOP_TYPE(op);
3372     unsigned flags = TCGOP_FLAGS(op);
3373     const TCGOpDef *def = &tcg_op_defs[opc];
3374     const TCGOutOp *outop = all_outop[opc];
3375     TCGConstraintSetIndex con_set;
3376 
3377     if (def->flags & TCG_OPF_NOT_PRESENT) {
3378         return empty_cts;
3379     }
3380 
3381     if (outop) {
3382         con_set = outop->static_constraint;
3383         if (con_set == C_Dynamic) {
3384             con_set = outop->dynamic_constraint(type, flags);
3385         }
3386     } else {
3387         con_set = tcg_target_op_def(opc, type, flags);
3388     }
3389     tcg_debug_assert(con_set >= 0);
3390     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3391 
3392     /* The constraint arguments must match TCGOpcode arguments. */
3393     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3394     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3395 
3396     return all_cts[con_set];
3397 }
3398 
3399 static void remove_label_use(TCGOp *op, int idx)
3400 {
3401     TCGLabel *label = arg_label(op->args[idx]);
3402     TCGLabelUse *use;
3403 
3404     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3405         if (use->op == op) {
3406             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3407             return;
3408         }
3409     }
3410     g_assert_not_reached();
3411 }
3412 
3413 void tcg_op_remove(TCGContext *s, TCGOp *op)
3414 {
3415     switch (op->opc) {
3416     case INDEX_op_br:
3417         remove_label_use(op, 0);
3418         break;
3419     case INDEX_op_brcond_i32:
3420     case INDEX_op_brcond_i64:
3421         remove_label_use(op, 3);
3422         break;
3423     case INDEX_op_brcond2_i32:
3424         remove_label_use(op, 5);
3425         break;
3426     default:
3427         break;
3428     }
3429 
3430     QTAILQ_REMOVE(&s->ops, op, link);
3431     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3432     s->nb_ops--;
3433 }
3434 
3435 void tcg_remove_ops_after(TCGOp *op)
3436 {
3437     TCGContext *s = tcg_ctx;
3438 
3439     while (true) {
3440         TCGOp *last = tcg_last_op();
3441         if (last == op) {
3442             return;
3443         }
3444         tcg_op_remove(s, last);
3445     }
3446 }
3447 
3448 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3449 {
3450     TCGContext *s = tcg_ctx;
3451     TCGOp *op = NULL;
3452 
3453     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3454         QTAILQ_FOREACH(op, &s->free_ops, link) {
3455             if (nargs <= op->nargs) {
3456                 QTAILQ_REMOVE(&s->free_ops, op, link);
3457                 nargs = op->nargs;
3458                 goto found;
3459             }
3460         }
3461     }
3462 
3463     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3464     nargs = MAX(4, nargs);
3465     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3466 
3467  found:
3468     memset(op, 0, offsetof(TCGOp, link));
3469     op->opc = opc;
3470     op->nargs = nargs;
3471 
3472     /* Check for bitfield overflow. */
3473     tcg_debug_assert(op->nargs == nargs);
3474 
3475     s->nb_ops++;
3476     return op;
3477 }
3478 
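/*
 * A sketch of the reuse policy above: ops removed from the stream land
 * on s->free_ops, and a later allocation reuses the first entry whose
 * argument capacity suffices, inheriting that (possibly larger) nargs.
 * Fresh allocations round nargs up to 4 to reduce fragmentation.
 */
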
3479 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3480 {
3481     TCGOp *op = tcg_op_alloc(opc, nargs);
3482 
3483     if (tcg_ctx->emit_before_op) {
3484         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3485     } else {
3486         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3487     }
3488     return op;
3489 }
3490 
3491 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3492                             TCGOpcode opc, TCGType type, unsigned nargs)
3493 {
3494     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3495 
3496     TCGOP_TYPE(new_op) = type;
3497     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3498     return new_op;
3499 }
3500 
3501 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3502                            TCGOpcode opc, TCGType type, unsigned nargs)
3503 {
3504     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3505 
3506     TCGOP_TYPE(new_op) = type;
3507     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3508     return new_op;
3509 }
3510 
3511 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3512 {
3513     TCGLabelUse *u;
3514 
3515     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3516         TCGOp *op = u->op;
3517         switch (op->opc) {
3518         case INDEX_op_br:
3519             op->args[0] = label_arg(to);
3520             break;
3521         case INDEX_op_brcond_i32:
3522         case INDEX_op_brcond_i64:
3523             op->args[3] = label_arg(to);
3524             break;
3525         case INDEX_op_brcond2_i32:
3526             op->args[5] = label_arg(to);
3527             break;
3528         default:
3529             g_assert_not_reached();
3530         }
3531     }
3532 
3533     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3534 }
3535 
3536 /* Reachability analysis: remove unreachable code.  */
3537 static void __attribute__((noinline))
3538 reachable_code_pass(TCGContext *s)
3539 {
3540     TCGOp *op, *op_next, *op_prev;
3541     bool dead = false;
3542 
3543     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3544         bool remove = dead;
3545         TCGLabel *label;
3546 
3547         switch (op->opc) {
3548         case INDEX_op_set_label:
3549             label = arg_label(op->args[0]);
3550 
3551             /*
3552              * Note that the first op in the TB is always a load,
3553              * so there is always something before a label.
3554              */
3555             op_prev = QTAILQ_PREV(op, link);
3556 
3557             /*
3558              * If we find two sequential labels, move all branches to
3559              * reference the second label and remove the first label.
3560              * Do this before branch to next optimization, so that the
3561              * middle label is out of the way.
3562              */
3563             if (op_prev->opc == INDEX_op_set_label) {
3564                 move_label_uses(label, arg_label(op_prev->args[0]));
3565                 tcg_op_remove(s, op_prev);
3566                 op_prev = QTAILQ_PREV(op, link);
3567             }
3568 
3569             /*
3570              * Optimization can fold conditional branches to unconditional.
3571              * If we find a label which is preceded by an unconditional
3572              * branch to next, remove the branch.  We couldn't do this when
3573              * processing the branch because any dead code between the branch
3574              * and label had not yet been removed.
3575              */
3576             if (op_prev->opc == INDEX_op_br &&
3577                 label == arg_label(op_prev->args[0])) {
3578                 tcg_op_remove(s, op_prev);
3579                 /* Fall through means insns become live again.  */
3580                 dead = false;
3581             }
3582 
3583             if (QSIMPLEQ_EMPTY(&label->branches)) {
3584                 /*
3585                  * While there is an occasional backward branch, virtually
3586                  * all branches generated by the translators are forward.
3587                  * Which means that by the time we reach the label, we will
3588                  * generally have removed all references to it already, and
3589                  * there is little to be gained by iterating.
3590                  */
3591                 remove = true;
3592             } else {
3593                 /* Once we see a label, insns become live again.  */
3594                 dead = false;
3595                 remove = false;
3596             }
3597             break;
3598 
3599         case INDEX_op_br:
3600         case INDEX_op_exit_tb:
3601         case INDEX_op_goto_ptr:
3602             /* Unconditional branches; everything following is dead.  */
3603             dead = true;
3604             break;
3605 
3606         case INDEX_op_call:
3607             /* Notice noreturn helper calls, raising exceptions.  */
3608             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3609                 dead = true;
3610             }
3611             break;
3612 
3613         case INDEX_op_insn_start:
3614             /* Never remove -- we need to keep these for unwind.  */
3615             remove = false;
3616             break;
3617 
3618         default:
3619             break;
3620         }
3621 
3622         if (remove) {
3623             tcg_op_remove(s, op);
3624         }
3625     }
3626 }
3627 
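/*
 * Illustrative transformations (sketch):
 *     set_label $L1 ; set_label $L2
 *         -> uses of $L1 are redirected to $L2 and $L1 is removed;
 *     br $L3 ; set_label $L3
 *         -> the branch to the next insn is removed, falling through;
 *     br $L4 ; mov_i32 t0, t1 ; set_label $L5
 *         -> the mov is unreachable and removed, as is $L5 itself when
 *            no branch references it.
 */
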
3628 #define TS_DEAD  1
3629 #define TS_MEM   2
3630 
3631 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3632 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3633 
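/*
 * Temp liveness state is a bitmask.  liveness_pass_1 walks the op list
 * backwards, so TS_DEAD set at a given point means no later op uses the
 * value, and TS_MEM means the value must also be present in its
 * canonical memory slot.  The per-op arg_life word records the same
 * facts per argument via DEAD_ARG and SYNC_ARG, tested with the two
 * macros above.
 */
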
3634 /* For liveness_pass_1, the register preferences for a given temp.  */
3635 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3636 {
3637     return ts->state_ptr;
3638 }
3639 
3640 /* For liveness_pass_1, reset the preferences for a given temp to the
3641  * maximal regset for its type.
3642  */
3643 static inline void la_reset_pref(TCGTemp *ts)
3644 {
3645     *la_temp_pref(ts)
3646         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3647 }
3648 
3649 /* liveness analysis: end of function: all temps are dead, and globals
3650    should be in memory. */
3651 static void la_func_end(TCGContext *s, int ng, int nt)
3652 {
3653     int i;
3654 
3655     for (i = 0; i < ng; ++i) {
3656         s->temps[i].state = TS_DEAD | TS_MEM;
3657         la_reset_pref(&s->temps[i]);
3658     }
3659     for (i = ng; i < nt; ++i) {
3660         s->temps[i].state = TS_DEAD;
3661         la_reset_pref(&s->temps[i]);
3662     }
3663 }
3664 
3665 /* liveness analysis: end of basic block: all temps are dead, globals
3666    and local temps should be in memory. */
3667 static void la_bb_end(TCGContext *s, int ng, int nt)
3668 {
3669     int i;
3670 
3671     for (i = 0; i < nt; ++i) {
3672         TCGTemp *ts = &s->temps[i];
3673         int state;
3674 
3675         switch (ts->kind) {
3676         case TEMP_FIXED:
3677         case TEMP_GLOBAL:
3678         case TEMP_TB:
3679             state = TS_DEAD | TS_MEM;
3680             break;
3681         case TEMP_EBB:
3682         case TEMP_CONST:
3683             state = TS_DEAD;
3684             break;
3685         default:
3686             g_assert_not_reached();
3687         }
3688         ts->state = state;
3689         la_reset_pref(ts);
3690     }
3691 }
3692 
3693 /* liveness analysis: sync globals back to memory.  */
3694 static void la_global_sync(TCGContext *s, int ng)
3695 {
3696     int i;
3697 
3698     for (i = 0; i < ng; ++i) {
3699         int state = s->temps[i].state;
3700         s->temps[i].state = state | TS_MEM;
3701         if (state == TS_DEAD) {
3702             /* If the global was previously dead, reset prefs.  */
3703             la_reset_pref(&s->temps[i]);
3704         }
3705     }
3706 }
3707 
3708 /*
3709  * liveness analysis: conditional branch: all temps are dead unless
3710  * explicitly live-across-conditional-branch, globals and local temps
3711  * should be synced.
3712  */
3713 static void la_bb_sync(TCGContext *s, int ng, int nt)
3714 {
3715     la_global_sync(s, ng);
3716 
3717     for (int i = ng; i < nt; ++i) {
3718         TCGTemp *ts = &s->temps[i];
3719         int state;
3720 
3721         switch (ts->kind) {
3722         case TEMP_TB:
3723             state = ts->state;
3724             ts->state = state | TS_MEM;
3725             if (state != TS_DEAD) {
3726                 continue;
3727             }
3728             break;
3729         case TEMP_EBB:
3730         case TEMP_CONST:
3731             continue;
3732         default:
3733             g_assert_not_reached();
3734         }
3735         la_reset_pref(&s->temps[i]);
3736     }
3737 }
3738 
3739 /* liveness analysis: sync globals back to memory and kill.  */
3740 static void la_global_kill(TCGContext *s, int ng)
3741 {
3742     int i;
3743 
3744     for (i = 0; i < ng; i++) {
3745         s->temps[i].state = TS_DEAD | TS_MEM;
3746         la_reset_pref(&s->temps[i]);
3747     }
3748 }
3749 
3750 /* liveness analysis: note live globals crossing calls.  */
3751 static void la_cross_call(TCGContext *s, int nt)
3752 {
3753     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3754     int i;
3755 
3756     for (i = 0; i < nt; i++) {
3757         TCGTemp *ts = &s->temps[i];
3758         if (!(ts->state & TS_DEAD)) {
3759             TCGRegSet *pset = la_temp_pref(ts);
3760             TCGRegSet set = *pset;
3761 
3762             set &= mask;
3763             /* If the combination is not possible, restart.  */
3764             if (set == 0) {
3765                 set = tcg_target_available_regs[ts->type] & mask;
3766             }
3767             *pset = set;
3768         }
3769     }
3770 }
3771 
3772 /*
3773  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3774  * to TEMP_EBB, if possible.
3775  */
3776 static void __attribute__((noinline))
3777 liveness_pass_0(TCGContext *s)
3778 {
3779     void * const multiple_ebb = (void *)(uintptr_t)-1;
3780     int nb_temps = s->nb_temps;
3781     TCGOp *op, *ebb;
3782 
3783     for (int i = s->nb_globals; i < nb_temps; ++i) {
3784         s->temps[i].state_ptr = NULL;
3785     }
3786 
3787     /*
3788      * Represent each EBB by the op at which it begins.  In the case of
3789      * the first EBB, this is the first op, otherwise it is a label.
3790      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3791      * within a single EBB, else MULTIPLE_EBB.
3792      */
3793     ebb = QTAILQ_FIRST(&s->ops);
3794     QTAILQ_FOREACH(op, &s->ops, link) {
3795         const TCGOpDef *def;
3796         int nb_oargs, nb_iargs;
3797 
3798         switch (op->opc) {
3799         case INDEX_op_set_label:
3800             ebb = op;
3801             continue;
3802         case INDEX_op_discard:
3803             continue;
3804         case INDEX_op_call:
3805             nb_oargs = TCGOP_CALLO(op);
3806             nb_iargs = TCGOP_CALLI(op);
3807             break;
3808         default:
3809             def = &tcg_op_defs[op->opc];
3810             nb_oargs = def->nb_oargs;
3811             nb_iargs = def->nb_iargs;
3812             break;
3813         }
3814 
3815         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3816             TCGTemp *ts = arg_temp(op->args[i]);
3817 
3818             if (ts->kind != TEMP_TB) {
3819                 continue;
3820             }
3821             if (ts->state_ptr == NULL) {
3822                 ts->state_ptr = ebb;
3823             } else if (ts->state_ptr != ebb) {
3824                 ts->state_ptr = multiple_ebb;
3825             }
3826         }
3827     }
3828 
3829     /*
3830      * For TEMP_TB that turned out not to be used beyond one EBB,
3831      * reduce the liveness to TEMP_EBB.
3832      */
3833     for (int i = s->nb_globals; i < nb_temps; ++i) {
3834         TCGTemp *ts = &s->temps[i];
3835         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3836             ts->kind = TEMP_EBB;
3837         }
3838     }
3839 }
3840 
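/*
 * Illustrative reduction (sketch): a TEMP_TB temp referenced only
 * between one pair of labels ends with state_ptr equal to that single
 * EBB's first op and is downgraded to TEMP_EBB; one referenced both
 * before and after a label resolves to MULTIPLE_EBB and keeps its
 * TB-wide lifetime.
 */
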
3841 /* Liveness analysis: update the opc_arg_life array to tell if a
3842    given input argument is dead. Instructions updating dead
3843    temporaries are removed. */
3844 static void __attribute__((noinline))
3845 liveness_pass_1(TCGContext *s)
3846 {
3847     int nb_globals = s->nb_globals;
3848     int nb_temps = s->nb_temps;
3849     TCGOp *op, *op_prev;
3850     TCGRegSet *prefs;
3851     int i;
3852 
3853     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3854     for (i = 0; i < nb_temps; ++i) {
3855         s->temps[i].state_ptr = prefs + i;
3856     }
3857 
3858     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3859     la_func_end(s, nb_globals, nb_temps);
3860 
3861     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3862         int nb_iargs, nb_oargs;
3863         TCGOpcode opc_new, opc_new2;
3864         bool have_opc_new2;
3865         TCGLifeData arg_life = 0;
3866         TCGTemp *ts;
3867         TCGOpcode opc = op->opc;
3868         const TCGOpDef *def = &tcg_op_defs[opc];
3869         const TCGArgConstraint *args_ct;
3870 
3871         switch (opc) {
3872         case INDEX_op_call:
3873             {
3874                 const TCGHelperInfo *info = tcg_call_info(op);
3875                 int call_flags = tcg_call_flags(op);
3876 
3877                 nb_oargs = TCGOP_CALLO(op);
3878                 nb_iargs = TCGOP_CALLI(op);
3879 
3880                 /* pure functions can be removed if their result is unused */
3881                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3882                     for (i = 0; i < nb_oargs; i++) {
3883                         ts = arg_temp(op->args[i]);
3884                         if (ts->state != TS_DEAD) {
3885                             goto do_not_remove_call;
3886                         }
3887                     }
3888                     goto do_remove;
3889                 }
3890             do_not_remove_call:
3891 
3892                 /* Output args are dead.  */
3893                 for (i = 0; i < nb_oargs; i++) {
3894                     ts = arg_temp(op->args[i]);
3895                     if (ts->state & TS_DEAD) {
3896                         arg_life |= DEAD_ARG << i;
3897                     }
3898                     if (ts->state & TS_MEM) {
3899                         arg_life |= SYNC_ARG << i;
3900                     }
3901                     ts->state = TS_DEAD;
3902                     la_reset_pref(ts);
3903                 }
3904 
3905                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3906                 memset(op->output_pref, 0, sizeof(op->output_pref));
3907 
3908                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3909                                     TCG_CALL_NO_READ_GLOBALS))) {
3910                     la_global_kill(s, nb_globals);
3911                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3912                     la_global_sync(s, nb_globals);
3913                 }
3914 
3915                 /* Record arguments that die in this helper.  */
3916                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3917                     ts = arg_temp(op->args[i]);
3918                     if (ts->state & TS_DEAD) {
3919                         arg_life |= DEAD_ARG << i;
3920                     }
3921                 }
3922 
3923                 /* For all live registers, remove call-clobbered prefs.  */
3924                 la_cross_call(s, nb_temps);
3925 
3926                 /*
3927                  * Input arguments are live for preceding opcodes.
3928                  *
3929                  * For those arguments that die, and will be allocated in
3930                  * registers, clear the register set for that arg, to be
3931                  * filled in below.  For args that will be on the stack,
3932                  * reset to any available reg.  Process arguments in reverse
3933                  * order so that if a temp is used more than once, the stack
3934                  * reset to max happens before the register reset to 0.
3935                  */
3936                 for (i = nb_iargs - 1; i >= 0; i--) {
3937                     const TCGCallArgumentLoc *loc = &info->in[i];
3938                     ts = arg_temp(op->args[nb_oargs + i]);
3939 
3940                     if (ts->state & TS_DEAD) {
3941                         switch (loc->kind) {
3942                         case TCG_CALL_ARG_NORMAL:
3943                         case TCG_CALL_ARG_EXTEND_U:
3944                         case TCG_CALL_ARG_EXTEND_S:
3945                             if (arg_slot_reg_p(loc->arg_slot)) {
3946                                 *la_temp_pref(ts) = 0;
3947                                 break;
3948                             }
3949                             /* fall through */
3950                         default:
3951                             *la_temp_pref(ts) =
3952                                 tcg_target_available_regs[ts->type];
3953                             break;
3954                         }
3955                         ts->state &= ~TS_DEAD;
3956                     }
3957                 }
3958 
3959                 /*
3960                  * For each input argument, add its input register to prefs.
3961                  * If a temp is used once, this produces a single set bit;
3962                  * if a temp is used multiple times, this produces a set.
3963                  */
3964                 for (i = 0; i < nb_iargs; i++) {
3965                     const TCGCallArgumentLoc *loc = &info->in[i];
3966                     ts = arg_temp(op->args[nb_oargs + i]);
3967 
3968                     switch (loc->kind) {
3969                     case TCG_CALL_ARG_NORMAL:
3970                     case TCG_CALL_ARG_EXTEND_U:
3971                     case TCG_CALL_ARG_EXTEND_S:
3972                         if (arg_slot_reg_p(loc->arg_slot)) {
3973                             tcg_regset_set_reg(*la_temp_pref(ts),
3974                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3975                         }
3976                         break;
3977                     default:
3978                         break;
3979                     }
3980                 }
3981             }
3982             break;
3983         case INDEX_op_insn_start:
3984             break;
3985         case INDEX_op_discard:
3986             /* mark the temporary as dead */
3987             ts = arg_temp(op->args[0]);
3988             ts->state = TS_DEAD;
3989             la_reset_pref(ts);
3990             break;
3991 
3992         case INDEX_op_add2_i32:
3993             opc_new = INDEX_op_add_i32;
3994             goto do_addsub2;
3995         case INDEX_op_sub2_i32:
3996             opc_new = INDEX_op_sub_i32;
3997             goto do_addsub2;
3998         case INDEX_op_add2_i64:
3999             opc_new = INDEX_op_add_i64;
4000             goto do_addsub2;
4001         case INDEX_op_sub2_i64:
4002             opc_new = INDEX_op_sub_i64;
4003         do_addsub2:
4004             nb_iargs = 4;
4005             nb_oargs = 2;
4006             /* Test if the high part of the operation is dead, but not
4007                the low part.  The result can be optimized to a simple
4008                add or sub.  This happens often for an x86_64 guest
4009                when the cpu mode is set to 32 bit.  */
4010             if (arg_temp(op->args[1])->state == TS_DEAD) {
4011                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4012                     goto do_remove;
4013                 }
4014                 /* Replace the opcode and adjust the args in place,
4015                    leaving 3 unused args at the end.  */
4016                 op->opc = opc = opc_new;
4017                 op->args[1] = op->args[2];
4018                 op->args[2] = op->args[4];
4019                 /* Fall through and mark the single-word operation live.  */
4020                 nb_iargs = 2;
4021                 nb_oargs = 1;
4022             }
4023             goto do_not_remove;
4024 
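        /*
         * Illustrative rewrite (sketch): with args (rl, rh, al, ah, bl, bh)
         * and rh dead but rl live,
         *     add2_i32 rl, rh, al, ah, bl, bh
         * has just become
         *     add_i32 rl, al, bl
         * by moving args[2] -> args[1] and args[4] -> args[2].  The mul2
         * cases below perform the analogous single-word reduction.
         */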
4025         case INDEX_op_mulu2_i32:
4026             opc_new = INDEX_op_mul_i32;
4027             opc_new2 = INDEX_op_muluh_i32;
4028             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4029             goto do_mul2;
4030         case INDEX_op_muls2_i32:
4031             opc_new = INDEX_op_mul_i32;
4032             opc_new2 = INDEX_op_mulsh_i32;
4033             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4034             goto do_mul2;
4035         case INDEX_op_mulu2_i64:
4036             opc_new = INDEX_op_mul_i64;
4037             opc_new2 = INDEX_op_muluh_i64;
4038             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4039             goto do_mul2;
4040         case INDEX_op_muls2_i64:
4041             opc_new = INDEX_op_mul_i64;
4042             opc_new2 = INDEX_op_mulsh_i64;
4043             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4044             goto do_mul2;
4045         do_mul2:
4046             nb_iargs = 2;
4047             nb_oargs = 2;
4048             if (arg_temp(op->args[1])->state == TS_DEAD) {
4049                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4050                     /* Both parts of the operation are dead.  */
4051                     goto do_remove;
4052                 }
4053                 /* The high part of the operation is dead; generate the low. */
4054                 op->opc = opc = opc_new;
4055                 op->args[1] = op->args[2];
4056                 op->args[2] = op->args[3];
4057             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4058                 /* The low part of the operation is dead; generate the high. */
4059                 op->opc = opc = opc_new2;
4060                 op->args[0] = op->args[1];
4061                 op->args[1] = op->args[2];
4062                 op->args[2] = op->args[3];
4063             } else {
4064                 goto do_not_remove;
4065             }
4066             /* Mark the single-word operation live.  */
4067             nb_oargs = 1;
4068             goto do_not_remove;
4069 
4070         default:
4071             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4072             nb_iargs = def->nb_iargs;
4073             nb_oargs = def->nb_oargs;
4074 
4075             /* Test if the operation can be removed because all
4076                its outputs are dead. We assume that nb_oargs == 0
4077                implies side effects */
4078             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4079                 for (i = 0; i < nb_oargs; i++) {
4080                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4081                         goto do_not_remove;
4082                     }
4083                 }
4084                 goto do_remove;
4085             }
4086             goto do_not_remove;
4087 
4088         do_remove:
4089             tcg_op_remove(s, op);
4090             break;
4091 
4092         do_not_remove:
4093             for (i = 0; i < nb_oargs; i++) {
4094                 ts = arg_temp(op->args[i]);
4095 
4096                 /* Remember the preference of the uses that followed.  */
4097                 if (i < ARRAY_SIZE(op->output_pref)) {
4098                     op->output_pref[i] = *la_temp_pref(ts);
4099                 }
4100 
4101                 /* Output args are dead.  */
4102                 if (ts->state & TS_DEAD) {
4103                     arg_life |= DEAD_ARG << i;
4104                 }
4105                 if (ts->state & TS_MEM) {
4106                     arg_life |= SYNC_ARG << i;
4107                 }
4108                 ts->state = TS_DEAD;
4109                 la_reset_pref(ts);
4110             }
4111 
4112             /* If end of basic block, update.  */
4113             if (def->flags & TCG_OPF_BB_EXIT) {
4114                 la_func_end(s, nb_globals, nb_temps);
4115             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4116                 la_bb_sync(s, nb_globals, nb_temps);
4117             } else if (def->flags & TCG_OPF_BB_END) {
4118                 la_bb_end(s, nb_globals, nb_temps);
4119             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4120                 la_global_sync(s, nb_globals);
4121                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4122                     la_cross_call(s, nb_temps);
4123                 }
4124             }
4125 
4126             /* Record arguments that die in this opcode.  */
4127             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4128                 ts = arg_temp(op->args[i]);
4129                 if (ts->state & TS_DEAD) {
4130                     arg_life |= DEAD_ARG << i;
4131                 }
4132             }
4133 
4134             /* Input arguments are live for preceding opcodes.  */
4135             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4136                 ts = arg_temp(op->args[i]);
4137                 if (ts->state & TS_DEAD) {
4138                     /* For operands that were dead, initially allow
4139                        all regs for the type.  */
4140                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4141                     ts->state &= ~TS_DEAD;
4142                 }
4143             }
4144 
4145             /* Incorporate constraints for this operand.  */
4146             switch (opc) {
4147             case INDEX_op_mov_i32:
4148             case INDEX_op_mov_i64:
4149                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4150                    have proper constraints.  That said, special case
4151                    moves to propagate preferences backward.  */
4152                 if (IS_DEAD_ARG(1)) {
4153                     *la_temp_pref(arg_temp(op->args[0]))
4154                         = *la_temp_pref(arg_temp(op->args[1]));
4155                 }
4156                 break;
4157 
4158             default:
4159                 args_ct = opcode_args_ct(op);
4160                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4161                     const TCGArgConstraint *ct = &args_ct[i];
4162                     TCGRegSet set, *pset;
4163 
4164                     ts = arg_temp(op->args[i]);
4165                     pset = la_temp_pref(ts);
4166                     set = *pset;
4167 
4168                     set &= ct->regs;
4169                     if (ct->ialias) {
4170                         set &= output_pref(op, ct->alias_index);
4171                     }
4172                     /* If the combination is not possible, restart.  */
4173                     if (set == 0) {
4174                         set = ct->regs;
4175                     }
4176                     *pset = set;
4177                 }
4178                 break;
4179             }
4180             break;
4181         }
4182         op->life = arg_life;
4183     }
4184 }
4185 
4186 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4187 static bool __attribute__((noinline))
4188 liveness_pass_2(TCGContext *s)
4189 {
4190     int nb_globals = s->nb_globals;
4191     int nb_temps, i;
4192     bool changes = false;
4193     TCGOp *op, *op_next;
4194 
4195     /* Create a temporary for each indirect global.  */
4196     for (i = 0; i < nb_globals; ++i) {
4197         TCGTemp *its = &s->temps[i];
4198         if (its->indirect_reg) {
4199             TCGTemp *dts = tcg_temp_alloc(s);
4200             dts->type = its->type;
4201             dts->base_type = its->base_type;
4202             dts->temp_subindex = its->temp_subindex;
4203             dts->kind = TEMP_EBB;
4204             its->state_ptr = dts;
4205         } else {
4206             its->state_ptr = NULL;
4207         }
4208         /* All globals begin dead.  */
4209         its->state = TS_DEAD;
4210     }
4211     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4212         TCGTemp *its = &s->temps[i];
4213         its->state_ptr = NULL;
4214         its->state = TS_DEAD;
4215     }
4216 
4217     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4218         TCGOpcode opc = op->opc;
4219         const TCGOpDef *def = &tcg_op_defs[opc];
4220         TCGLifeData arg_life = op->life;
4221         int nb_iargs, nb_oargs, call_flags;
4222         TCGTemp *arg_ts, *dir_ts;
4223 
4224         if (opc == INDEX_op_call) {
4225             nb_oargs = TCGOP_CALLO(op);
4226             nb_iargs = TCGOP_CALLI(op);
4227             call_flags = tcg_call_flags(op);
4228         } else {
4229             nb_iargs = def->nb_iargs;
4230             nb_oargs = def->nb_oargs;
4231 
4232             /* Set flags similar to how calls require.  */
4233             if (def->flags & TCG_OPF_COND_BRANCH) {
4234                 /* Like reading globals: sync_globals */
4235                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4236             } else if (def->flags & TCG_OPF_BB_END) {
4237                 /* Like writing globals: save_globals */
4238                 call_flags = 0;
4239             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4240                 /* Like reading globals: sync_globals */
4241                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4242             } else {
4243                 /* No effect on globals.  */
4244                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4245                               TCG_CALL_NO_WRITE_GLOBALS);
4246             }
4247         }
4248 
4249         /* Make sure that input arguments are available.  */
4250         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4251             arg_ts = arg_temp(op->args[i]);
4252             dir_ts = arg_ts->state_ptr;
4253             if (dir_ts && arg_ts->state == TS_DEAD) {
4254                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4255                                   ? INDEX_op_ld_i32
4256                                   : INDEX_op_ld_i64);
4257                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4258                                                   arg_ts->type, 3);
4259 
4260                 lop->args[0] = temp_arg(dir_ts);
4261                 lop->args[1] = temp_arg(arg_ts->mem_base);
4262                 lop->args[2] = arg_ts->mem_offset;
4263 
4264                 /* Loaded, but synced with memory.  */
4265                 arg_ts->state = TS_MEM;
4266             }
4267         }
4268 
4269         /* Perform input replacement, and mark inputs that became dead.
4270            No action is required except keeping temp_state up to date
4271            so that we reload when needed.  */
4272         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4273             arg_ts = arg_temp(op->args[i]);
4274             dir_ts = arg_ts->state_ptr;
4275             if (dir_ts) {
4276                 op->args[i] = temp_arg(dir_ts);
4277                 changes = true;
4278                 if (IS_DEAD_ARG(i)) {
4279                     arg_ts->state = TS_DEAD;
4280                 }
4281             }
4282         }
4283 
4284         /* Liveness analysis should ensure that the following are
4285            all correct, for call sites and basic block end points.  */
4286         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4287             /* Nothing to do */
4288         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4289             for (i = 0; i < nb_globals; ++i) {
4290                 /* Liveness should see that globals are synced back,
4291                    that is, either TS_DEAD or TS_MEM.  */
4292                 arg_ts = &s->temps[i];
4293                 tcg_debug_assert(arg_ts->state_ptr == 0
4294                                  || arg_ts->state != 0);
4295             }
4296         } else {
4297             for (i = 0; i < nb_globals; ++i) {
4298                 /* Liveness should see that globals are saved back,
4299                    that is, TS_DEAD, waiting to be reloaded.  */
4300                 arg_ts = &s->temps[i];
4301                 tcg_debug_assert(arg_ts->state_ptr == 0
4302                                  || arg_ts->state == TS_DEAD);
4303             }
4304         }
4305 
4306         /* Outputs become available.  */
4307         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4308             arg_ts = arg_temp(op->args[0]);
4309             dir_ts = arg_ts->state_ptr;
4310             if (dir_ts) {
4311                 op->args[0] = temp_arg(dir_ts);
4312                 changes = true;
4313 
4314                 /* The output is now live and modified.  */
4315                 arg_ts->state = 0;
4316 
4317                 if (NEED_SYNC_ARG(0)) {
4318                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4319                                       ? INDEX_op_st_i32
4320                                       : INDEX_op_st_i64);
4321                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4322                                                      arg_ts->type, 3);
4323                     TCGTemp *out_ts = dir_ts;
4324 
4325                     if (IS_DEAD_ARG(0)) {
4326                         out_ts = arg_temp(op->args[1]);
4327                         arg_ts->state = TS_DEAD;
4328                         tcg_op_remove(s, op);
4329                     } else {
4330                         arg_ts->state = TS_MEM;
4331                     }
4332 
4333                     sop->args[0] = temp_arg(out_ts);
4334                     sop->args[1] = temp_arg(arg_ts->mem_base);
4335                     sop->args[2] = arg_ts->mem_offset;
4336                 } else {
4337                     tcg_debug_assert(!IS_DEAD_ARG(0));
4338                 }
4339             }
4340         } else {
4341             for (i = 0; i < nb_oargs; i++) {
4342                 arg_ts = arg_temp(op->args[i]);
4343                 dir_ts = arg_ts->state_ptr;
4344                 if (!dir_ts) {
4345                     continue;
4346                 }
4347                 op->args[i] = temp_arg(dir_ts);
4348                 changes = true;
4349 
4350                 /* The output is now live and modified.  */
4351                 arg_ts->state = 0;
4352 
4353                 /* Sync outputs upon their last write.  */
4354                 if (NEED_SYNC_ARG(i)) {
4355                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4356                                       ? INDEX_op_st_i32
4357                                       : INDEX_op_st_i64);
4358                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4359                                                      arg_ts->type, 3);
4360 
4361                     sop->args[0] = temp_arg(dir_ts);
4362                     sop->args[1] = temp_arg(arg_ts->mem_base);
4363                     sop->args[2] = arg_ts->mem_offset;
4364 
4365                     arg_ts->state = TS_MEM;
4366                 }
4367                 /* Drop outputs that are dead.  */
4368                 if (IS_DEAD_ARG(i)) {
4369                     arg_ts->state = TS_DEAD;
4370                 }
4371             }
4372         }
4373     }
4374 
4375     return changes;
4376 }
4377 
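/*
 * Illustrative result (sketch): for an indirect global G with memory
 * base B and offset O, shadowed by direct temp D, this pass rewrites
 *     add_i32 G, G, t0
 * into
 *     ld_i32  D, B, O        (only if G is not already loaded)
 *     add_i32 D, D, t0
 *     st_i32  D, B, O        (only at the last write, per NEED_SYNC_ARG)
 */
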
4378 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4379 {
4380     intptr_t off;
4381     int size, align;
4382 
4383     /* When allocating an object, look at the full type. */
4384     size = tcg_type_size(ts->base_type);
4385     switch (ts->base_type) {
4386     case TCG_TYPE_I32:
4387         align = 4;
4388         break;
4389     case TCG_TYPE_I64:
4390     case TCG_TYPE_V64:
4391         align = 8;
4392         break;
4393     case TCG_TYPE_I128:
4394     case TCG_TYPE_V128:
4395     case TCG_TYPE_V256:
4396         /*
4397          * Note that we do not require aligned storage for V256,
4398          * and that we provide alignment for I128 to match V128,
4399          * even if that's above what the host ABI requires.
4400          */
4401         align = 16;
4402         break;
4403     default:
4404         g_assert_not_reached();
4405     }
4406 
4407     /*
4408      * Assume the stack is sufficiently aligned.
4409      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4410      * and do not require 16 byte vector alignment.  This seems slightly
4411      * easier than fully parameterizing the above switch statement.
4412      */
4413     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4414     off = ROUND_UP(s->current_frame_offset, align);
4415 
4416     /* If we've exhausted the stack frame, restart with a smaller TB. */
4417     if (off + size > s->frame_end) {
4418         tcg_raise_tb_overflow(s);
4419     }
4420     s->current_frame_offset = off + size;
4421 #if defined(__sparc__)
4422     off += TCG_TARGET_STACK_BIAS;
4423 #endif
4424 
4425     /* If the object was subdivided, assign memory to all the parts. */
4426     if (ts->base_type != ts->type) {
4427         int part_size = tcg_type_size(ts->type);
4428         int part_count = size / part_size;
4429 
4430         /*
4431          * Each part is allocated sequentially in tcg_temp_new_internal.
4432          * Jump back to the first part by subtracting the current index.
4433          */
4434         ts -= ts->temp_subindex;
4435         for (int i = 0; i < part_count; ++i) {
4436             ts[i].mem_offset = off + i * part_size;
4437             ts[i].mem_base = s->frame_temp;
4438             ts[i].mem_allocated = 1;
4439         }
4440     } else {
4441         ts->mem_offset = off;
4442         ts->mem_base = s->frame_temp;
4443         ts->mem_allocated = 1;
4444     }
4445 }
4446 
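/*
 * Worked example (illustrative): assuming a 64-bit host with 16-byte
 * stack alignment, a TCG_TYPE_I128 temp allocated at
 * current_frame_offset == 0x34 gets off = ROUND_UP(0x34, 16) == 0x40,
 * and its two I64 parts (temp_subindex 0 and 1) receive mem_offset
 * 0x40 and 0x48 against frame_temp.
 */
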
4447 /* Assign @reg to @ts, and update reg_to_temp[]. */
4448 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4449 {
4450     if (ts->val_type == TEMP_VAL_REG) {
4451         TCGReg old = ts->reg;
4452         tcg_debug_assert(s->reg_to_temp[old] == ts);
4453         if (old == reg) {
4454             return;
4455         }
4456         s->reg_to_temp[old] = NULL;
4457     }
4458     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4459     s->reg_to_temp[reg] = ts;
4460     ts->val_type = TEMP_VAL_REG;
4461     ts->reg = reg;
4462 }
4463 
4464 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4465 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4466 {
4467     tcg_debug_assert(type != TEMP_VAL_REG);
4468     if (ts->val_type == TEMP_VAL_REG) {
4469         TCGReg reg = ts->reg;
4470         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4471         s->reg_to_temp[reg] = NULL;
4472     }
4473     ts->val_type = type;
4474 }
4475 
4476 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4477 
4478 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4479    mark it free; otherwise mark it dead.  */
4480 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4481 {
4482     TCGTempVal new_type;
4483 
4484     switch (ts->kind) {
4485     case TEMP_FIXED:
4486         return;
4487     case TEMP_GLOBAL:
4488     case TEMP_TB:
4489         new_type = TEMP_VAL_MEM;
4490         break;
4491     case TEMP_EBB:
4492         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4493         break;
4494     case TEMP_CONST:
4495         new_type = TEMP_VAL_CONST;
4496         break;
4497     default:
4498         g_assert_not_reached();
4499     }
4500     set_temp_val_nonreg(s, ts, new_type);
4501 }
4502 
4503 /* Mark a temporary as dead.  */
4504 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4505 {
4506     temp_free_or_dead(s, ts, 1);
4507 }
4508 
4509 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4510    register needs to be allocated to store a constant.  If 'free_or_dead'
4511    is non-zero, subsequently release the temporary; if it is positive, the
4512    temp is dead; if it is negative, the temp is free.  */
4513 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4514                       TCGRegSet preferred_regs, int free_or_dead)
4515 {
4516     if (!temp_readonly(ts) && !ts->mem_coherent) {
4517         if (!ts->mem_allocated) {
4518             temp_allocate_frame(s, ts);
4519         }
4520         switch (ts->val_type) {
4521         case TEMP_VAL_CONST:
4522             /* If we're going to free the temp immediately, then we won't
4523                require it later in a register, so attempt to store the
4524                constant to memory directly.  */
4525             if (free_or_dead
4526                 && tcg_out_sti(s, ts->type, ts->val,
4527                                ts->mem_base->reg, ts->mem_offset)) {
4528                 break;
4529             }
4530             temp_load(s, ts, tcg_target_available_regs[ts->type],
4531                       allocated_regs, preferred_regs);
4532             /* fallthrough */
4533 
4534         case TEMP_VAL_REG:
4535             tcg_out_st(s, ts->type, ts->reg,
4536                        ts->mem_base->reg, ts->mem_offset);
4537             break;
4538 
4539         case TEMP_VAL_MEM:
4540             break;
4541 
4542         case TEMP_VAL_DEAD:
4543         default:
4544             g_assert_not_reached();
4545         }
4546         ts->mem_coherent = 1;
4547     }
4548     if (free_or_dead) {
4549         temp_free_or_dead(s, ts, free_or_dead);
4550     }
4551 }
4552 
4553 /* free register 'reg' by spilling the corresponding temporary if necessary */
4554 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4555 {
4556     TCGTemp *ts = s->reg_to_temp[reg];
4557     if (ts != NULL) {
4558         temp_sync(s, ts, allocated_regs, 0, -1);
4559     }
4560 }
4561 
4562 /**
4563  * tcg_reg_alloc:
4564  * @required_regs: Set of registers in which we must allocate.
4565  * @allocated_regs: Set of registers which must be avoided.
4566  * @preferred_regs: Set of registers we should prefer.
4567  * @rev: True if we search the registers in "indirect" order.
4568  *
4569  * The allocated register must be in @required_regs & ~@allocated_regs,
4570  * but if we can put it in @preferred_regs we may save a move later.
4571  */
4572 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4573                             TCGRegSet allocated_regs,
4574                             TCGRegSet preferred_regs, bool rev)
4575 {
4576     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4577     TCGRegSet reg_ct[2];
4578     const int *order;
4579 
4580     reg_ct[1] = required_regs & ~allocated_regs;
4581     tcg_debug_assert(reg_ct[1] != 0);
4582     reg_ct[0] = reg_ct[1] & preferred_regs;
4583 
4584     /* Skip the preferred_regs option if it cannot be satisfied,
4585        or if the preference made no difference.  */
4586     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4587 
4588     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4589 
4590     /* Try free registers, preferences first.  */
4591     for (j = f; j < 2; j++) {
4592         TCGRegSet set = reg_ct[j];
4593 
4594         if (tcg_regset_single(set)) {
4595             /* One register in the set.  */
4596             TCGReg reg = tcg_regset_first(set);
4597             if (s->reg_to_temp[reg] == NULL) {
4598                 return reg;
4599             }
4600         } else {
4601             for (i = 0; i < n; i++) {
4602                 TCGReg reg = order[i];
4603                 if (s->reg_to_temp[reg] == NULL &&
4604                     tcg_regset_test_reg(set, reg)) {
4605                     return reg;
4606                 }
4607             }
4608         }
4609     }
4610 
4611     /* We must spill something.  */
4612     for (j = f; j < 2; j++) {
4613         TCGRegSet set = reg_ct[j];
4614 
4615         if (tcg_regset_single(set)) {
4616             /* One register in the set.  */
4617             TCGReg reg = tcg_regset_first(set);
4618             tcg_reg_free(s, reg, allocated_regs);
4619             return reg;
4620         } else {
4621             for (i = 0; i < n; i++) {
4622                 TCGReg reg = order[i];
4623                 if (tcg_regset_test_reg(set, reg)) {
4624                     tcg_reg_free(s, reg, allocated_regs);
4625                     return reg;
4626                 }
4627             }
4628         }
4629     }
4630 
4631     g_assert_not_reached();
4632 }
4633 
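/*
 * Allocation order, in sketch form: first try a free register from
 * preferred & required & ~allocated, then any free register from
 * required & ~allocated, and only then repeat both passes spilling
 * whatever temp occupies the chosen register; 'f' skips the preference
 * pass when it is unsatisfiable or makes no difference.
 */
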
4634 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4635                                  TCGRegSet allocated_regs,
4636                                  TCGRegSet preferred_regs, bool rev)
4637 {
4638     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4639     TCGRegSet reg_ct[2];
4640     const int *order;
4641 
4642     /* Only consider reg I when neither I nor I+1 is in allocated_regs. */
4643     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4644     tcg_debug_assert(reg_ct[1] != 0);
4645     reg_ct[0] = reg_ct[1] & preferred_regs;
4646 
4647     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4648 
4649     /*
4650      * Skip the preferred_regs option if it cannot be satisfied,
4651      * or if the preference made no difference.
4652      */
4653     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4654 
4655     /*
4656      * Minimize the number of flushes by looking for 2 free registers first,
4657      * then a single flush, then two flushes.
4658      */
4659     for (fmin = 2; fmin >= 0; fmin--) {
4660         for (j = k; j < 2; j++) {
4661             TCGRegSet set = reg_ct[j];
4662 
4663             for (i = 0; i < n; i++) {
4664                 TCGReg reg = order[i];
4665 
4666                 if (tcg_regset_test_reg(set, reg)) {
4667                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4668                     if (f >= fmin) {
4669                         tcg_reg_free(s, reg, allocated_regs);
4670                         tcg_reg_free(s, reg + 1, allocated_regs);
4671                         return reg;
4672                     }
4673                 }
4674             }
4675         }
4676     }
4677     g_assert_not_reached();
4678 }
4679 
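/*
 * Search order, in sketch form: fmin == 2 accepts only a pair where
 * both I and I+1 are already free; fmin == 1 accepts pairs needing one
 * spill; fmin == 0 accepts any candidate pair, spilling both halves.
 * tcg_reg_free is a no-op for a half that was already free.
 */
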
4680 /* Make sure the temporary is in a register.  If needed, allocate the register
4681    from DESIRED while avoiding ALLOCATED.  */
4682 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4683                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4684 {
4685     TCGReg reg;
4686 
4687     switch (ts->val_type) {
4688     case TEMP_VAL_REG:
4689         return;
4690     case TEMP_VAL_CONST:
4691         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4692                             preferred_regs, ts->indirect_base);
4693         if (ts->type <= TCG_TYPE_I64) {
4694             tcg_out_movi(s, ts->type, reg, ts->val);
4695         } else {
4696             uint64_t val = ts->val;
4697             MemOp vece = MO_64;
4698 
4699             /*
4700              * Find the minimal vector element that matches the constant.
4701              * The targets will, in general, have to do this search anyway,
4702              * do this generically.
4703              */
4704             if (val == dup_const(MO_8, val)) {
4705                 vece = MO_8;
4706             } else if (val == dup_const(MO_16, val)) {
4707                 vece = MO_16;
4708             } else if (val == dup_const(MO_32, val)) {
4709                 vece = MO_32;
4710             }
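            /*
             * E.g. val == 0x4242424242424242 matches dup_const(MO_8, val)
             * and is emitted as a byte splat; 0x1234123412341234 first
             * matches at MO_16; an arbitrary 64-bit pattern stays MO_64.
             */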
4711 
4712             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4713         }
4714         ts->mem_coherent = 0;
4715         break;
4716     case TEMP_VAL_MEM:
4717         if (!ts->mem_allocated) {
4718             temp_allocate_frame(s, ts);
4719         }
4720         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4721                             preferred_regs, ts->indirect_base);
4722         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4723         ts->mem_coherent = 1;
4724         break;
4725     case TEMP_VAL_DEAD:
4726     default:
4727         g_assert_not_reached();
4728     }
4729     set_temp_val_reg(s, ts, reg);
4730 }
4731 
4732 /* Save a temporary to memory. 'allocated_regs' is used in case a
4733    temporary register needs to be allocated to store a constant.  */
4734 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4735 {
4736     /* The liveness analysis already ensures that globals are back
4737        in memory. Keep a tcg_debug_assert for safety. */
4738     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4739 }
4740 
4741 /* save globals to their canonical location and assume they can be
4742    modified by the following code. 'allocated_regs' is used in case a
4743    temporary register needs to be allocated to store a constant. */
4744 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4745 {
4746     int i, n;
4747 
4748     for (i = 0, n = s->nb_globals; i < n; i++) {
4749         temp_save(s, &s->temps[i], allocated_regs);
4750     }
4751 }
4752 
4753 /* sync globals to their canonical location and assume they can be
4754    read by the following code. 'allocated_regs' is used in case a
4755    temporary register needs to be allocated to store a constant. */
4756 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4757 {
4758     int i, n;
4759 
4760     for (i = 0, n = s->nb_globals; i < n; i++) {
4761         TCGTemp *ts = &s->temps[i];
4762         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4763                          || ts->kind == TEMP_FIXED
4764                          || ts->mem_coherent);
4765     }
4766 }
4767 
4768 /* at the end of a basic block, we assume all temporaries are dead and
4769    all globals are stored at their canonical location. */
4770 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4771 {
4772     int i;
4773 
4774     for (i = s->nb_globals; i < s->nb_temps; i++) {
4775         TCGTemp *ts = &s->temps[i];
4776 
4777         switch (ts->kind) {
4778         case TEMP_TB:
4779             temp_save(s, ts, allocated_regs);
4780             break;
4781         case TEMP_EBB:
4782             /* The liveness analysis already ensures that temps are dead.
4783                Keep an tcg_debug_assert for safety. */
4784             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4785             break;
4786         case TEMP_CONST:
4787             /* Similarly, we should have freed any allocated register. */
4788             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4789             break;
4790         default:
4791             g_assert_not_reached();
4792         }
4793     }
4794 
4795     save_globals(s, allocated_regs);
4796 }
4797 
4798 /*
4799  * At a conditional branch, we assume all temporaries are dead unless
4800  * explicitly live-across-conditional-branch; all globals and local
4801  * temps are synced to their location.
4802  */
4803 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4804 {
4805     sync_globals(s, allocated_regs);
4806 
4807     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4808         TCGTemp *ts = &s->temps[i];
4809         /*
4810          * The liveness analysis already ensures that temps are dead.
4811          * Keep tcg_debug_asserts for safety.
4812          */
4813         switch (ts->kind) {
4814         case TEMP_TB:
4815             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4816             break;
4817         case TEMP_EBB:
4818         case TEMP_CONST:
4819             break;
4820         default:
4821             g_assert_not_reached();
4822         }
4823     }
4824 }
4825 
4826 /*
4827  * Specialized code generation for INDEX_op_mov_* with a constant.
4828  */
4829 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4830                                   tcg_target_ulong val, TCGLifeData arg_life,
4831                                   TCGRegSet preferred_regs)
4832 {
4833     /* ENV should not be modified.  */
4834     tcg_debug_assert(!temp_readonly(ots));
4835 
4836     /* The movi is not explicitly generated here.  */
4837     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4838     ots->val = val;
4839     ots->mem_coherent = 0;
4840     if (NEED_SYNC_ARG(0)) {
4841         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4842     } else if (IS_DEAD_ARG(0)) {
4843         temp_dead(s, ots);
4844     }
4845 }
4846 
4847 /*
4848  * Specialized code generation for INDEX_op_mov_*.
4849  */
4850 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4851 {
4852     const TCGLifeData arg_life = op->life;
4853     TCGRegSet allocated_regs, preferred_regs;
4854     TCGTemp *ts, *ots;
4855     TCGType otype, itype;
4856     TCGReg oreg, ireg;
4857 
4858     allocated_regs = s->reserved_regs;
4859     preferred_regs = output_pref(op, 0);
4860     ots = arg_temp(op->args[0]);
4861     ts = arg_temp(op->args[1]);
4862 
4863     /* ENV should not be modified.  */
4864     tcg_debug_assert(!temp_readonly(ots));
4865 
4866     /* Note that otype != itype for no-op truncation.  */
4867     otype = ots->type;
4868     itype = ts->type;
4869 
4870     if (ts->val_type == TEMP_VAL_CONST) {
4871         /* propagate constant or generate sti */
4872         tcg_target_ulong val = ts->val;
4873         if (IS_DEAD_ARG(1)) {
4874             temp_dead(s, ts);
4875         }
4876         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4877         return;
4878     }
4879 
4880     /* If the source value is in memory we're going to be forced
4881        to have it in a register in order to perform the copy.  Copy
4882        the SOURCE value into its own register first, that way we
4883        don't have to reload SOURCE the next time it is used. */
4884     if (ts->val_type == TEMP_VAL_MEM) {
4885         temp_load(s, ts, tcg_target_available_regs[itype],
4886                   allocated_regs, preferred_regs);
4887     }
4888     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4889     ireg = ts->reg;
4890 
4891     if (IS_DEAD_ARG(0)) {
4892         /* mov to a non-saved dead register makes no sense (even with
4893            liveness analysis disabled). */
4894         tcg_debug_assert(NEED_SYNC_ARG(0));
4895         if (!ots->mem_allocated) {
4896             temp_allocate_frame(s, ots);
4897         }
4898         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4899         if (IS_DEAD_ARG(1)) {
4900             temp_dead(s, ts);
4901         }
4902         temp_dead(s, ots);
4903         return;
4904     }
4905 
4906     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4907         /*
4908          * The mov can be suppressed.  Kill input first, so that it
4909          * is unlinked from reg_to_temp, then set the output to the
4910          * reg that we saved from the input.
4911          */
4912         temp_dead(s, ts);
4913         oreg = ireg;
4914     } else {
4915         if (ots->val_type == TEMP_VAL_REG) {
4916             oreg = ots->reg;
4917         } else {
4918             /* Make sure to not spill the input register during allocation. */
4919             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4920                                  allocated_regs | ((TCGRegSet)1 << ireg),
4921                                  preferred_regs, ots->indirect_base);
4922         }
4923         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4924             /*
4925              * Cross register class move not supported.
4926              * Store the source register into the destination slot
4927              * and leave the destination temp as TEMP_VAL_MEM.
4928              */
4929             assert(!temp_readonly(ots));
4930             if (!ots->mem_allocated) {
4931                 temp_allocate_frame(s, ots);
4932             }
4933             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4934             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4935             ots->mem_coherent = 1;
4936             return;
4937         }
4938     }
4939     set_temp_val_reg(s, ots, oreg);
4940     ots->mem_coherent = 0;
4941 
4942     if (NEED_SYNC_ARG(0)) {
4943         temp_sync(s, ots, allocated_regs, 0, 0);
4944     }
4945 }
4946 
4947 /*
4948  * Specialized code generation for INDEX_op_dup_vec.
4949  */
4950 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4951 {
4952     const TCGLifeData arg_life = op->life;
4953     TCGRegSet dup_out_regs, dup_in_regs;
4954     const TCGArgConstraint *dup_args_ct;
4955     TCGTemp *its, *ots;
4956     TCGType itype, vtype;
4957     unsigned vece;
4958     int lowpart_ofs;
4959     bool ok;
4960 
4961     ots = arg_temp(op->args[0]);
4962     its = arg_temp(op->args[1]);
4963 
4964     /* ENV should not be modified.  */
4965     tcg_debug_assert(!temp_readonly(ots));
4966 
4967     itype = its->type;
4968     vece = TCGOP_VECE(op);
4969     vtype = TCGOP_TYPE(op);
4970 
4971     if (its->val_type == TEMP_VAL_CONST) {
4972         /* Propagate constant via movi -> dupi.  */
4973         tcg_target_ulong val = its->val;
4974         if (IS_DEAD_ARG(1)) {
4975             temp_dead(s, its);
4976         }
4977         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4978         return;
4979     }
4980 
4981     dup_args_ct = opcode_args_ct(op);
4982     dup_out_regs = dup_args_ct[0].regs;
4983     dup_in_regs = dup_args_ct[1].regs;
4984 
4985     /* Allocate the output register now.  */
4986     if (ots->val_type != TEMP_VAL_REG) {
4987         TCGRegSet allocated_regs = s->reserved_regs;
4988         TCGReg oreg;
4989 
4990         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4991             /* Make sure to not spill the input register. */
4992             tcg_regset_set_reg(allocated_regs, its->reg);
4993         }
4994         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4995                              output_pref(op, 0), ots->indirect_base);
4996         set_temp_val_reg(s, ots, oreg);
4997     }
4998 
4999     switch (its->val_type) {
5000     case TEMP_VAL_REG:
5001         /*
5002          * The dup constraints must be broad, covering all possible VECE.
5003          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5004          * to fail, indicating that extra moves are required for that case.
5005          */
5006         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5007             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5008                 goto done;
5009             }
5010             /* Try again from memory or a vector input register.  */
5011         }
5012         if (!its->mem_coherent) {
5013             /*
5014              * The input register is not synced, and so an extra store
5015              * would be required to use memory.  Attempt an integer-vector
5016              * register move first.  We do not have a TCGRegSet for this.
5017              */
5018             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5019                 break;
5020             }
5021             /* Sync the temp back to its slot and load from there.  */
5022             temp_sync(s, its, s->reserved_regs, 0, 0);
5023         }
5024         /* fall through */
5025 
5026     case TEMP_VAL_MEM:
5027         lowpart_ofs = 0;
5028         if (HOST_BIG_ENDIAN) {
5029             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5030         }
5031         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5032                              its->mem_offset + lowpart_ofs)) {
5033             goto done;
5034         }
5035         /* Load the input into the destination vector register. */
5036         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5037         break;
5038 
5039     default:
5040         g_assert_not_reached();
5041     }
5042 
5043     /* We now have a vector input register, so dup must succeed. */
5044     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5045     tcg_debug_assert(ok);
5046 
5047  done:
5048     ots->mem_coherent = 0;
5049     if (IS_DEAD_ARG(1)) {
5050         temp_dead(s, its);
5051     }
5052     if (NEED_SYNC_ARG(0)) {
5053         temp_sync(s, ots, s->reserved_regs, 0, 0);
5054     }
5055     if (IS_DEAD_ARG(0)) {
5056         temp_dead(s, ots);
5057     }
5058 }
5059 
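/*
 * Allocate registers for a generic op: copy the constant args, satisfy
 * the input constraints (including register pairs and aliases), free
 * dead inputs, then handle branch/bb-end bookkeeping or call clobbers
 * and global syncing, allocate the outputs, emit the instruction, and
 * finally sync or discard the outputs as liveness requires.
 */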
5060 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5061 {
5062     const TCGLifeData arg_life = op->life;
5063     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5064     TCGRegSet i_allocated_regs;
5065     TCGRegSet o_allocated_regs;
5066     int i, k, nb_iargs, nb_oargs;
5067     TCGReg reg;
5068     TCGArg arg;
5069     const TCGArgConstraint *args_ct;
5070     const TCGArgConstraint *arg_ct;
5071     TCGTemp *ts;
5072     TCGArg new_args[TCG_MAX_OP_ARGS];
5073     int const_args[TCG_MAX_OP_ARGS];
5074     TCGCond op_cond;
5075 
5076     nb_oargs = def->nb_oargs;
5077     nb_iargs = def->nb_iargs;
5078 
5079     /* copy constants */
5080     memcpy(new_args + nb_oargs + nb_iargs,
5081            op->args + nb_oargs + nb_iargs,
5082            sizeof(TCGArg) * def->nb_cargs);
5083 
5084     i_allocated_regs = s->reserved_regs;
5085     o_allocated_regs = s->reserved_regs;
5086 
5087     switch (op->opc) {
5088     case INDEX_op_brcond_i32:
5089     case INDEX_op_brcond_i64:
5090         op_cond = op->args[2];
5091         break;
5092     case INDEX_op_setcond_i32:
5093     case INDEX_op_setcond_i64:
5094     case INDEX_op_negsetcond_i32:
5095     case INDEX_op_negsetcond_i64:
5096     case INDEX_op_cmp_vec:
5097         op_cond = op->args[3];
5098         break;
5099     case INDEX_op_brcond2_i32:
5100         op_cond = op->args[4];
5101         break;
5102     case INDEX_op_movcond_i32:
5103     case INDEX_op_movcond_i64:
5104     case INDEX_op_setcond2_i32:
5105     case INDEX_op_cmpsel_vec:
5106         op_cond = op->args[5];
5107         break;
5108     default:
5109         /* No condition within opcode. */
5110         op_cond = TCG_COND_ALWAYS;
5111         break;
5112     }
5113 
5114     args_ct = opcode_args_ct(op);
5115 
5116     /* satisfy input constraints */
5117     for (k = 0; k < nb_iargs; k++) {
5118         TCGRegSet i_preferred_regs, i_required_regs;
5119         bool allocate_new_reg, copyto_new_reg;
5120         TCGTemp *ts2;
5121         int i1, i2;
5122 
5123         i = args_ct[nb_oargs + k].sort_index;
5124         arg = op->args[i];
5125         arg_ct = &args_ct[i];
5126         ts = arg_temp(arg);
5127 
5128         if (ts->val_type == TEMP_VAL_CONST) {
5129 #ifdef TCG_REG_ZERO
5130             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5131                 /* Hardware zero register: indicate register via non-const. */
5132                 const_args[i] = 0;
5133                 new_args[i] = TCG_REG_ZERO;
5134                 continue;
5135             }
5136 #endif
5137 
5138             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5139                                        op_cond, TCGOP_VECE(op))) {
5140                 /* constant is OK for instruction */
5141                 const_args[i] = 1;
5142                 new_args[i] = ts->val;
5143                 continue;
5144             }
5145         }
5146 
5147         reg = ts->reg;
5148         i_preferred_regs = 0;
5149         i_required_regs = arg_ct->regs;
5150         allocate_new_reg = false;
5151         copyto_new_reg = false;
5152 
5153         switch (arg_ct->pair) {
5154         case 0: /* not paired */
5155             if (arg_ct->ialias) {
5156                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5157 
5158                 /*
5159                  * If the input is readonly, then it cannot also be an
5160                  * output and aliased to itself.  If the input is not
5161                  * dead after the instruction, we must allocate a new
5162                  * register and move it.
5163                  */
5164                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5165                     || args_ct[arg_ct->alias_index].newreg) {
5166                     allocate_new_reg = true;
5167                 } else if (ts->val_type == TEMP_VAL_REG) {
5168                     /*
5169                      * Check if the current register has already been
5170                      * allocated for another input.
5171                      */
5172                     allocate_new_reg =
5173                         tcg_regset_test_reg(i_allocated_regs, reg);
5174                 }
5175             }
5176             if (!allocate_new_reg) {
5177                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5178                           i_preferred_regs);
5179                 reg = ts->reg;
5180                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5181             }
5182             if (allocate_new_reg) {
5183                 /*
5184                  * Allocate a new register matching the constraint
5185                  * and move the temporary register into it.
5186                  */
5187                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5188                           i_allocated_regs, 0);
5189                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5190                                     i_preferred_regs, ts->indirect_base);
5191                 copyto_new_reg = true;
5192             }
5193             break;
5194 
5195         case 1:
5196             /* First of an input pair; if i1 == i2, the second is an output. */
5197             i1 = i;
5198             i2 = arg_ct->pair_index;
5199             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5200 
5201             /*
5202              * It is easier to default to allocating a new pair
5203              * and to identify a few cases where it's not required.
5204              */
5205             if (arg_ct->ialias) {
5206                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5207                 if (IS_DEAD_ARG(i1) &&
5208                     IS_DEAD_ARG(i2) &&
5209                     !temp_readonly(ts) &&
5210                     ts->val_type == TEMP_VAL_REG &&
5211                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5212                     tcg_regset_test_reg(i_required_regs, reg) &&
5213                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5214                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5215                     (ts2
5216                      ? ts2->val_type == TEMP_VAL_REG &&
5217                        ts2->reg == reg + 1 &&
5218                        !temp_readonly(ts2)
5219                      : s->reg_to_temp[reg + 1] == NULL)) {
5220                     break;
5221                 }
5222             } else {
5223                 /* Without aliasing, the pair must also be an input. */
5224                 tcg_debug_assert(ts2);
5225                 if (ts->val_type == TEMP_VAL_REG &&
5226                     ts2->val_type == TEMP_VAL_REG &&
5227                     ts2->reg == reg + 1 &&
5228                     tcg_regset_test_reg(i_required_regs, reg)) {
5229                     break;
5230                 }
5231             }
5232             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5233                                      0, ts->indirect_base);
5234             goto do_pair;
5235 
5236         case 2: /* pair second */
5237             reg = new_args[arg_ct->pair_index] + 1;
5238             goto do_pair;
5239 
5240         case 3: /* ialias with second output, no first input */
5241             tcg_debug_assert(arg_ct->ialias);
5242             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5243 
5244             if (IS_DEAD_ARG(i) &&
5245                 !temp_readonly(ts) &&
5246                 ts->val_type == TEMP_VAL_REG &&
5247                 reg > 0 &&
5248                 s->reg_to_temp[reg - 1] == NULL &&
5249                 tcg_regset_test_reg(i_required_regs, reg) &&
5250                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5251                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5252                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5253                 break;
5254             }
5255             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5256                                      i_allocated_regs, 0,
5257                                      ts->indirect_base);
5258             tcg_regset_set_reg(i_allocated_regs, reg);
5259             reg += 1;
5260             goto do_pair;
5261 
5262         do_pair:
5263             /*
5264              * If an aliased input is not dead after the instruction,
5265              * we must allocate a new register and move it.
5266              */
5267             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5268                 TCGRegSet t_allocated_regs = i_allocated_regs;
5269 
5270                 /*
5271                  * Because of the alias, and the continued life, make sure
5272                  * that the temp is somewhere *other* than the reg pair,
5273                  * and we get a copy in reg.
5274                  */
5275                 tcg_regset_set_reg(t_allocated_regs, reg);
5276                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5277                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5278                     /* If ts was already in reg, copy it somewhere else. */
5279                     TCGReg nr;
5280                     bool ok;
5281 
5282                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5283                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5284                                        t_allocated_regs, 0, ts->indirect_base);
5285                     ok = tcg_out_mov(s, ts->type, nr, reg);
5286                     tcg_debug_assert(ok);
5287 
5288                     set_temp_val_reg(s, ts, nr);
5289                 } else {
5290                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5291                               t_allocated_regs, 0);
5292                     copyto_new_reg = true;
5293                 }
5294             } else {
5295                 /* Preferably allocate to reg, otherwise copy. */
5296                 i_required_regs = (TCGRegSet)1 << reg;
5297                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5298                           i_preferred_regs);
5299                 copyto_new_reg = ts->reg != reg;
5300             }
5301             break;
5302 
5303         default:
5304             g_assert_not_reached();
5305         }
5306 
5307         if (copyto_new_reg) {
5308             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5309                 /*
5310                  * Cross register class move not supported.  Sync the
5311                  * temp back to its slot and load from there.
5312                  */
5313                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5314                 tcg_out_ld(s, ts->type, reg,
5315                            ts->mem_base->reg, ts->mem_offset);
5316             }
5317         }
5318         new_args[i] = reg;
5319         const_args[i] = 0;
5320         tcg_regset_set_reg(i_allocated_regs, reg);
5321     }
5322 
5323     /* mark dead temporaries and free the associated registers */
5324     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5325         if (IS_DEAD_ARG(i)) {
5326             temp_dead(s, arg_temp(op->args[i]));
5327         }
5328     }
5329 
5330     if (def->flags & TCG_OPF_COND_BRANCH) {
5331         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5332     } else if (def->flags & TCG_OPF_BB_END) {
5333         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5334     } else {
5335         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5336             /* XXX: permit generic clobber register list? */
5337             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5338                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5339                     tcg_reg_free(s, i, i_allocated_regs);
5340                 }
5341             }
5342         }
5343         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5344             /* sync globals if the op has side effects and might trigger
5345                an exception. */
5346             sync_globals(s, i_allocated_regs);
5347         }
5348 
5349         /* satisfy the output constraints */
5350         for (k = 0; k < nb_oargs; k++) {
5351             i = args_ct[k].sort_index;
5352             arg = op->args[i];
5353             arg_ct = &args_ct[i];
5354             ts = arg_temp(arg);
5355 
5356             /* ENV should not be modified.  */
5357             tcg_debug_assert(!temp_readonly(ts));
5358 
5359             switch (arg_ct->pair) {
5360             case 0: /* not paired */
5361                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5362                     reg = new_args[arg_ct->alias_index];
5363                 } else if (arg_ct->newreg) {
5364                     reg = tcg_reg_alloc(s, arg_ct->regs,
5365                                         i_allocated_regs | o_allocated_regs,
5366                                         output_pref(op, k), ts->indirect_base);
5367                 } else {
5368                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5369                                         output_pref(op, k), ts->indirect_base);
5370                 }
5371                 break;
5372 
5373             case 1: /* first of pair */
5374                 if (arg_ct->oalias) {
5375                     reg = new_args[arg_ct->alias_index];
5376                 } else if (arg_ct->newreg) {
5377                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5378                                              i_allocated_regs | o_allocated_regs,
5379                                              output_pref(op, k),
5380                                              ts->indirect_base);
5381                 } else {
5382                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5383                                              output_pref(op, k),
5384                                              ts->indirect_base);
5385                 }
5386                 break;
5387 
5388             case 2: /* second of pair */
5389                 if (arg_ct->oalias) {
5390                     reg = new_args[arg_ct->alias_index];
5391                 } else {
5392                     reg = new_args[arg_ct->pair_index] + 1;
5393                 }
5394                 break;
5395 
5396             case 3: /* first of pair, aliasing with a second input */
5397                 tcg_debug_assert(!arg_ct->newreg);
5398                 reg = new_args[arg_ct->pair_index] - 1;
5399                 break;
5400 
5401             default:
5402                 g_assert_not_reached();
5403             }
5404             tcg_regset_set_reg(o_allocated_regs, reg);
5405             set_temp_val_reg(s, ts, reg);
5406             ts->mem_coherent = 0;
5407             new_args[i] = reg;
5408         }
5409     }
5410 
5411     /* emit instruction */
5412     switch (op->opc) {
5413     case INDEX_op_ext_i32_i64:
5414         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5415         break;
5416     case INDEX_op_extu_i32_i64:
5417         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5418         break;
5419     case INDEX_op_extrl_i64_i32:
5420         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5421         break;
5422     default:
5423         if (def->flags & TCG_OPF_VECTOR) {
5424             tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
5425                            TCGOP_VECE(op), new_args, const_args);
5426         } else {
5427             tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
5428         }
5429         break;
5430     }
5431 
5432     /* move the outputs in the correct register if needed */
5433     for (i = 0; i < nb_oargs; i++) {
5434         ts = arg_temp(op->args[i]);
5435 
5436         /* ENV should not be modified.  */
5437         tcg_debug_assert(!temp_readonly(ts));
5438 
5439         if (NEED_SYNC_ARG(i)) {
5440             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5441         } else if (IS_DEAD_ARG(i)) {
5442             temp_dead(s, ts);
5443         }
5444     }
5445 }
5446 
5447 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5448 {
5449     const TCGLifeData arg_life = op->life;
5450     TCGTemp *ots, *itsl, *itsh;
5451     TCGType vtype = TCGOP_TYPE(op);
5452 
5453     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5454     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5455     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5456 
5457     ots = arg_temp(op->args[0]);
5458     itsl = arg_temp(op->args[1]);
5459     itsh = arg_temp(op->args[2]);
5460 
5461     /* ENV should not be modified.  */
5462     tcg_debug_assert(!temp_readonly(ots));
5463 
5464     /* Allocate the output register now.  */
5465     if (ots->val_type != TEMP_VAL_REG) {
5466         TCGRegSet allocated_regs = s->reserved_regs;
5467         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5468         TCGReg oreg;
5469 
5470         /* Make sure to not spill the input registers. */
5471         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5472             tcg_regset_set_reg(allocated_regs, itsl->reg);
5473         }
5474         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5475             tcg_regset_set_reg(allocated_regs, itsh->reg);
5476         }
5477 
5478         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5479                              output_pref(op, 0), ots->indirect_base);
5480         set_temp_val_reg(s, ots, oreg);
5481     }
5482 
5483     /* Promote dup2 of immediates to dupi_vec. */
5484     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5485         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5486         MemOp vece = MO_64;
5487 
5488         if (val == dup_const(MO_8, val)) {
5489             vece = MO_8;
5490         } else if (val == dup_const(MO_16, val)) {
5491             vece = MO_16;
5492         } else if (val == dup_const(MO_32, val)) {
5493             vece = MO_32;
5494         }
5495 
5496         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5497         goto done;
5498     }
5499 
5500     /* If the two inputs form one 64-bit value, try dupm_vec. */
5501     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5502         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5503         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5504         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5505 
5506         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5507         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5508 
5509         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5510                              its->mem_base->reg, its->mem_offset)) {
5511             goto done;
5512         }
5513     }
5514 
5515     /* Fall back to generic expansion. */
5516     return false;
5517 
5518  done:
5519     ots->mem_coherent = 0;
5520     if (IS_DEAD_ARG(1)) {
5521         temp_dead(s, itsl);
5522     }
5523     if (IS_DEAD_ARG(2)) {
5524         temp_dead(s, itsh);
5525     }
5526     if (NEED_SYNC_ARG(0)) {
5527         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5528     } else if (IS_DEAD_ARG(0)) {
5529         temp_dead(s, ots);
5530     }
5531     return true;
5532 }
5533 
5534 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5535                          TCGRegSet allocated_regs)
5536 {
5537     if (ts->val_type == TEMP_VAL_REG) {
5538         if (ts->reg != reg) {
5539             tcg_reg_free(s, reg, allocated_regs);
5540             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5541                 /*
5542                  * Cross register class move not supported.  Sync the
5543                  * temp back to its slot and load from there.
5544                  */
5545                 temp_sync(s, ts, allocated_regs, 0, 0);
5546                 tcg_out_ld(s, ts->type, reg,
5547                            ts->mem_base->reg, ts->mem_offset);
5548             }
5549         }
5550     } else {
5551         TCGRegSet arg_set = 0;
5552 
5553         tcg_reg_free(s, reg, allocated_regs);
5554         tcg_regset_set_reg(arg_set, reg);
5555         temp_load(s, ts, arg_set, allocated_regs, 0);
5556     }
5557 }
5558 
5559 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5560                          TCGRegSet allocated_regs)
5561 {
5562     /*
5563      * When the destination is on the stack, load up the temp and store.
5564      * If there are many call-saved registers, the temp might live to
5565      * see another use; otherwise it'll be discarded.
5566      */
5567     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5568     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5569                arg_slot_stk_ofs(arg_slot));
5570 }
5571 
5572 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5573                             TCGTemp *ts, TCGRegSet *allocated_regs)
5574 {
5575     if (arg_slot_reg_p(l->arg_slot)) {
5576         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5577         load_arg_reg(s, reg, ts, *allocated_regs);
5578         tcg_regset_set_reg(*allocated_regs, reg);
5579     } else {
5580         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5581     }
5582 }
5583 
5584 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5585                          intptr_t ref_off, TCGRegSet *allocated_regs)
5586 {
5587     TCGReg reg;
5588 
5589     if (arg_slot_reg_p(arg_slot)) {
5590         reg = tcg_target_call_iarg_regs[arg_slot];
5591         tcg_reg_free(s, reg, *allocated_regs);
5592         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5593         tcg_regset_set_reg(*allocated_regs, reg);
5594     } else {
5595         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5596                             *allocated_regs, 0, false);
5597         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5598         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5599                    arg_slot_stk_ofs(arg_slot));
5600     }
5601 }
5602 
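/*
 * Allocate registers for a call: load the arguments (stacked arguments
 * first, so their temporaries are freed before register arguments are
 * pinned), free dead inputs, clobber the call-clobbered registers, save
 * or sync globals per the helper's flags, emit the call, then assign
 * and sync or discard the outputs.
 */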
5603 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5604 {
5605     const int nb_oargs = TCGOP_CALLO(op);
5606     const int nb_iargs = TCGOP_CALLI(op);
5607     const TCGLifeData arg_life = op->life;
5608     const TCGHelperInfo *info = tcg_call_info(op);
5609     TCGRegSet allocated_regs = s->reserved_regs;
5610     int i;
5611 
5612     /*
5613      * Move inputs into place in reverse order,
5614      * so that we place stacked arguments first.
5615      */
5616     for (i = nb_iargs - 1; i >= 0; --i) {
5617         const TCGCallArgumentLoc *loc = &info->in[i];
5618         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5619 
5620         switch (loc->kind) {
5621         case TCG_CALL_ARG_NORMAL:
5622         case TCG_CALL_ARG_EXTEND_U:
5623         case TCG_CALL_ARG_EXTEND_S:
5624             load_arg_normal(s, loc, ts, &allocated_regs);
5625             break;
5626         case TCG_CALL_ARG_BY_REF:
5627             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5628             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5629                          arg_slot_stk_ofs(loc->ref_slot),
5630                          &allocated_regs);
5631             break;
5632         case TCG_CALL_ARG_BY_REF_N:
5633             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5634             break;
5635         default:
5636             g_assert_not_reached();
5637         }
5638     }
5639 
5640     /* Mark dead temporaries and free the associated registers.  */
5641     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5642         if (IS_DEAD_ARG(i)) {
5643             temp_dead(s, arg_temp(op->args[i]));
5644         }
5645     }
5646 
5647     /* Clobber call registers.  */
5648     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5649         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5650             tcg_reg_free(s, i, allocated_regs);
5651         }
5652     }
5653 
5654     /*
5655      * Save globals if they might be written by the helper,
5656      * sync them if they might be read.
5657      */
5658     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5659         /* Nothing to do */
5660     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5661         sync_globals(s, allocated_regs);
5662     } else {
5663         save_globals(s, allocated_regs);
5664     }
5665 
5666     /*
5667      * If the ABI passes a pointer to the returned struct as the first
5668      * argument, load that now.  Pass a pointer to the output home slot.
5669      */
5670     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5671         TCGTemp *ts = arg_temp(op->args[0]);
5672 
5673         if (!ts->mem_allocated) {
5674             temp_allocate_frame(s, ts);
5675         }
5676         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5677     }
5678 
5679     tcg_out_call(s, tcg_call_func(op), info);
5680 
5681     /* Assign output registers and emit moves if needed.  */
5682     switch (info->out_kind) {
5683     case TCG_CALL_RET_NORMAL:
5684         for (i = 0; i < nb_oargs; i++) {
5685             TCGTemp *ts = arg_temp(op->args[i]);
5686             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5687 
5688             /* ENV should not be modified.  */
5689             tcg_debug_assert(!temp_readonly(ts));
5690 
5691             set_temp_val_reg(s, ts, reg);
5692             ts->mem_coherent = 0;
5693         }
5694         break;
5695 
5696     case TCG_CALL_RET_BY_VEC:
5697         {
5698             TCGTemp *ts = arg_temp(op->args[0]);
5699 
5700             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5701             tcg_debug_assert(ts->temp_subindex == 0);
5702             if (!ts->mem_allocated) {
5703                 temp_allocate_frame(s, ts);
5704             }
5705             tcg_out_st(s, TCG_TYPE_V128,
5706                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5707                        ts->mem_base->reg, ts->mem_offset);
5708         }
5709         /* fall through to mark all parts in memory */
5710 
5711     case TCG_CALL_RET_BY_REF:
5712         /* The callee has performed a write through the reference. */
5713         for (i = 0; i < nb_oargs; i++) {
5714             TCGTemp *ts = arg_temp(op->args[i]);
5715             ts->val_type = TEMP_VAL_MEM;
5716         }
5717         break;
5718 
5719     default:
5720         g_assert_not_reached();
5721     }
5722 
5723     /* Flush or discard output registers as needed. */
5724     for (i = 0; i < nb_oargs; i++) {
5725         TCGTemp *ts = arg_temp(op->args[i]);
5726         if (NEED_SYNC_ARG(i)) {
5727             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5728         } else if (IS_DEAD_ARG(i)) {
5729             temp_dead(s, ts);
5730         }
5731     }
5732 }
5733 
5734 /**
5735  * atom_and_align_for_opc:
5736  * @s: tcg context
5737  * @opc: memory operation code
5738  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5739  * @allow_two_ops: true if we are prepared to issue two operations
5740  *
5741  * Return the alignment and atomicity to use for the inline fast path
5742  * for the given memory operation.  The alignment may be larger than
5743  * that specified in @opc, and the correct alignment will be diagnosed
5744  * by the slow path helper.
5745  *
5746  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5747  * and issue two loads or stores for subalignment.
5748  */
5749 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5750                                            MemOp host_atom, bool allow_two_ops)
5751 {
5752     MemOp align = memop_alignment_bits(opc);
5753     MemOp size = opc & MO_SIZE;
5754     MemOp half = size ? size - 1 : 0;
5755     MemOp atom = opc & MO_ATOM_MASK;
5756     MemOp atmax;
5757 
5758     switch (atom) {
5759     case MO_ATOM_NONE:
5760         /* The operation requires no specific atomicity. */
5761         atmax = MO_8;
5762         break;
5763 
5764     case MO_ATOM_IFALIGN:
5765         atmax = size;
5766         break;
5767 
5768     case MO_ATOM_IFALIGN_PAIR:
5769         atmax = half;
5770         break;
5771 
5772     case MO_ATOM_WITHIN16:
5773         atmax = size;
5774         if (size == MO_128) {
5775             /* Misalignment implies !within16, and therefore no atomicity. */
5776         } else if (host_atom != MO_ATOM_WITHIN16) {
5777             /* The host does not implement within16, so require alignment. */
5778             align = MAX(align, size);
5779         }
5780         break;
5781 
5782     case MO_ATOM_WITHIN16_PAIR:
5783         atmax = size;
5784         /*
5785          * Misalignment implies !within16, and therefore half atomicity.
5786          * Any host prepared for two operations can implement this with
5787          * half alignment.
5788          */
5789         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5790             align = MAX(align, half);
5791         }
5792         break;
5793 
5794     case MO_ATOM_SUBALIGN:
5795         atmax = size;
5796         if (host_atom != MO_ATOM_SUBALIGN) {
5797             /* If unaligned but not odd, there are subobjects up to half. */
5798             if (allow_two_ops) {
5799                 align = MAX(align, half);
5800             } else {
5801                 align = MAX(align, size);
5802             }
5803         }
5804         break;
5805 
5806     default:
5807         g_assert_not_reached();
5808     }
5809 
5810     return (TCGAtomAlign){ .atom = atmax, .align = align };
5811 }
5812 
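/*
 * For example, MO_64 | MO_ATOM_WITHIN16 on a host that is only
 * single-copy atomic for aligned accesses (MO_ATOM_IFALIGN) yields
 * { .atom = MO_64, .align = MO_64 }: the alignment is raised so that
 * the inline fast path preserves the required atomicity.
 */
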
5813 /*
5814  * Similarly for qemu_ld/st slow path helpers.
5815  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5816  * using only the provided backend tcg_out_* functions.
5817  */
5818 
5819 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5820 {
5821     int ofs = arg_slot_stk_ofs(slot);
5822 
5823     /*
5824      * Each stack slot is TCG_TARGET_LONG_BITS wide.  If the host does not
5825      * require extension to uint64_t, adjust the address for uint32_t.
5826      */
5827     if (HOST_BIG_ENDIAN &&
5828         TCG_TARGET_REG_BITS == 64 &&
5829         type == TCG_TYPE_I32) {
5830         ofs += 4;
5831     }
5832     return ofs;
5833 }
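
/*
 * E.g. on a big-endian 64-bit host, a TCG_TYPE_I32 argument stored into
 * its TCG_TARGET_LONG_BITS stack slot goes at slot offset + 4, so the
 * callee reads it as the low half of the slot.
 */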
5834 
5835 static void tcg_out_helper_load_slots(TCGContext *s,
5836                                       unsigned nmov, TCGMovExtend *mov,
5837                                       const TCGLdstHelperParam *parm)
5838 {
5839     unsigned i;
5840     TCGReg dst3;
5841 
5842     /*
5843      * Start from the end, storing to the stack first.
5844      * This frees those registers, so we need not consider overlap.
5845      */
5846     for (i = nmov; i-- > 0; ) {
5847         unsigned slot = mov[i].dst;
5848 
5849         if (arg_slot_reg_p(slot)) {
5850             goto found_reg;
5851         }
5852 
5853         TCGReg src = mov[i].src;
5854         TCGType dst_type = mov[i].dst_type;
5855         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5856 
5857         /* The argument is going onto the stack; extend into scratch. */
5858         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5859             tcg_debug_assert(parm->ntmp != 0);
5860             mov[i].dst = src = parm->tmp[0];
5861             tcg_out_movext1(s, &mov[i]);
5862         }
5863 
5864         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5865                    tcg_out_helper_stk_ofs(dst_type, slot));
5866     }
5867     return;
5868 
5869  found_reg:
5870     /*
5871      * The remaining arguments are in registers.
5872      * Convert slot numbers to argument registers.
5873      */
5874     nmov = i + 1;
5875     for (i = 0; i < nmov; ++i) {
5876         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5877     }
5878 
5879     switch (nmov) {
5880     case 4:
5881         /* The backend must have provided enough temps for the worst case. */
5882         tcg_debug_assert(parm->ntmp >= 2);
5883 
5884         dst3 = mov[3].dst;
5885         for (unsigned j = 0; j < 3; ++j) {
5886             if (dst3 == mov[j].src) {
5887                 /*
5888                  * Conflict. Copy the source to a temporary, perform the
5889                  * remaining moves, then the extension from our scratch
5890                  * on the way out.
5891                  */
5892                 TCGReg scratch = parm->tmp[1];
5893 
5894                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5895                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5896                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5897                 break;
5898             }
5899         }
5900 
5901         /* No conflicts: perform this move and continue. */
5902         tcg_out_movext1(s, &mov[3]);
5903         /* fall through */
5904 
5905     case 3:
5906         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5907                         parm->ntmp ? parm->tmp[0] : -1);
5908         break;
5909     case 2:
5910         tcg_out_movext2(s, mov, mov + 1,
5911                         parm->ntmp ? parm->tmp[0] : -1);
5912         break;
5913     case 1:
5914         tcg_out_movext1(s, mov);
5915         break;
5916     default:
5917         g_assert_not_reached();
5918     }
5919 }
5920 
5921 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5922                                     TCGType type, tcg_target_long imm,
5923                                     const TCGLdstHelperParam *parm)
5924 {
5925     if (arg_slot_reg_p(slot)) {
5926         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5927     } else {
5928         int ofs = tcg_out_helper_stk_ofs(type, slot);
5929         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5930             tcg_debug_assert(parm->ntmp != 0);
5931             tcg_out_movi(s, type, parm->tmp[0], imm);
5932             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5933         }
5934     }
5935 }
5936 
5937 static void tcg_out_helper_load_common_args(TCGContext *s,
5938                                             const TCGLabelQemuLdst *ldst,
5939                                             const TCGLdstHelperParam *parm,
5940                                             const TCGHelperInfo *info,
5941                                             unsigned next_arg)
5942 {
5943     TCGMovExtend ptr_mov = {
5944         .dst_type = TCG_TYPE_PTR,
5945         .src_type = TCG_TYPE_PTR,
5946         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5947     };
5948     const TCGCallArgumentLoc *loc = &info->in[0];
5949     TCGType type;
5950     unsigned slot;
5951     tcg_target_ulong imm;
5952 
5953     /*
5954      * Handle env, which is always first.
5955      */
5956     ptr_mov.dst = loc->arg_slot;
5957     ptr_mov.src = TCG_AREG0;
5958     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5959 
5960     /*
5961      * Handle oi.
5962      */
5963     imm = ldst->oi;
5964     loc = &info->in[next_arg];
5965     type = TCG_TYPE_I32;
5966     switch (loc->kind) {
5967     case TCG_CALL_ARG_NORMAL:
5968         break;
5969     case TCG_CALL_ARG_EXTEND_U:
5970     case TCG_CALL_ARG_EXTEND_S:
5971         /* No extension required for MemOpIdx. */
5972         tcg_debug_assert(imm <= INT32_MAX);
5973         type = TCG_TYPE_REG;
5974         break;
5975     default:
5976         g_assert_not_reached();
5977     }
5978     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5979     next_arg++;
5980 
5981     /*
5982      * Handle ra.
5983      */
5984     loc = &info->in[next_arg];
5985     slot = loc->arg_slot;
5986     if (parm->ra_gen) {
5987         int arg_reg = -1;
5988         TCGReg ra_reg;
5989 
5990         if (arg_slot_reg_p(slot)) {
5991             arg_reg = tcg_target_call_iarg_regs[slot];
5992         }
5993         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5994 
5995         ptr_mov.dst = slot;
5996         ptr_mov.src = ra_reg;
5997         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5998     } else {
5999         imm = (uintptr_t)ldst->raddr;
6000         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6001     }
6002 }
6003 
6004 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6005                                        const TCGCallArgumentLoc *loc,
6006                                        TCGType dst_type, TCGType src_type,
6007                                        TCGReg lo, TCGReg hi)
6008 {
6009     MemOp reg_mo;
6010 
6011     if (dst_type <= TCG_TYPE_REG) {
6012         MemOp src_ext;
6013 
6014         switch (loc->kind) {
6015         case TCG_CALL_ARG_NORMAL:
6016             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6017             break;
6018         case TCG_CALL_ARG_EXTEND_U:
6019             dst_type = TCG_TYPE_REG;
6020             src_ext = MO_UL;
6021             break;
6022         case TCG_CALL_ARG_EXTEND_S:
6023             dst_type = TCG_TYPE_REG;
6024             src_ext = MO_SL;
6025             break;
6026         default:
6027             g_assert_not_reached();
6028         }
6029 
6030         mov[0].dst = loc->arg_slot;
6031         mov[0].dst_type = dst_type;
6032         mov[0].src = lo;
6033         mov[0].src_type = src_type;
6034         mov[0].src_ext = src_ext;
6035         return 1;
6036     }
6037 
6038     if (TCG_TARGET_REG_BITS == 32) {
6039         assert(dst_type == TCG_TYPE_I64);
6040         reg_mo = MO_32;
6041     } else {
6042         assert(dst_type == TCG_TYPE_I128);
6043         reg_mo = MO_64;
6044     }
6045 
6046     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6047     mov[0].src = lo;
6048     mov[0].dst_type = TCG_TYPE_REG;
6049     mov[0].src_type = TCG_TYPE_REG;
6050     mov[0].src_ext = reg_mo;
6051 
6052     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6053     mov[1].src = hi;
6054     mov[1].dst_type = TCG_TYPE_REG;
6055     mov[1].src_type = TCG_TYPE_REG;
6056     mov[1].src_ext = reg_mo;
6057 
6058     return 2;
6059 }
6060 
6061 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6062                                    const TCGLdstHelperParam *parm)
6063 {
6064     const TCGHelperInfo *info;
6065     const TCGCallArgumentLoc *loc;
6066     TCGMovExtend mov[2];
6067     unsigned next_arg, nmov;
6068     MemOp mop = get_memop(ldst->oi);
6069 
6070     switch (mop & MO_SIZE) {
6071     case MO_8:
6072     case MO_16:
6073     case MO_32:
6074         info = &info_helper_ld32_mmu;
6075         break;
6076     case MO_64:
6077         info = &info_helper_ld64_mmu;
6078         break;
6079     case MO_128:
6080         info = &info_helper_ld128_mmu;
6081         break;
6082     default:
6083         g_assert_not_reached();
6084     }
6085 
6086     /* Defer env argument. */
6087     next_arg = 1;
6088 
6089     loc = &info->in[next_arg];
6090     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6091         /*
6092          * 32-bit host with 32-bit guest: zero-extend the guest address
6093          * to 64-bits for the helper by storing the low part, then
6094          * load a zero for the high part.
6095          */
6096         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6097                                TCG_TYPE_I32, TCG_TYPE_I32,
6098                                ldst->addr_reg, -1);
6099         tcg_out_helper_load_slots(s, 1, mov, parm);
6100 
6101         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6102                                 TCG_TYPE_I32, 0, parm);
6103         next_arg += 2;
6104     } else {
6105         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6106                                       ldst->addr_reg, -1);
6107         tcg_out_helper_load_slots(s, nmov, mov, parm);
6108         next_arg += nmov;
6109     }
6110 
6111     switch (info->out_kind) {
6112     case TCG_CALL_RET_NORMAL:
6113     case TCG_CALL_RET_BY_VEC:
6114         break;
6115     case TCG_CALL_RET_BY_REF:
6116         /*
6117          * The return reference is in the first argument slot.
6118          * We need memory in which to return: re-use the top of stack.
6119          */
6120         {
6121             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6122 
6123             if (arg_slot_reg_p(0)) {
6124                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6125                                  TCG_REG_CALL_STACK, ofs_slot0);
6126             } else {
6127                 tcg_debug_assert(parm->ntmp != 0);
6128                 tcg_out_addi_ptr(s, parm->tmp[0],
6129                                  TCG_REG_CALL_STACK, ofs_slot0);
6130                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6131                            TCG_REG_CALL_STACK, ofs_slot0);
6132             }
6133         }
6134         break;
6135     default:
6136         g_assert_not_reached();
6137     }
6138 
6139     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6140 }
6141 
6142 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6143                                   bool load_sign,
6144                                   const TCGLdstHelperParam *parm)
6145 {
6146     MemOp mop = get_memop(ldst->oi);
6147     TCGMovExtend mov[2];
6148     int ofs_slot0;
6149 
6150     switch (ldst->type) {
6151     case TCG_TYPE_I64:
6152         if (TCG_TARGET_REG_BITS == 32) {
6153             break;
6154         }
6155         /* fall through */
6156 
6157     case TCG_TYPE_I32:
6158         mov[0].dst = ldst->datalo_reg;
6159         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6160         mov[0].dst_type = ldst->type;
6161         mov[0].src_type = TCG_TYPE_REG;
6162 
6163         /*
6164          * If load_sign, then we allowed the helper to perform the
6165          * appropriate sign extension to tcg_target_ulong, and all
6166          * we need now is a plain move.
6167          *
6168          * If not, then we expect the relevant extension
6169          * instruction to be no more expensive than a move, and
6170          * we thus save the icache etc by only using one of two
6171          * helper functions.
6172          */
6173         if (load_sign || !(mop & MO_SIGN)) {
6174             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6175                 mov[0].src_ext = MO_32;
6176             } else {
6177                 mov[0].src_ext = MO_64;
6178             }
6179         } else {
6180             mov[0].src_ext = mop & MO_SSIZE;
6181         }
6182         tcg_out_movext1(s, mov);
6183         return;
6184 
6185     case TCG_TYPE_I128:
6186         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6187         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6188         switch (TCG_TARGET_CALL_RET_I128) {
6189         case TCG_CALL_RET_NORMAL:
6190             break;
6191         case TCG_CALL_RET_BY_VEC:
6192             tcg_out_st(s, TCG_TYPE_V128,
6193                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6194                        TCG_REG_CALL_STACK, ofs_slot0);
6195             /* fall through */
6196         case TCG_CALL_RET_BY_REF:
6197             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6198                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6199             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6200                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6201             return;
6202         default:
6203             g_assert_not_reached();
6204         }
6205         break;
6206 
6207     default:
6208         g_assert_not_reached();
6209     }
6210 
6211     mov[0].dst = ldst->datalo_reg;
6212     mov[0].src =
6213         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6214     mov[0].dst_type = TCG_TYPE_REG;
6215     mov[0].src_type = TCG_TYPE_REG;
6216     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6217 
6218     mov[1].dst = ldst->datahi_reg;
6219     mov[1].src =
6220         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6221     mov[1].dst_type = TCG_TYPE_REG;
6222     mov[1].src_type = TCG_TYPE_REG;
6223     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6224 
6225     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6226 }
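
/*
 * Note for the fall-through paths above: the value arrives split
 * across two call output registers, with the low half in output
 * register HOST_BIG_ENDIAN and the high half in the other;
 * tcg_out_movext2 orders the two copies to tolerate overlap, using
 * parm->tmp[0] as scratch when available.
 */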
6227 
6228 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6229                                    const TCGLdstHelperParam *parm)
6230 {
6231     const TCGHelperInfo *info;
6232     const TCGCallArgumentLoc *loc;
6233     TCGMovExtend mov[4];
6234     TCGType data_type;
6235     unsigned next_arg, nmov, n;
6236     MemOp mop = get_memop(ldst->oi);
6237 
6238     switch (mop & MO_SIZE) {
6239     case MO_8:
6240     case MO_16:
6241     case MO_32:
6242         info = &info_helper_st32_mmu;
6243         data_type = TCG_TYPE_I32;
6244         break;
6245     case MO_64:
6246         info = &info_helper_st64_mmu;
6247         data_type = TCG_TYPE_I64;
6248         break;
6249     case MO_128:
6250         info = &info_helper_st128_mmu;
6251         data_type = TCG_TYPE_I128;
6252         break;
6253     default:
6254         g_assert_not_reached();
6255     }
6256 
6257     /* Defer env argument. */
6258     next_arg = 1;
6259     nmov = 0;
6260 
6261     /* Handle addr argument. */
6262     loc = &info->in[next_arg];
6263     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6264     if (TCG_TARGET_REG_BITS == 32) {
6265         /*
6266          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6267          * to 64 bits for the helper by storing the low part.  Later,
6268          * after we have processed the register inputs, we will load a
6269          * zero for the high part.
6270          */
6271         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6272                                TCG_TYPE_I32, TCG_TYPE_I32,
6273                                ldst->addr_reg, -1);
6274         next_arg += 2;
6275         nmov += 1;
6276     } else {
6277         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6278                                    ldst->addr_reg, -1);
6279         next_arg += n;
6280         nmov += n;
6281     }
6282 
6283     /* Handle data argument. */
6284     loc = &info->in[next_arg];
6285     switch (loc->kind) {
6286     case TCG_CALL_ARG_NORMAL:
6287     case TCG_CALL_ARG_EXTEND_U:
6288     case TCG_CALL_ARG_EXTEND_S:
6289         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6290                                    ldst->datalo_reg, ldst->datahi_reg);
6291         next_arg += n;
6292         nmov += n;
6293         tcg_out_helper_load_slots(s, nmov, mov, parm);
6294         break;
6295 
6296     case TCG_CALL_ARG_BY_REF:
6297         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6298         tcg_debug_assert(data_type == TCG_TYPE_I128);
6299         tcg_out_st(s, TCG_TYPE_I64,
6300                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6301                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6302         tcg_out_st(s, TCG_TYPE_I64,
6303                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6304                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6305 
6306         tcg_out_helper_load_slots(s, nmov, mov, parm);
6307 
6308         if (arg_slot_reg_p(loc->arg_slot)) {
6309             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6310                              TCG_REG_CALL_STACK,
6311                              arg_slot_stk_ofs(loc->ref_slot));
6312         } else {
6313             tcg_debug_assert(parm->ntmp != 0);
6314             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6315                              arg_slot_stk_ofs(loc->ref_slot));
6316             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6317                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6318         }
6319         next_arg += 2;
6320         break;
6321 
6322     default:
6323         g_assert_not_reached();
6324     }
6325 
6326     if (TCG_TARGET_REG_BITS == 32) {
6327         /* Zero extend the address by loading a zero for the high part. */
6328         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6329         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6330     }
6331 
6332     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6333 }
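
/*
 * For orientation: the store helpers assumed here (declared in
 * tcg/tcg-ldst.h) have the shape
 *
 *     void helper_stq_mmu(CPUArchState *env, uint64_t addr,
 *                         uint64_t val, MemOpIdx oi, uintptr_t ra);
 *
 * with the 128-bit variant's value passed in registers or by
 * reference according to TCG_TARGET_CALL_ARG_I128, as handled above.
 */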
6334 
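/*
 * Generate host code for TB.  Returns the size in bytes of the
 * generated code on success.  Negative values request a restart:
 * -1 if the code buffer overflowed (code_ptr passed
 * code_gen_highwater), -2 if the TB grew too large for the 16-bit
 * gen_insn_end_off entries or relocations could not be resolved.
 */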
6335 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6336 {
6337     int i, start_words, num_insns;
6338     TCGOp *op;
6339 
6340     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6341                  && qemu_log_in_addr_range(pc_start))) {
6342         FILE *logfile = qemu_log_trylock();
6343         if (logfile) {
6344             fprintf(logfile, "OP:\n");
6345             tcg_dump_ops(s, logfile, false);
6346             fprintf(logfile, "\n");
6347             qemu_log_unlock(logfile);
6348         }
6349     }
6350 
6351 #ifdef CONFIG_DEBUG_TCG
6352     /* Ensure all labels referenced have been emitted.  */
6353     {
6354         TCGLabel *l;
6355         bool error = false;
6356 
6357         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6358             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6359                 qemu_log_mask(CPU_LOG_TB_OP,
6360                               "$L%d referenced but not present.\n", l->id);
6361                 error = true;
6362             }
6363         }
6364         assert(!error);
6365     }
6366 #endif
6367 
6368     /* Do not reuse any EBB that may be allocated within the TB. */
6369     tcg_temp_ebb_reset_freed(s);
6370 
6371     tcg_optimize(s);
6372 
6373     reachable_code_pass(s);
6374     liveness_pass_0(s);
6375     liveness_pass_1(s);
6376 
6377     if (s->nb_indirects > 0) {
6378         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6379                      && qemu_log_in_addr_range(pc_start))) {
6380             FILE *logfile = qemu_log_trylock();
6381             if (logfile) {
6382                 fprintf(logfile, "OP before indirect lowering:\n");
6383                 tcg_dump_ops(s, logfile, false);
6384                 fprintf(logfile, "\n");
6385                 qemu_log_unlock(logfile);
6386             }
6387         }
6388 
6389         /* Replace indirect temps with direct temps.  */
6390         if (liveness_pass_2(s)) {
6391             /* If changes were made, re-run liveness.  */
6392             liveness_pass_1(s);
6393         }
6394     }
6395 
6396     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6397                  && qemu_log_in_addr_range(pc_start))) {
6398         FILE *logfile = qemu_log_trylock();
6399         if (logfile) {
6400             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6401             tcg_dump_ops(s, logfile, true);
6402             fprintf(logfile, "\n");
6403             qemu_log_unlock(logfile);
6404         }
6405     }
6406 
6407     /* Initialize goto_tb jump offsets. */
6408     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6409     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6410     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6411     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6412 
6413     tcg_reg_alloc_start(s);
6414 
6415     /*
6416      * Reset the buffer pointers when restarting after overflow.
6417      * TODO: Move this into translate-all.c with the rest of the
6418      * buffer management.  Doing only this piece here is confusing.
6419      */
6420     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6421     s->code_ptr = s->code_buf;
6422     s->data_gen_ptr = NULL;
6423 
6424     QSIMPLEQ_INIT(&s->ldst_labels);
6425     s->pool_labels = NULL;
6426 
6427     start_words = s->insn_start_words;
6428     s->gen_insn_data =
6429         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
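    /* Flattened 2-D layout: word I of guest instruction N is stored
       at gen_insn_data[N * start_words + I].  */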
6430 
6431     tcg_out_tb_start(s);
6432 
6433     num_insns = -1;
6434     QTAILQ_FOREACH(op, &s->ops, link) {
6435         TCGOpcode opc = op->opc;
6436 
6437         switch (opc) {
6438         case INDEX_op_mov_i32:
6439         case INDEX_op_mov_i64:
6440         case INDEX_op_mov_vec:
6441             tcg_reg_alloc_mov(s, op);
6442             break;
6443         case INDEX_op_dup_vec:
6444             tcg_reg_alloc_dup(s, op);
6445             break;
6446         case INDEX_op_insn_start:
6447             if (num_insns >= 0) {
6448                 size_t off = tcg_current_code_size(s);
6449                 s->gen_insn_end_off[num_insns] = off;
6450                 /* Assert that we do not overflow our stored offset.  */
6451                 assert(s->gen_insn_end_off[num_insns] == off);
6452             }
6453             num_insns++;
6454             for (i = 0; i < start_words; ++i) {
6455                 s->gen_insn_data[num_insns * start_words + i] =
6456                     tcg_get_insn_start_param(op, i);
6457             }
6458             break;
6459         case INDEX_op_discard:
6460             temp_dead(s, arg_temp(op->args[0]));
6461             break;
6462         case INDEX_op_set_label:
6463             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6464             tcg_out_label(s, arg_label(op->args[0]));
6465             break;
6466         case INDEX_op_call:
6467             tcg_reg_alloc_call(s, op);
6468             break;
6469         case INDEX_op_exit_tb:
6470             tcg_out_exit_tb(s, op->args[0]);
6471             break;
6472         case INDEX_op_goto_tb:
6473             tcg_out_goto_tb(s, op->args[0]);
6474             break;
6475         case INDEX_op_dup2_vec:
6476             if (tcg_reg_alloc_dup2(s, op)) {
6477                 break;
6478             }
6479             /* fall through */
6480         default:
6481             /* Sanity check that we've not introduced any unhandled opcodes. */
6482             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6483                                               TCGOP_FLAGS(op)));
6484             /* Note: it would be faster to have specialized register
6485                allocator functions for some common argument
6486                patterns. */
6487             tcg_reg_alloc_op(s, op);
6488             break;
6489         }
6490         /* Test for (pending) buffer overflow.  The assumption is that any
6491            one operation beginning below the high water mark cannot overrun
6492            the buffer completely.  Thus we can test for overflow after
6493            generating code without having to check during generation.  */
6494         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6495             return -1;
6496         }
6497         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6498         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6499             return -2;
6500         }
6501     }
6502     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6503     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6504 
6505     /* Generate TB finalization at the end of block */
6506     i = tcg_out_ldst_finalize(s);
6507     if (i < 0) {
6508         return i;
6509     }
6510     i = tcg_out_pool_finalize(s);
6511     if (i < 0) {
6512         return i;
6513     }
6514     if (!tcg_resolve_relocs(s)) {
6515         return -2;
6516     }
6517 
6518 #ifndef CONFIG_TCG_INTERPRETER
6519     /* flush instruction cache */
6520     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6521                         (uintptr_t)s->code_buf,
6522                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6523 #endif
6524 
6525     return tcg_current_code_size(s);
6526 }
6527 
6528 #ifdef ELF_HOST_MACHINE
6529 /* In order to use this feature, the backend needs to do three things:
6530 
6531    (1) Define ELF_HOST_MACHINE, both to supply the value placed
6532        into the ELF image and to indicate support for the feature.
6533 
6534    (2) Define tcg_register_jit.  This should create a buffer containing
6535        the contents of a .debug_frame section that describes the post-
6536        prologue unwind info for the tcg machine.
6537 
6538    (3) Call tcg_register_jit_int, with the constructed .debug_frame;
6539        a sketch of (2) and (3) follows below.  */
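
/*
 * A minimal sketch of steps (2) and (3), modelled on the backends
 * (the unwind contents below are placeholders, not valid data for
 * any real host):
 *
 *     static const DebugFrameHeader debug_frame = {
 *         .cie = { ... CIE describing the host ABI ... },
 *         .fde = { ... func_start/func_len are filled in later by
 *                   tcg_register_jit_int ... },
 *     };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */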
6540 
6541 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6542 typedef enum {
6543     JIT_NOACTION = 0,
6544     JIT_REGISTER_FN,
6545     JIT_UNREGISTER_FN
6546 } jit_actions_t;
6547 
6548 struct jit_code_entry {
6549     struct jit_code_entry *next_entry;
6550     struct jit_code_entry *prev_entry;
6551     const void *symfile_addr;
6552     uint64_t symfile_size;
6553 };
6554 
6555 struct jit_descriptor {
6556     uint32_t version;
6557     uint32_t action_flag;
6558     struct jit_code_entry *relevant_entry;
6559     struct jit_code_entry *first_entry;
6560 };
6561 
6562 void __jit_debug_register_code(void) __attribute__((noinline));
6563 void __jit_debug_register_code(void)
6564 {
6565     asm("");
6566 }
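
/*
 * GDB sets a breakpoint in the (deliberately noinline) function
 * above; each call gives it a chance to re-read
 * __jit_debug_descriptor and act on action_flag and relevant_entry.
 * The empty asm prevents the call from being optimized away.
 */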
6567 
6568 /* Must statically initialize the version, because GDB may check
6569    the version before we can set it.  */
6570 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6571 
6572 /* End GDB interface.  */
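
/*
 * For completeness: unregistering an entry (never needed here, since
 * the single entry below lives for the life of the process) would
 * follow the same protocol, per the GDB documentation:
 *
 *     __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
 *     __jit_debug_descriptor.relevant_entry = entry;
 *     ... unlink entry from the linked list ...
 *     __jit_debug_register_code();
 */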
6573 
6574 static int find_string(const char *strtab, const char *str)
6575 {
6576     const char *p = strtab + 1;
6577 
6578     while (1) {
6579         if (strcmp(p, str) == 0) {
6580             return p - strtab;
6581         }
6582         p += strlen(p) + 1;
6583     }
6584 }
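
/*
 * Note that find_string loops forever if str is absent; all callers
 * pass strings known to be present in the string table below.  For
 * example, given that table, find_string(img->str, ".debug_info")
 * returns 7.
 */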
6585 
6586 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6587                                  const void *debug_frame,
6588                                  size_t debug_frame_size)
6589 {
6590     struct __attribute__((packed)) DebugInfo {
6591         uint32_t  len;
6592         uint16_t  version;
6593         uint32_t  abbrev;
6594         uint8_t   ptr_size;
6595         uint8_t   cu_die;
6596         uint16_t  cu_lang;
6597         uintptr_t cu_low_pc;
6598         uintptr_t cu_high_pc;
6599         uint8_t   fn_die;
6600         char      fn_name[16];
6601         uintptr_t fn_low_pc;
6602         uintptr_t fn_high_pc;
6603         uint8_t   cu_eoc;
6604     };
6605 
6606     struct ElfImage {
6607         ElfW(Ehdr) ehdr;
6608         ElfW(Phdr) phdr;
6609         ElfW(Shdr) shdr[7];
6610         ElfW(Sym)  sym[2];
6611         struct DebugInfo di;
6612         uint8_t    da[24];
6613         char       str[80];
6614     };
6615 
6616     struct ElfImage *img;
6617 
6618     static const struct ElfImage img_template = {
6619         .ehdr = {
6620             .e_ident[EI_MAG0] = ELFMAG0,
6621             .e_ident[EI_MAG1] = ELFMAG1,
6622             .e_ident[EI_MAG2] = ELFMAG2,
6623             .e_ident[EI_MAG3] = ELFMAG3,
6624             .e_ident[EI_CLASS] = ELF_CLASS,
6625             .e_ident[EI_DATA] = ELF_DATA,
6626             .e_ident[EI_VERSION] = EV_CURRENT,
6627             .e_type = ET_EXEC,
6628             .e_machine = ELF_HOST_MACHINE,
6629             .e_version = EV_CURRENT,
6630             .e_phoff = offsetof(struct ElfImage, phdr),
6631             .e_shoff = offsetof(struct ElfImage, shdr),
6632             .e_ehsize = sizeof(ElfW(Ehdr)),
6633             .e_phentsize = sizeof(ElfW(Phdr)),
6634             .e_phnum = 1,
6635             .e_shentsize = sizeof(ElfW(Shdr)),
6636             .e_shnum = ARRAY_SIZE(img->shdr),
6637             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6638 #ifdef ELF_HOST_FLAGS
6639             .e_flags = ELF_HOST_FLAGS,
6640 #endif
6641 #ifdef ELF_OSABI
6642             .e_ident[EI_OSABI] = ELF_OSABI,
6643 #endif
6644         },
6645         .phdr = {
6646             .p_type = PT_LOAD,
6647             .p_flags = PF_X,
6648         },
6649         .shdr = {
6650             [0] = { .sh_type = SHT_NULL },
6651             /* Trick: The contents of code_gen_buffer are not present in
6652                this fake ELF file; the buffer was allocated elsewhere.  Therefore
6653                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6654                will not look for contents.  We can record any address.  */
6655             [1] = { /* .text */
6656                 .sh_type = SHT_NOBITS,
6657                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6658             },
6659             [2] = { /* .debug_info */
6660                 .sh_type = SHT_PROGBITS,
6661                 .sh_offset = offsetof(struct ElfImage, di),
6662                 .sh_size = sizeof(struct DebugInfo),
6663             },
6664             [3] = { /* .debug_abbrev */
6665                 .sh_type = SHT_PROGBITS,
6666                 .sh_offset = offsetof(struct ElfImage, da),
6667                 .sh_size = sizeof(img->da),
6668             },
6669             [4] = { /* .debug_frame */
6670                 .sh_type = SHT_PROGBITS,
6671                 .sh_offset = sizeof(struct ElfImage),
6672             },
6673             [5] = { /* .symtab */
6674                 .sh_type = SHT_SYMTAB,
6675                 .sh_offset = offsetof(struct ElfImage, sym),
6676                 .sh_size = sizeof(img->sym),
6677                 .sh_info = 1,
6678                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6679                 .sh_entsize = sizeof(ElfW(Sym)),
6680             },
6681             [6] = { /* .strtab */
6682                 .sh_type = SHT_STRTAB,
6683                 .sh_offset = offsetof(struct ElfImage, str),
6684                 .sh_size = sizeof(img->str),
6685             }
6686         },
6687         .sym = {
6688             [1] = { /* code_gen_buffer */
6689                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6690                 .st_shndx = 1,
6691             }
6692         },
6693         .di = {
6694             .len = sizeof(struct DebugInfo) - 4,
6695             .version = 2,
6696             .ptr_size = sizeof(void *),
6697             .cu_die = 1,
6698             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6699             .fn_die = 2,
6700             .fn_name = "code_gen_buffer"
6701         },
6702         .da = {
6703             1,          /* abbrev number (the cu) */
6704             0x11, 1,    /* DW_TAG_compile_unit, has children */
6705             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6706             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6707             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6708             0, 0,       /* end of abbrev */
6709             2,          /* abbrev number (the fn) */
6710             0x2e, 0,    /* DW_TAG_subprogram, no children */
6711             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6712             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6713             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6714             0, 0,       /* end of abbrev */
6715             0           /* no more abbrev */
6716         },
6717         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6718                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6719     };
6720 
6721     /* We only need a single jit entry; statically allocate it.  */
6722     static struct jit_code_entry one_entry;
6723 
6724     uintptr_t buf = (uintptr_t)buf_ptr;
6725     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6726     DebugFrameHeader *dfh;
6727 
6728     img = g_malloc(img_size);
6729     *img = img_template;
6730 
6731     img->phdr.p_vaddr = buf;
6732     img->phdr.p_paddr = buf;
6733     img->phdr.p_memsz = buf_size;
6734 
6735     img->shdr[1].sh_name = find_string(img->str, ".text");
6736     img->shdr[1].sh_addr = buf;
6737     img->shdr[1].sh_size = buf_size;
6738 
6739     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6740     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6741 
6742     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6743     img->shdr[4].sh_size = debug_frame_size;
6744 
6745     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6746     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6747 
6748     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6749     img->sym[1].st_value = buf;
6750     img->sym[1].st_size = buf_size;
6751 
6752     img->di.cu_low_pc = buf;
6753     img->di.cu_high_pc = buf + buf_size;
6754     img->di.fn_low_pc = buf;
6755     img->di.fn_high_pc = buf + buf_size;
6756 
6757     dfh = (DebugFrameHeader *)(img + 1);
6758     memcpy(dfh, debug_frame, debug_frame_size);
6759     dfh->fde.func_start = buf;
6760     dfh->fde.func_len = buf_size;
6761 
6762 #ifdef DEBUG_JIT
6763     /* Enable this block to debug the creation of the ELF image file.
6764        The image can then be inspected with readelf, objdump, etc.  */
6765     {
6766         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6767         FILE *f = fopen(jit, "w+b");
6768         if (f) {
6769             if (fwrite(img, img_size, 1, f) != 1) {
6770                 /* Checked only to silence the unused-result warning.  */
6771             }
6772             fclose(f);
6773         }
6774     }
6775 #endif
6776 
6777     one_entry.symfile_addr = img;
6778     one_entry.symfile_size = img_size;
6779 
6780     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6781     __jit_debug_descriptor.relevant_entry = &one_entry;
6782     __jit_debug_descriptor.first_entry = &one_entry;
6783     __jit_debug_register_code();
6784 }
6785 #else
6786 /* No support for the feature.  Provide the entry point expected by exec.c,
6787    and implement the internal function we declared earlier.  */
6788 
6789 static void tcg_register_jit_int(const void *buf, size_t size,
6790                                  const void *debug_frame,
6791                                  size_t debug_frame_size)
6792 {
6793 }
6794 
6795 void tcg_register_jit(const void *buf, size_t buf_size)
6796 {
6797 }
6798 #endif /* ELF_HOST_MACHINE */
6799 
6800 #if !TCG_TARGET_MAYBE_vec
6801 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6802 {
6803     g_assert_not_reached();
6804 }
6805 #endif
6806