xref: /openbmc/qemu/tcg/tcg.c (revision c3b920b3d6a685484904d3060f3eb69401051bf0)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};
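
/*
 * Each entry records one qemu_ld/st fast path whose out-of-line slow
 * path is still to be emitted: tcg_out_ldst_finalize() below walks
 * s->ldst_labels at the end of the TB and invokes the backend's
 * tcg_out_qemu_ld/st_slow_path, which use label_ptr[] and raddr to
 * patch the branch out to the slow path and the return back from it.
 */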

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
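
/*
 * E.g. on a host whose tcg_insn_unit is one byte, tcg_out32() takes the
 * memcpy path and advances code_ptr by four units; on a host with
 * 4-byte units, the same call is a single aligned store of one unit.
 */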

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
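
/*
 * A branch emitted after its label has been bound by tcg_out_label()
 * can be encoded directly; one emitted earlier is queued on l->relocs
 * by tcg_out_reloc() and patched by tcg_resolve_relocs() above once
 * every label has a value.
 */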

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
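
/*
 * E.g. tcg_out_movext(s, TCG_TYPE_I64, d, TCG_TYPE_I32, MO_SW, r)
 * reduces to tcg_out_ext16s(s, TCG_TYPE_I64, d, r): the low 16 bits
 * of @r are sign-extended into the 64-bit destination.
 */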

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
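
/*
 * E.g. if i1->dst == i2->src and i2->dst == i1->src, the two moves form
 * a swap: either the host provides an xchg, or one value is parked in
 * @scratch first.  If only i1->dst == i2->src overlaps, emitting i2
 * before i1 suffices.
 */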

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
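
/*
 * E.g. the full rotation dst1 == src2, dst2 == src3, dst3 == src1 is the
 * "clockwise" cycle: two xchgs rotate the three values into place, or,
 * without a host xchg, src1 is parked in @scratch while i3 and i2 are
 * emitted in that order.
 */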

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
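
/*
 * The list thus ends up sorted by descending nlong and then by
 * descending data, so equal constants are adjacent and the widest
 * entries come first; tcg_out_pool_finalize() below relies on both
 * properties, deduplicating against the previous entry and aligning
 * data_gen_ptr for the first (largest) entry.
 */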

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
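
/*
 * For example, C_O1_I2(r, r, ri) below becomes C_PFX3(c_o1_i2_, r, r, ri),
 * which token-pastes to the single identifier c_o1_i2_r_r_ri (taking "r"
 * and "ri" as representative constraint strings).  The same entries from
 * tcg-target-con-set.h are expanded three times with different macro
 * definitions: once for the enum, once for the string table, and once
 * for the values returned by tcg_target_op_def().
 */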

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
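
/*
 * Under these definitions the same entry, e.g. C_O1_I2(r, r, ri), now
 * expands to the initializer { 1, 2, { "r", "r", "ri" } }, so
 * constraint_sets[] below lines up one-for-one with the enum above.
 */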

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
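
/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) expands to
 *     [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary: &outop_add.base)
 * which fails to compile unless the backend defined outop_add with
 * exactly the type TCGOutOpBinary.
 */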

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);
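    /*
     * Worked example: info_helper_ld64_mmu above packs five 3-bit
     * typecodes (return plus 4 arguments), so after the >> 3 the last
     * argument's typecode sits in bits 9..11; 32 - clz32() then yields
     * a value in the range 10..12, and DIV_ROUND_UP(..., 3) recovers
     * nargs == 4.
     */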

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
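
/*
 * E.g. on a host with 8 integer argument registers, arg_slot 9 is the
 * second stack slot: stk_slot == 1, giving an offset of
 * TCG_TARGET_CALL_STACK_OFFSET + sizeof(tcg_target_long).
 */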

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
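
/*
 * I.e. round arg_slot up to an even index, for ABIs (TCG_CALL_ARG_EVEN)
 * that require 64-bit or 128-bit arguments to start in an aligned
 * register pair or stack slot pair.
 */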

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
1342      * Allocate space from "ref_slot", which will be adjusted to
1343      * follow the parameters on the stack.
1344      */
1345     loc[0].ref_slot = cum->ref_slot;
1346 
1347     /*
1348      * Subsequent words also go into the reference slot, but
1349      * do not accumulate into the regular arguments.
1350      */
1351     for (int i = 1; i < n; ++i) {
1352         loc[i] = (TCGCallArgumentLoc){
1353             .kind = TCG_CALL_ARG_BY_REF_N,
1354             .arg_idx = cum->arg_idx,
1355             .tmp_subindex = i,
1356             .ref_slot = cum->ref_slot + i,
1357         };
1358     }
1359     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1360     cum->ref_slot += n;
1361 }
1362 
1363 static void init_call_layout(TCGHelperInfo *info)
1364 {
1365     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1366     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1367     unsigned typemask = info->typemask;
1368     unsigned typecode;
1369     TCGCumulativeArgs cum = { };
1370 
1371     /*
1372      * Parse and place any function return value.
1373      */
1374     typecode = typemask & 7;
1375     switch (typecode) {
1376     case dh_typecode_void:
1377         info->nr_out = 0;
1378         break;
1379     case dh_typecode_i32:
1380     case dh_typecode_s32:
1381     case dh_typecode_ptr:
1382         info->nr_out = 1;
1383         info->out_kind = TCG_CALL_RET_NORMAL;
1384         break;
1385     case dh_typecode_i64:
1386     case dh_typecode_s64:
1387         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1388         info->out_kind = TCG_CALL_RET_NORMAL;
1389         /* Query the last register now to trigger any assert early. */
1390         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1391         break;
1392     case dh_typecode_i128:
1393         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1394         info->out_kind = TCG_TARGET_CALL_RET_I128;
1395         switch (TCG_TARGET_CALL_RET_I128) {
1396         case TCG_CALL_RET_NORMAL:
1397             /* Query the last register now to trigger any assert early. */
1398             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1399             break;
1400         case TCG_CALL_RET_BY_VEC:
1401             /* Query the single register now to trigger any assert early. */
1402             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1403             break;
1404         case TCG_CALL_RET_BY_REF:
1405             /*
1406              * Allocate the first argument to the output.
1407              * We don't need to store this anywhere, just make it
1408              * unavailable for use in the input loop below.
1409              */
1410             cum.arg_slot = 1;
1411             break;
1412         default:
1413             qemu_build_not_reached();
1414         }
1415         break;
1416     default:
1417         g_assert_not_reached();
1418     }
1419 
1420     /*
1421      * Parse and place function arguments.
1422      */
1423     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1424         TCGCallArgumentKind kind;
1425         TCGType type;
1426 
1427         typecode = typemask & 7;
1428         switch (typecode) {
1429         case dh_typecode_i32:
1430         case dh_typecode_s32:
1431             type = TCG_TYPE_I32;
1432             break;
1433         case dh_typecode_i64:
1434         case dh_typecode_s64:
1435             type = TCG_TYPE_I64;
1436             break;
1437         case dh_typecode_ptr:
1438             type = TCG_TYPE_PTR;
1439             break;
1440         case dh_typecode_i128:
1441             type = TCG_TYPE_I128;
1442             break;
1443         default:
1444             g_assert_not_reached();
1445         }
1446 
1447         switch (type) {
1448         case TCG_TYPE_I32:
1449             switch (TCG_TARGET_CALL_ARG_I32) {
1450             case TCG_CALL_ARG_EVEN:
1451                 layout_arg_even(&cum);
1452                 /* fall through */
1453             case TCG_CALL_ARG_NORMAL:
1454                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1455                 break;
1456             case TCG_CALL_ARG_EXTEND:
1457                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1458                 layout_arg_1(&cum, info, kind);
1459                 break;
1460             default:
1461                 qemu_build_not_reached();
1462             }
1463             break;
1464 
1465         case TCG_TYPE_I64:
1466             switch (TCG_TARGET_CALL_ARG_I64) {
1467             case TCG_CALL_ARG_EVEN:
1468                 layout_arg_even(&cum);
1469                 /* fall through */
1470             case TCG_CALL_ARG_NORMAL:
1471                 if (TCG_TARGET_REG_BITS == 32) {
1472                     layout_arg_normal_n(&cum, info, 2);
1473                 } else {
1474                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1475                 }
1476                 break;
1477             default:
1478                 qemu_build_not_reached();
1479             }
1480             break;
1481 
1482         case TCG_TYPE_I128:
1483             switch (TCG_TARGET_CALL_ARG_I128) {
1484             case TCG_CALL_ARG_EVEN:
1485                 layout_arg_even(&cum);
1486                 /* fall through */
1487             case TCG_CALL_ARG_NORMAL:
1488                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1489                 break;
1490             case TCG_CALL_ARG_BY_REF:
1491                 layout_arg_by_ref(&cum, info);
1492                 break;
1493             default:
1494                 qemu_build_not_reached();
1495             }
1496             break;
1497 
1498         default:
1499             g_assert_not_reached();
1500         }
1501     }
1502     info->nr_in = cum.info_in_idx;
1503 
1504     /* Validate that we didn't overrun the input array. */
1505     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1506     /* Validate the backend has enough argument space. */
1507     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1508 
1509     /*
1510      * Relocate the "ref_slot" area to the end of the parameters.
1511      * Minimizing this stack offset helps code size for x86,
1512      * which has a signed 8-bit offset encoding.
1513      */
1514     if (cum.ref_slot != 0) {
1515         int ref_base = 0;
1516 
1517         if (cum.arg_slot > max_reg_slots) {
1518             int align = __alignof(Int128) / sizeof(tcg_target_long);
1519 
1520             ref_base = cum.arg_slot - max_reg_slots;
1521             if (align > 1) {
1522                 ref_base = ROUND_UP(ref_base, align);
1523             }
1524         }
1525         assert(ref_base + cum.ref_slot <= max_stk_slots);
1526         ref_base += max_reg_slots;
1527 
1528         if (ref_base != 0) {
1529             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1530                 TCGCallArgumentLoc *loc = &info->in[i];
1531                 switch (loc->kind) {
1532                 case TCG_CALL_ARG_BY_REF:
1533                 case TCG_CALL_ARG_BY_REF_N:
1534                     loc->ref_slot += ref_base;
1535                     break;
1536                 default:
1537                     break;
1538                 }
1539             }
1540         }
1541     }
1542 }
1543 
1544 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1545 static void process_constraint_sets(void);
1546 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1547                                             TCGReg reg, const char *name);
1548 
1549 static void tcg_context_init(unsigned max_threads)
1550 {
1551     TCGContext *s = &tcg_init_ctx;
1552     int n, i;
1553     TCGTemp *ts;
1554 
1555     memset(s, 0, sizeof(*s));
1556     s->nb_globals = 0;
1557 
1558     init_call_layout(&info_helper_ld32_mmu);
1559     init_call_layout(&info_helper_ld64_mmu);
1560     init_call_layout(&info_helper_ld128_mmu);
1561     init_call_layout(&info_helper_st32_mmu);
1562     init_call_layout(&info_helper_st64_mmu);
1563     init_call_layout(&info_helper_st128_mmu);
1564 
1565     tcg_target_init(s);
1566     process_constraint_sets();
1567 
1568     /* Reverse the order of the saved registers, assuming they're all at
1569        the start of tcg_target_reg_alloc_order.  */
1570     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1571         int r = tcg_target_reg_alloc_order[n];
1572         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1573             break;
1574         }
1575     }
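     /* n now counts the call-saved registers at the head of the order. */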
1576     for (i = 0; i < n; ++i) {
1577         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1578     }
1579     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1580         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1581     }
1582 
1583     tcg_ctx = s;
1584     /*
1585      * In user-mode we simply share the init context among threads, since we
1586      * use a single region. See the documentation of tcg_region_init() for the
1587      * reasoning behind this.
1588      * In system-mode we will have at most max_threads TCG threads.
1589      */
1590 #ifdef CONFIG_USER_ONLY
1591     tcg_ctxs = &tcg_ctx;
1592     tcg_cur_ctxs = 1;
1593     tcg_max_ctxs = 1;
1594 #else
1595     tcg_max_ctxs = max_threads;
1596     tcg_ctxs = g_new0(TCGContext *, max_threads);
1597 #endif
1598 
1599     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1600     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1601     tcg_env = temp_tcgv_ptr(ts);
1602 }
1603 
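/*
 * One-time TCG initialization: build the global context, then size and
 * partition the translation buffer into per-thread regions.
 */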
1604 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1605 {
1606     tcg_context_init(max_threads);
1607     tcg_region_init(tb_size, splitwx, max_threads);
1608 }
1609 
1610 /*
1611  * Allocate TBs right before their corresponding translated code, making
1612  * sure that TBs and code are on different cache lines.
1613  */
1614 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1615 {
1616     uintptr_t align = qemu_icache_linesize;
1617     TranslationBlock *tb;
1618     void *next;
1619 
1620  retry:
1621     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1622     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1623 
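     /*
      * Out of space in the current region: try to switch to a fresh one,
      * and report failure to the caller if no region remains.
      */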
1624     if (unlikely(next > s->code_gen_highwater)) {
1625         if (tcg_region_alloc(s)) {
1626             return NULL;
1627         }
1628         goto retry;
1629     }
1630     qatomic_set(&s->code_gen_ptr, next);
1631     return tb;
1632 }
1633 
1634 void tcg_prologue_init(void)
1635 {
1636     TCGContext *s = tcg_ctx;
1637     size_t prologue_size;
1638 
1639     s->code_ptr = s->code_gen_ptr;
1640     s->code_buf = s->code_gen_ptr;
1641     s->data_gen_ptr = NULL;
1642 
1643 #ifndef CONFIG_TCG_INTERPRETER
1644     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1645 #endif
1646 
1647     s->pool_labels = NULL;
1648 
1649     qemu_thread_jit_write();
1650     /* Generate the prologue.  */
1651     tcg_target_qemu_prologue(s);
1652 
1653     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1654     {
1655         int result = tcg_out_pool_finalize(s);
1656         tcg_debug_assert(result == 0);
1657     }
1658 
1659     prologue_size = tcg_current_code_size(s);
1660     perf_report_prologue(s->code_gen_ptr, prologue_size);
1661 
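     /*
      * Flush the data cache over the write view and the instruction cache
      * over the execute view, so the host can run the freshly written
      * prologue.  This is unnecessary for the TCI interpreter.
      */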
1662 #ifndef CONFIG_TCG_INTERPRETER
1663     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1664                         (uintptr_t)s->code_buf, prologue_size);
1665 #endif
1666 
1667     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1668         FILE *logfile = qemu_log_trylock();
1669         if (logfile) {
1670             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1671             if (s->data_gen_ptr) {
1672                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1673                 size_t data_size = prologue_size - code_size;
1674                 size_t i;
1675 
1676                 disas(logfile, s->code_gen_ptr, code_size);
1677 
1678                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1679                     if (sizeof(tcg_target_ulong) == 8) {
1680                         fprintf(logfile,
1681                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1682                                 (uintptr_t)s->data_gen_ptr + i,
1683                                 *(uint64_t *)(s->data_gen_ptr + i));
1684                     } else {
1685                         fprintf(logfile,
1686                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1687                                 (uintptr_t)s->data_gen_ptr + i,
1688                                 *(uint32_t *)(s->data_gen_ptr + i));
1689                     }
1690                 }
1691             } else {
1692                 disas(logfile, s->code_gen_ptr, prologue_size);
1693             }
1694             fprintf(logfile, "\n");
1695             qemu_log_unlock(logfile);
1696         }
1697     }
1698 
1699 #ifndef CONFIG_TCG_INTERPRETER
1700     /*
1701      * Assert that goto_ptr is implemented completely, setting an epilogue.
1702      * For tci, we use NULL as the signal to return from the interpreter,
1703      * so skip this check.
1704      */
1705     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1706 #endif
1707 
1708     tcg_region_prologue_set(s);
1709 }
1710 
1711 void tcg_func_start(TCGContext *s)
1712 {
1713     tcg_pool_reset(s);
1714     s->nb_temps = s->nb_globals;
1715 
1716     /* No temps have been previously allocated and freed for reuse.  */
1717     tcg_temp_ebb_reset_freed(s);
1718 
1719     /* No constant temps have been previously allocated. */
1720     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1721         if (s->const_table[i]) {
1722             g_hash_table_remove_all(s->const_table[i]);
1723         }
1724     }
1725 
1726     s->nb_ops = 0;
1727     s->nb_labels = 0;
1728     s->current_frame_offset = s->frame_start;
1729 
1730 #ifdef CONFIG_DEBUG_TCG
1731     s->goto_tb_issue_mask = 0;
1732 #endif
1733 
1734     QTAILQ_INIT(&s->ops);
1735     QTAILQ_INIT(&s->free_ops);
1736     s->emit_before_op = NULL;
1737     QSIMPLEQ_INIT(&s->labels);
1738 
1739     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1740     tcg_debug_assert(s->insn_start_words > 0);
1741 }
1742 
1743 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1744 {
1745     int n = s->nb_temps++;
1746 
1747     if (n >= TCG_MAX_TEMPS) {
1748         tcg_raise_tb_overflow(s);
1749     }
1750     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1751 }
1752 
1753 static TCGTemp *tcg_global_alloc(TCGContext *s)
1754 {
1755     TCGTemp *ts;
1756 
1757     tcg_debug_assert(s->nb_globals == s->nb_temps);
1758     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1759     s->nb_globals++;
1760     ts = tcg_temp_alloc(s);
1761     ts->kind = TEMP_GLOBAL;
1762 
1763     return ts;
1764 }
1765 
1766 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1767                                             TCGReg reg, const char *name)
1768 {
1769     TCGTemp *ts;
1770 
1771     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1772 
1773     ts = tcg_global_alloc(s);
1774     ts->base_type = type;
1775     ts->type = type;
1776     ts->kind = TEMP_FIXED;
1777     ts->reg = reg;
1778     ts->name = name;
1779     tcg_regset_set_reg(s->reserved_regs, reg);
1780 
1781     return ts;
1782 }
1783 
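/*
 * Record the spill area for the register allocator: [start, start + size)
 * relative to REG, which is exposed as the reserved "_frame" temp.
 */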
1784 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1785 {
1786     s->frame_start = start;
1787     s->frame_end = start + size;
1788     s->frame_temp
1789         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1790 }
1791 
1792 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1793                                             const char *name, TCGType type)
1794 {
1795     TCGContext *s = tcg_ctx;
1796     TCGTemp *base_ts = tcgv_ptr_temp(base);
1797     TCGTemp *ts = tcg_global_alloc(s);
1798     int indirect_reg = 0;
1799 
1800     switch (base_ts->kind) {
1801     case TEMP_FIXED:
1802         break;
1803     case TEMP_GLOBAL:
1804         /* We do not support double-indirect registers.  */
1805         tcg_debug_assert(!base_ts->indirect_reg);
1806         base_ts->indirect_base = 1;
1807         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1808                             ? 2 : 1);
1809         indirect_reg = 1;
1810         break;
1811     default:
1812         g_assert_not_reached();
1813     }
1814 
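     /*
      * On a 32-bit host, a 64-bit global is represented as two contiguous
      * 32-bit temps, "name_0" at OFFSET and "name_1" at OFFSET + 4.
      */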
1815     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1816         TCGTemp *ts2 = tcg_global_alloc(s);
1817         char buf[64];
1818 
1819         ts->base_type = TCG_TYPE_I64;
1820         ts->type = TCG_TYPE_I32;
1821         ts->indirect_reg = indirect_reg;
1822         ts->mem_allocated = 1;
1823         ts->mem_base = base_ts;
1824         ts->mem_offset = offset;
1825         pstrcpy(buf, sizeof(buf), name);
1826         pstrcat(buf, sizeof(buf), "_0");
1827         ts->name = strdup(buf);
1828 
1829         tcg_debug_assert(ts2 == ts + 1);
1830         ts2->base_type = TCG_TYPE_I64;
1831         ts2->type = TCG_TYPE_I32;
1832         ts2->indirect_reg = indirect_reg;
1833         ts2->mem_allocated = 1;
1834         ts2->mem_base = base_ts;
1835         ts2->mem_offset = offset + 4;
1836         ts2->temp_subindex = 1;
1837         pstrcpy(buf, sizeof(buf), name);
1838         pstrcat(buf, sizeof(buf), "_1");
1839         ts2->name = strdup(buf);
1840     } else {
1841         ts->base_type = type;
1842         ts->type = type;
1843         ts->indirect_reg = indirect_reg;
1844         ts->mem_allocated = 1;
1845         ts->mem_base = base_ts;
1846         ts->mem_offset = offset;
1847         ts->name = name;
1848     }
1849     return ts;
1850 }
1851 
1852 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1853 {
1854     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1855     return temp_tcgv_i32(ts);
1856 }
1857 
1858 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1859 {
1860     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1861     return temp_tcgv_i64(ts);
1862 }
1863 
1864 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1865 {
1866     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1867     return temp_tcgv_ptr(ts);
1868 }
1869 
1870 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1871 {
1872     TCGContext *s = tcg_ctx;
1873     TCGTemp *ts;
1874     int n;
1875 
1876     if (kind == TEMP_EBB) {
1877         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1878 
1879         if (idx < TCG_MAX_TEMPS) {
1880             /* There is already an available temp with the right type.  */
1881             clear_bit(idx, s->free_temps[type].l);
1882 
1883             ts = &s->temps[idx];
1884             ts->temp_allocated = 1;
1885             tcg_debug_assert(ts->base_type == type);
1886             tcg_debug_assert(ts->kind == kind);
1887             return ts;
1888         }
1889     } else {
1890         tcg_debug_assert(kind == TEMP_TB);
1891     }
1892 
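     /* Number of host-register-sized pieces needed to back this type. */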
1893     switch (type) {
1894     case TCG_TYPE_I32:
1895     case TCG_TYPE_V64:
1896     case TCG_TYPE_V128:
1897     case TCG_TYPE_V256:
1898         n = 1;
1899         break;
1900     case TCG_TYPE_I64:
1901         n = 64 / TCG_TARGET_REG_BITS;
1902         break;
1903     case TCG_TYPE_I128:
1904         n = 128 / TCG_TARGET_REG_BITS;
1905         break;
1906     default:
1907         g_assert_not_reached();
1908     }
1909 
1910     ts = tcg_temp_alloc(s);
1911     ts->base_type = type;
1912     ts->temp_allocated = 1;
1913     ts->kind = kind;
1914 
1915     if (n == 1) {
1916         ts->type = type;
1917     } else {
1918         ts->type = TCG_TYPE_REG;
1919 
1920         for (int i = 1; i < n; ++i) {
1921             TCGTemp *ts2 = tcg_temp_alloc(s);
1922 
1923             tcg_debug_assert(ts2 == ts + i);
1924             ts2->base_type = type;
1925             ts2->type = TCG_TYPE_REG;
1926             ts2->temp_allocated = 1;
1927             ts2->temp_subindex = i;
1928             ts2->kind = kind;
1929         }
1930     }
1931     return ts;
1932 }
1933 
1934 TCGv_i32 tcg_temp_new_i32(void)
1935 {
1936     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1937 }
1938 
1939 TCGv_i32 tcg_temp_ebb_new_i32(void)
1940 {
1941     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1942 }
1943 
1944 TCGv_i64 tcg_temp_new_i64(void)
1945 {
1946     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1947 }
1948 
1949 TCGv_i64 tcg_temp_ebb_new_i64(void)
1950 {
1951     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1952 }
1953 
1954 TCGv_ptr tcg_temp_new_ptr(void)
1955 {
1956     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1957 }
1958 
1959 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1960 {
1961     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1962 }
1963 
1964 TCGv_i128 tcg_temp_new_i128(void)
1965 {
1966     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1967 }
1968 
1969 TCGv_i128 tcg_temp_ebb_new_i128(void)
1970 {
1971     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1972 }
1973 
1974 TCGv_vec tcg_temp_new_vec(TCGType type)
1975 {
1976     TCGTemp *t;
1977 
1978 #ifdef CONFIG_DEBUG_TCG
1979     switch (type) {
1980     case TCG_TYPE_V64:
1981         assert(TCG_TARGET_HAS_v64);
1982         break;
1983     case TCG_TYPE_V128:
1984         assert(TCG_TARGET_HAS_v128);
1985         break;
1986     case TCG_TYPE_V256:
1987         assert(TCG_TARGET_HAS_v256);
1988         break;
1989     default:
1990         g_assert_not_reached();
1991     }
1992 #endif
1993 
1994     t = tcg_temp_new_internal(type, TEMP_EBB);
1995     return temp_tcgv_vec(t);
1996 }
1997 
1998 /* Create a new temp of the same type as an existing temp.  */
1999 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2000 {
2001     TCGTemp *t = tcgv_vec_temp(match);
2002 
2003     tcg_debug_assert(t->temp_allocated != 0);
2004 
2005     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2006     return temp_tcgv_vec(t);
2007 }
2008 
2009 void tcg_temp_free_internal(TCGTemp *ts)
2010 {
2011     TCGContext *s = tcg_ctx;
2012 
2013     switch (ts->kind) {
2014     case TEMP_CONST:
2015     case TEMP_TB:
2016         /* Silently ignore free. */
2017         break;
2018     case TEMP_EBB:
2019         tcg_debug_assert(ts->temp_allocated != 0);
2020         ts->temp_allocated = 0;
2021         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2022         break;
2023     default:
2024         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2025         g_assert_not_reached();
2026     }
2027 }
2028 
2029 void tcg_temp_free_i32(TCGv_i32 arg)
2030 {
2031     tcg_temp_free_internal(tcgv_i32_temp(arg));
2032 }
2033 
2034 void tcg_temp_free_i64(TCGv_i64 arg)
2035 {
2036     tcg_temp_free_internal(tcgv_i64_temp(arg));
2037 }
2038 
2039 void tcg_temp_free_i128(TCGv_i128 arg)
2040 {
2041     tcg_temp_free_internal(tcgv_i128_temp(arg));
2042 }
2043 
2044 void tcg_temp_free_ptr(TCGv_ptr arg)
2045 {
2046     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2047 }
2048 
2049 void tcg_temp_free_vec(TCGv_vec arg)
2050 {
2051     tcg_temp_free_internal(tcgv_vec_temp(arg));
2052 }
2053 
2054 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2055 {
2056     TCGContext *s = tcg_ctx;
2057     GHashTable *h = s->const_table[type];
2058     TCGTemp *ts;
2059 
2060     if (h == NULL) {
2061         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2062         s->const_table[type] = h;
2063     }
2064 
2065     ts = g_hash_table_lookup(h, &val);
2066     if (ts == NULL) {
2067         int64_t *val_ptr;
2068 
2069         ts = tcg_temp_alloc(s);
2070 
2071         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2072             TCGTemp *ts2 = tcg_temp_alloc(s);
2073 
2074             tcg_debug_assert(ts2 == ts + 1);
2075 
2076             ts->base_type = TCG_TYPE_I64;
2077             ts->type = TCG_TYPE_I32;
2078             ts->kind = TEMP_CONST;
2079             ts->temp_allocated = 1;
2080 
2081             ts2->base_type = TCG_TYPE_I64;
2082             ts2->type = TCG_TYPE_I32;
2083             ts2->kind = TEMP_CONST;
2084             ts2->temp_allocated = 1;
2085             ts2->temp_subindex = 1;
2086 
2087             /*
2088              * Retain the full value of the 64-bit constant in the low
2089              * part, so that the hash table works.  Actual uses will
2090              * truncate the value to the low part.
2091              */
2092             ts[HOST_BIG_ENDIAN].val = val;
2093             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2094             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2095         } else {
2096             ts->base_type = type;
2097             ts->type = type;
2098             ts->kind = TEMP_CONST;
2099             ts->temp_allocated = 1;
2100             ts->val = val;
2101             val_ptr = &ts->val;
2102         }
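         /*
          * The table keys on the value stored within the temp itself, so
          * the key pointer remains valid until the table is cleared by
          * tcg_func_start().
          */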
2103         g_hash_table_insert(h, val_ptr, ts);
2104     }
2105 
2106     return ts;
2107 }
2108 
2109 TCGv_i32 tcg_constant_i32(int32_t val)
2110 {
2111     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2112 }
2113 
2114 TCGv_i64 tcg_constant_i64(int64_t val)
2115 {
2116     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2117 }
2118 
2119 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2120 {
2121     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2122 }
2123 
2124 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2125 {
2126     val = dup_const(vece, val);
2127     return temp_tcgv_vec(tcg_constant_internal(type, val));
2128 }
2129 
2130 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2131 {
2132     TCGTemp *t = tcgv_vec_temp(match);
2133 
2134     tcg_debug_assert(t->temp_allocated != 0);
2135     return tcg_constant_vec(t->base_type, vece, val);
2136 }
2137 
2138 #ifdef CONFIG_DEBUG_TCG
2139 size_t temp_idx(TCGTemp *ts)
2140 {
2141     ptrdiff_t n = ts - tcg_ctx->temps;
2142     assert(n >= 0 && n < tcg_ctx->nb_temps);
2143     return n;
2144 }
2145 
2146 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2147 {
2148     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2149 
2150     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2151     assert(o % sizeof(TCGTemp) == 0);
2152 
2153     return (void *)tcg_ctx + (uintptr_t)v;
2154 }
2155 #endif /* CONFIG_DEBUG_TCG */
2156 
2157 /*
2158  * Return true if OP may appear in the opcode stream with TYPE.
2159  * Test the runtime variable that controls each opcode.
2160  */
2161 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2162 {
2163     bool has_type;
2164 
2165     switch (type) {
2166     case TCG_TYPE_I32:
2167         has_type = true;
2168         break;
2169     case TCG_TYPE_I64:
2170         has_type = TCG_TARGET_REG_BITS == 64;
2171         break;
2172     case TCG_TYPE_V64:
2173         has_type = TCG_TARGET_HAS_v64;
2174         break;
2175     case TCG_TYPE_V128:
2176         has_type = TCG_TARGET_HAS_v128;
2177         break;
2178     case TCG_TYPE_V256:
2179         has_type = TCG_TARGET_HAS_v256;
2180         break;
2181     default:
2182         has_type = false;
2183         break;
2184     }
2185 
2186     switch (op) {
2187     case INDEX_op_discard:
2188     case INDEX_op_set_label:
2189     case INDEX_op_call:
2190     case INDEX_op_br:
2191     case INDEX_op_mb:
2192     case INDEX_op_insn_start:
2193     case INDEX_op_exit_tb:
2194     case INDEX_op_goto_tb:
2195     case INDEX_op_goto_ptr:
2196     case INDEX_op_qemu_ld_i32:
2197     case INDEX_op_qemu_st_i32:
2198     case INDEX_op_qemu_ld_i64:
2199     case INDEX_op_qemu_st_i64:
2200         return true;
2201 
2202     case INDEX_op_qemu_st8_i32:
2203         return TCG_TARGET_HAS_qemu_st8_i32;
2204 
2205     case INDEX_op_qemu_ld_i128:
2206     case INDEX_op_qemu_st_i128:
2207         return TCG_TARGET_HAS_qemu_ldst_i128;
2208 
2209     case INDEX_op_add:
2210     case INDEX_op_and:
2211     case INDEX_op_mov:
2212         return has_type;
2213 
2214     case INDEX_op_setcond_i32:
2215     case INDEX_op_brcond_i32:
2216     case INDEX_op_movcond_i32:
2217     case INDEX_op_ld8u_i32:
2218     case INDEX_op_ld8s_i32:
2219     case INDEX_op_ld16u_i32:
2220     case INDEX_op_ld16s_i32:
2221     case INDEX_op_ld_i32:
2222     case INDEX_op_st8_i32:
2223     case INDEX_op_st16_i32:
2224     case INDEX_op_st_i32:
2225     case INDEX_op_sub_i32:
2226     case INDEX_op_neg_i32:
2227     case INDEX_op_mul_i32:
2228     case INDEX_op_or_i32:
2229     case INDEX_op_xor_i32:
2230     case INDEX_op_shl_i32:
2231     case INDEX_op_shr_i32:
2232     case INDEX_op_sar_i32:
2233     case INDEX_op_extract_i32:
2234     case INDEX_op_sextract_i32:
2235     case INDEX_op_deposit_i32:
2236         return true;
2237 
2238     case INDEX_op_negsetcond_i32:
2239         return TCG_TARGET_HAS_negsetcond_i32;
2240     case INDEX_op_div_i32:
2241     case INDEX_op_divu_i32:
2242         return TCG_TARGET_HAS_div_i32;
2243     case INDEX_op_rem_i32:
2244     case INDEX_op_remu_i32:
2245         return TCG_TARGET_HAS_rem_i32;
2246     case INDEX_op_div2_i32:
2247     case INDEX_op_divu2_i32:
2248         return TCG_TARGET_HAS_div2_i32;
2249     case INDEX_op_rotl_i32:
2250     case INDEX_op_rotr_i32:
2251         return TCG_TARGET_HAS_rot_i32;
2252     case INDEX_op_extract2_i32:
2253         return TCG_TARGET_HAS_extract2_i32;
2254     case INDEX_op_add2_i32:
2255         return TCG_TARGET_HAS_add2_i32;
2256     case INDEX_op_sub2_i32:
2257         return TCG_TARGET_HAS_sub2_i32;
2258     case INDEX_op_mulu2_i32:
2259         return TCG_TARGET_HAS_mulu2_i32;
2260     case INDEX_op_muls2_i32:
2261         return TCG_TARGET_HAS_muls2_i32;
2262     case INDEX_op_muluh_i32:
2263         return TCG_TARGET_HAS_muluh_i32;
2264     case INDEX_op_mulsh_i32:
2265         return TCG_TARGET_HAS_mulsh_i32;
2266     case INDEX_op_bswap16_i32:
2267         return TCG_TARGET_HAS_bswap16_i32;
2268     case INDEX_op_bswap32_i32:
2269         return TCG_TARGET_HAS_bswap32_i32;
2270     case INDEX_op_not_i32:
2271         return TCG_TARGET_HAS_not_i32;
2272     case INDEX_op_andc_i32:
2273         return TCG_TARGET_HAS_andc_i32;
2274     case INDEX_op_orc_i32:
2275         return TCG_TARGET_HAS_orc_i32;
2276     case INDEX_op_eqv_i32:
2277         return TCG_TARGET_HAS_eqv_i32;
2278     case INDEX_op_nand_i32:
2279         return TCG_TARGET_HAS_nand_i32;
2280     case INDEX_op_nor_i32:
2281         return TCG_TARGET_HAS_nor_i32;
2282     case INDEX_op_clz_i32:
2283         return TCG_TARGET_HAS_clz_i32;
2284     case INDEX_op_ctz_i32:
2285         return TCG_TARGET_HAS_ctz_i32;
2286     case INDEX_op_ctpop_i32:
2287         return TCG_TARGET_HAS_ctpop_i32;
2288 
2289     case INDEX_op_brcond2_i32:
2290     case INDEX_op_setcond2_i32:
2291         return TCG_TARGET_REG_BITS == 32;
2292 
2293     case INDEX_op_setcond_i64:
2294     case INDEX_op_brcond_i64:
2295     case INDEX_op_movcond_i64:
2296     case INDEX_op_ld8u_i64:
2297     case INDEX_op_ld8s_i64:
2298     case INDEX_op_ld16u_i64:
2299     case INDEX_op_ld16s_i64:
2300     case INDEX_op_ld32u_i64:
2301     case INDEX_op_ld32s_i64:
2302     case INDEX_op_ld_i64:
2303     case INDEX_op_st8_i64:
2304     case INDEX_op_st16_i64:
2305     case INDEX_op_st32_i64:
2306     case INDEX_op_st_i64:
2307     case INDEX_op_sub_i64:
2308     case INDEX_op_neg_i64:
2309     case INDEX_op_mul_i64:
2310     case INDEX_op_or_i64:
2311     case INDEX_op_xor_i64:
2312     case INDEX_op_shl_i64:
2313     case INDEX_op_shr_i64:
2314     case INDEX_op_sar_i64:
2315     case INDEX_op_ext_i32_i64:
2316     case INDEX_op_extu_i32_i64:
2317     case INDEX_op_extract_i64:
2318     case INDEX_op_sextract_i64:
2319     case INDEX_op_deposit_i64:
2320         return TCG_TARGET_REG_BITS == 64;
2321 
2322     case INDEX_op_negsetcond_i64:
2323         return TCG_TARGET_HAS_negsetcond_i64;
2324     case INDEX_op_div_i64:
2325     case INDEX_op_divu_i64:
2326         return TCG_TARGET_HAS_div_i64;
2327     case INDEX_op_rem_i64:
2328     case INDEX_op_remu_i64:
2329         return TCG_TARGET_HAS_rem_i64;
2330     case INDEX_op_div2_i64:
2331     case INDEX_op_divu2_i64:
2332         return TCG_TARGET_HAS_div2_i64;
2333     case INDEX_op_rotl_i64:
2334     case INDEX_op_rotr_i64:
2335         return TCG_TARGET_HAS_rot_i64;
2336     case INDEX_op_extract2_i64:
2337         return TCG_TARGET_HAS_extract2_i64;
2338     case INDEX_op_extrl_i64_i32:
2339     case INDEX_op_extrh_i64_i32:
2340         return TCG_TARGET_HAS_extr_i64_i32;
2341     case INDEX_op_bswap16_i64:
2342         return TCG_TARGET_HAS_bswap16_i64;
2343     case INDEX_op_bswap32_i64:
2344         return TCG_TARGET_HAS_bswap32_i64;
2345     case INDEX_op_bswap64_i64:
2346         return TCG_TARGET_HAS_bswap64_i64;
2347     case INDEX_op_not_i64:
2348         return TCG_TARGET_HAS_not_i64;
2349     case INDEX_op_andc_i64:
2350         return TCG_TARGET_HAS_andc_i64;
2351     case INDEX_op_orc_i64:
2352         return TCG_TARGET_HAS_orc_i64;
2353     case INDEX_op_eqv_i64:
2354         return TCG_TARGET_HAS_eqv_i64;
2355     case INDEX_op_nand_i64:
2356         return TCG_TARGET_HAS_nand_i64;
2357     case INDEX_op_nor_i64:
2358         return TCG_TARGET_HAS_nor_i64;
2359     case INDEX_op_clz_i64:
2360         return TCG_TARGET_HAS_clz_i64;
2361     case INDEX_op_ctz_i64:
2362         return TCG_TARGET_HAS_ctz_i64;
2363     case INDEX_op_ctpop_i64:
2364         return TCG_TARGET_HAS_ctpop_i64;
2365     case INDEX_op_add2_i64:
2366         return TCG_TARGET_HAS_add2_i64;
2367     case INDEX_op_sub2_i64:
2368         return TCG_TARGET_HAS_sub2_i64;
2369     case INDEX_op_mulu2_i64:
2370         return TCG_TARGET_HAS_mulu2_i64;
2371     case INDEX_op_muls2_i64:
2372         return TCG_TARGET_HAS_muls2_i64;
2373     case INDEX_op_muluh_i64:
2374         return TCG_TARGET_HAS_muluh_i64;
2375     case INDEX_op_mulsh_i64:
2376         return TCG_TARGET_HAS_mulsh_i64;
2377 
2378     case INDEX_op_mov_vec:
2379     case INDEX_op_dup_vec:
2380     case INDEX_op_dupm_vec:
2381     case INDEX_op_ld_vec:
2382     case INDEX_op_st_vec:
2383     case INDEX_op_add_vec:
2384     case INDEX_op_sub_vec:
2385     case INDEX_op_and_vec:
2386     case INDEX_op_or_vec:
2387     case INDEX_op_xor_vec:
2388     case INDEX_op_cmp_vec:
2389         return has_type;
2390     case INDEX_op_dup2_vec:
2391         return has_type && TCG_TARGET_REG_BITS == 32;
2392     case INDEX_op_not_vec:
2393         return has_type && TCG_TARGET_HAS_not_vec;
2394     case INDEX_op_neg_vec:
2395         return has_type && TCG_TARGET_HAS_neg_vec;
2396     case INDEX_op_abs_vec:
2397         return has_type && TCG_TARGET_HAS_abs_vec;
2398     case INDEX_op_andc_vec:
2399         return has_type && TCG_TARGET_HAS_andc_vec;
2400     case INDEX_op_orc_vec:
2401         return has_type && TCG_TARGET_HAS_orc_vec;
2402     case INDEX_op_nand_vec:
2403         return has_type && TCG_TARGET_HAS_nand_vec;
2404     case INDEX_op_nor_vec:
2405         return has_type && TCG_TARGET_HAS_nor_vec;
2406     case INDEX_op_eqv_vec:
2407         return has_type && TCG_TARGET_HAS_eqv_vec;
2408     case INDEX_op_mul_vec:
2409         return has_type && TCG_TARGET_HAS_mul_vec;
2410     case INDEX_op_shli_vec:
2411     case INDEX_op_shri_vec:
2412     case INDEX_op_sari_vec:
2413         return has_type && TCG_TARGET_HAS_shi_vec;
2414     case INDEX_op_shls_vec:
2415     case INDEX_op_shrs_vec:
2416     case INDEX_op_sars_vec:
2417         return has_type && TCG_TARGET_HAS_shs_vec;
2418     case INDEX_op_shlv_vec:
2419     case INDEX_op_shrv_vec:
2420     case INDEX_op_sarv_vec:
2421         return has_type && TCG_TARGET_HAS_shv_vec;
2422     case INDEX_op_rotli_vec:
2423         return has_type && TCG_TARGET_HAS_roti_vec;
2424     case INDEX_op_rotls_vec:
2425         return has_type && TCG_TARGET_HAS_rots_vec;
2426     case INDEX_op_rotlv_vec:
2427     case INDEX_op_rotrv_vec:
2428         return has_type && TCG_TARGET_HAS_rotv_vec;
2429     case INDEX_op_ssadd_vec:
2430     case INDEX_op_usadd_vec:
2431     case INDEX_op_sssub_vec:
2432     case INDEX_op_ussub_vec:
2433         return has_type && TCG_TARGET_HAS_sat_vec;
2434     case INDEX_op_smin_vec:
2435     case INDEX_op_umin_vec:
2436     case INDEX_op_smax_vec:
2437     case INDEX_op_umax_vec:
2438         return has_type && TCG_TARGET_HAS_minmax_vec;
2439     case INDEX_op_bitsel_vec:
2440         return has_type && TCG_TARGET_HAS_bitsel_vec;
2441     case INDEX_op_cmpsel_vec:
2442         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2443 
2444     default:
2445         if (op < INDEX_op_last_generic) {
2446             const TCGOutOp *outop;
2447             TCGConstraintSetIndex con_set;
2448 
2449             if (!has_type) {
2450                 return false;
2451             }
2452 
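             /*
              * New-style opcodes: supported exactly when the backend
              * provides a constraint set for this type and flags.
              */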
2453             outop = all_outop[op];
2454             tcg_debug_assert(outop != NULL);
2455 
2456             con_set = outop->static_constraint;
2457             if (con_set == C_Dynamic) {
2458                 con_set = outop->dynamic_constraint(type, flags);
2459             }
2460             if (con_set >= 0) {
2461                 return true;
2462             }
2463             tcg_debug_assert(con_set == C_NotImplemented);
2464             return false;
2465         }
2466         tcg_debug_assert(op < NB_OPS);
2467         return true;
2468 
2469     case INDEX_op_last_generic:
2470         g_assert_not_reached();
2471     }
2472 }
2473 
2474 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2475 {
2476     unsigned width;
2477 
2478     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2479     width = (type == TCG_TYPE_I32 ? 32 : 64);
2480 
2481     tcg_debug_assert(ofs < width);
2482     tcg_debug_assert(len > 0);
2483     tcg_debug_assert(len <= width - ofs);
2484 
2485     return TCG_TARGET_deposit_valid(type, ofs, len);
2486 }
2487 
2488 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2489 
2490 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2491                           TCGTemp *ret, TCGTemp **args)
2492 {
2493     TCGv_i64 extend_free[MAX_CALL_IARGS];
2494     int n_extend = 0;
2495     TCGOp *op;
2496     int i, n, pi = 0, total_args;
2497 
2498     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2499         init_call_layout(info);
2500         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2501     }
2502 
2503     total_args = info->nr_out + info->nr_in + 2;
2504     op = tcg_op_alloc(INDEX_op_call, total_args);
2505 
2506 #ifdef CONFIG_PLUGIN
2507     /* Flag helpers that may affect guest state */
2508     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2509         tcg_ctx->plugin_insn->calls_helpers = true;
2510     }
2511 #endif
2512 
2513     TCGOP_CALLO(op) = n = info->nr_out;
2514     switch (n) {
2515     case 0:
2516         tcg_debug_assert(ret == NULL);
2517         break;
2518     case 1:
2519         tcg_debug_assert(ret != NULL);
2520         op->args[pi++] = temp_arg(ret);
2521         break;
2522     case 2:
2523     case 4:
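         /*
          * The value occupies n contiguous temps: a 64-bit value on a
          * 32-bit host, or a 128-bit value.
          */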
2524         tcg_debug_assert(ret != NULL);
2525         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2526         tcg_debug_assert(ret->temp_subindex == 0);
2527         for (i = 0; i < n; ++i) {
2528             op->args[pi++] = temp_arg(ret + i);
2529         }
2530         break;
2531     default:
2532         g_assert_not_reached();
2533     }
2534 
2535     TCGOP_CALLI(op) = n = info->nr_in;
2536     for (i = 0; i < n; i++) {
2537         const TCGCallArgumentLoc *loc = &info->in[i];
2538         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2539 
2540         switch (loc->kind) {
2541         case TCG_CALL_ARG_NORMAL:
2542         case TCG_CALL_ARG_BY_REF:
2543         case TCG_CALL_ARG_BY_REF_N:
2544             op->args[pi++] = temp_arg(ts);
2545             break;
2546 
2547         case TCG_CALL_ARG_EXTEND_U:
2548         case TCG_CALL_ARG_EXTEND_S:
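         /*
          * A 32-bit argument that the ABI requires widened to 64 bits:
          * extend into a scratch i64, freed after the op is emitted.
          */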
2549             {
2550                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2551                 TCGv_i32 orig = temp_tcgv_i32(ts);
2552 
2553                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2554                     tcg_gen_ext_i32_i64(temp, orig);
2555                 } else {
2556                     tcg_gen_extu_i32_i64(temp, orig);
2557                 }
2558                 op->args[pi++] = tcgv_i64_arg(temp);
2559                 extend_free[n_extend++] = temp;
2560             }
2561             break;
2562 
2563         default:
2564             g_assert_not_reached();
2565         }
2566     }
2567     op->args[pi++] = (uintptr_t)func;
2568     op->args[pi++] = (uintptr_t)info;
2569     tcg_debug_assert(pi == total_args);
2570 
2571     if (tcg_ctx->emit_before_op) {
2572         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2573     } else {
2574         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2575     }
2576 
2577     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2578     for (i = 0; i < n_extend; ++i) {
2579         tcg_temp_free_i64(extend_free[i]);
2580     }
2581 }
2582 
2583 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2584 {
2585     tcg_gen_callN(func, info, ret, NULL);
2586 }
2587 
2588 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2589 {
2590     tcg_gen_callN(func, info, ret, &t1);
2591 }
2592 
2593 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2594                    TCGTemp *t1, TCGTemp *t2)
2595 {
2596     TCGTemp *args[2] = { t1, t2 };
2597     tcg_gen_callN(func, info, ret, args);
2598 }
2599 
2600 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2601                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2602 {
2603     TCGTemp *args[3] = { t1, t2, t3 };
2604     tcg_gen_callN(func, info, ret, args);
2605 }
2606 
2607 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2608                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2609 {
2610     TCGTemp *args[4] = { t1, t2, t3, t4 };
2611     tcg_gen_callN(func, info, ret, args);
2612 }
2613 
2614 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2615                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2616 {
2617     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2618     tcg_gen_callN(func, info, ret, args);
2619 }
2620 
2621 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2622                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2623                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2624 {
2625     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2626     tcg_gen_callN(func, info, ret, args);
2627 }
2628 
2629 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2630                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2631                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2632 {
2633     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2634     tcg_gen_callN(func, info, ret, args);
2635 }
2636 
2637 static void tcg_reg_alloc_start(TCGContext *s)
2638 {
2639     int i, n;
2640 
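     /*
      * Initial value state per kind: constants are always valid as
      * constants, fixed temps live in their register, globals and TB
      * temps start in memory, and EBB temps start dead.
      */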
2641     for (i = 0, n = s->nb_temps; i < n; i++) {
2642         TCGTemp *ts = &s->temps[i];
2643         TCGTempVal val = TEMP_VAL_MEM;
2644 
2645         switch (ts->kind) {
2646         case TEMP_CONST:
2647             val = TEMP_VAL_CONST;
2648             break;
2649         case TEMP_FIXED:
2650             val = TEMP_VAL_REG;
2651             break;
2652         case TEMP_GLOBAL:
2653             break;
2654         case TEMP_EBB:
2655             val = TEMP_VAL_DEAD;
2656             /* fall through */
2657         case TEMP_TB:
2658             ts->mem_allocated = 0;
2659             break;
2660         default:
2661             g_assert_not_reached();
2662         }
2663         ts->val_type = val;
2664     }
2665 
2666     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2667 }
2668 
2669 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2670                                  TCGTemp *ts)
2671 {
2672     int idx = temp_idx(ts);
2673 
2674     switch (ts->kind) {
2675     case TEMP_FIXED:
2676     case TEMP_GLOBAL:
2677         pstrcpy(buf, buf_size, ts->name);
2678         break;
2679     case TEMP_TB:
2680         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2681         break;
2682     case TEMP_EBB:
2683         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2684         break;
2685     case TEMP_CONST:
2686         switch (ts->type) {
2687         case TCG_TYPE_I32:
2688             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2689             break;
2690 #if TCG_TARGET_REG_BITS > 32
2691         case TCG_TYPE_I64:
2692             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2693             break;
2694 #endif
2695         case TCG_TYPE_V64:
2696         case TCG_TYPE_V128:
2697         case TCG_TYPE_V256:
2698             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2699                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2700             break;
2701         default:
2702             g_assert_not_reached();
2703         }
2704         break;
2705     }
2706     return buf;
2707 }
2708 
2709 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2710                              int buf_size, TCGArg arg)
2711 {
2712     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2713 }
2714 
2715 static const char * const cond_name[] =
2716 {
2717     [TCG_COND_NEVER] = "never",
2718     [TCG_COND_ALWAYS] = "always",
2719     [TCG_COND_EQ] = "eq",
2720     [TCG_COND_NE] = "ne",
2721     [TCG_COND_LT] = "lt",
2722     [TCG_COND_GE] = "ge",
2723     [TCG_COND_LE] = "le",
2724     [TCG_COND_GT] = "gt",
2725     [TCG_COND_LTU] = "ltu",
2726     [TCG_COND_GEU] = "geu",
2727     [TCG_COND_LEU] = "leu",
2728     [TCG_COND_GTU] = "gtu",
2729     [TCG_COND_TSTEQ] = "tsteq",
2730     [TCG_COND_TSTNE] = "tstne",
2731 };
2732 
2733 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2734 {
2735     [MO_UB]   = "ub",
2736     [MO_SB]   = "sb",
2737     [MO_LEUW] = "leuw",
2738     [MO_LESW] = "lesw",
2739     [MO_LEUL] = "leul",
2740     [MO_LESL] = "lesl",
2741     [MO_LEUQ] = "leq",
2742     [MO_BEUW] = "beuw",
2743     [MO_BESW] = "besw",
2744     [MO_BEUL] = "beul",
2745     [MO_BESL] = "besl",
2746     [MO_BEUQ] = "beq",
2747     [MO_128 + MO_BE] = "beo",
2748     [MO_128 + MO_LE] = "leo",
2749 };
2750 
2751 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2752     [MO_UNALN >> MO_ASHIFT]    = "un+",
2753     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2754     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2755     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2756     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2757     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2758     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2759     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2760 };
2761 
2762 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2763     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2764     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2765     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2766     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2767     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2768     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2769 };
2770 
2771 static const char bswap_flag_name[][6] = {
2772     [TCG_BSWAP_IZ] = "iz",
2773     [TCG_BSWAP_OZ] = "oz",
2774     [TCG_BSWAP_OS] = "os",
2775     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2776     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2777 };
2778 
2779 #ifdef CONFIG_PLUGIN
2780 static const char * const plugin_from_name[] = {
2781     "from-tb",
2782     "from-insn",
2783     "after-insn",
2784     "after-tb",
2785 };
2786 #endif
2787 
2788 static inline bool tcg_regset_single(TCGRegSet d)
2789 {
2790     return (d & (d - 1)) == 0;
2791 }
2792 
2793 static inline TCGReg tcg_regset_first(TCGRegSet d)
2794 {
2795     if (TCG_TARGET_NB_REGS <= 32) {
2796         return ctz32(d);
2797     } else {
2798         return ctz64(d);
2799     }
2800 }
2801 
2802 /* Return only the number of characters output -- no error return. */
2803 #define ne_fprintf(...) \
2804     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2805 
2806 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2807 {
2808     char buf[128];
2809     TCGOp *op;
2810 
2811     QTAILQ_FOREACH(op, &s->ops, link) {
2812         int i, k, nb_oargs, nb_iargs, nb_cargs;
2813         const TCGOpDef *def;
2814         TCGOpcode c;
2815         int col = 0;
2816 
2817         c = op->opc;
2818         def = &tcg_op_defs[c];
2819 
2820         if (c == INDEX_op_insn_start) {
2821             nb_oargs = 0;
2822             col += ne_fprintf(f, "\n ----");
2823 
2824             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2825                 col += ne_fprintf(f, " %016" PRIx64,
2826                                   tcg_get_insn_start_param(op, i));
2827             }
2828         } else if (c == INDEX_op_call) {
2829             const TCGHelperInfo *info = tcg_call_info(op);
2830             void *func = tcg_call_func(op);
2831 
2832             /* variable number of arguments */
2833             nb_oargs = TCGOP_CALLO(op);
2834             nb_iargs = TCGOP_CALLI(op);
2835             nb_cargs = def->nb_cargs;
2836 
2837             col += ne_fprintf(f, " %s ", def->name);
2838 
2839             /*
2840              * Print the function name from TCGHelperInfo, if available.
2841              * Note that plugins have a template function for the info,
2842              * but the actual function pointer comes from the plugin.
2843              */
2844             if (func == info->func) {
2845                 col += ne_fprintf(f, "%s", info->name);
2846             } else {
2847                 col += ne_fprintf(f, "plugin(%p)", func);
2848             }
2849 
2850             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2851             for (i = 0; i < nb_oargs; i++) {
2852                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2853                                                             op->args[i]));
2854             }
2855             for (i = 0; i < nb_iargs; i++) {
2856                 TCGArg arg = op->args[nb_oargs + i];
2857                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2858                 col += ne_fprintf(f, ",%s", t);
2859             }
2860         } else {
2861             if (def->flags & TCG_OPF_INT) {
2862                 col += ne_fprintf(f, " %s_i%d ",
2863                                   def->name,
2864                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2865             } else if (def->flags & TCG_OPF_VECTOR) {
2866                 col += ne_fprintf(f, "%s v%d,e%d,",
2867                                   def->name,
2868                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2869                                   8 << TCGOP_VECE(op));
2870             } else {
2871                 col += ne_fprintf(f, " %s ", def->name);
2872             }
2873 
2874             nb_oargs = def->nb_oargs;
2875             nb_iargs = def->nb_iargs;
2876             nb_cargs = def->nb_cargs;
2877 
2878             k = 0;
2879             for (i = 0; i < nb_oargs; i++) {
2880                 const char *sep =  k ? "," : "";
2881                 col += ne_fprintf(f, "%s%s", sep,
2882                                   tcg_get_arg_str(s, buf, sizeof(buf),
2883                                                   op->args[k++]));
2884             }
2885             for (i = 0; i < nb_iargs; i++) {
2886                 const char *sep =  k ? "," : "";
2887                 col += ne_fprintf(f, "%s%s", sep,
2888                                   tcg_get_arg_str(s, buf, sizeof(buf),
2889                                                   op->args[k++]));
2890             }
2891             switch (c) {
2892             case INDEX_op_brcond_i32:
2893             case INDEX_op_setcond_i32:
2894             case INDEX_op_negsetcond_i32:
2895             case INDEX_op_movcond_i32:
2896             case INDEX_op_brcond2_i32:
2897             case INDEX_op_setcond2_i32:
2898             case INDEX_op_brcond_i64:
2899             case INDEX_op_setcond_i64:
2900             case INDEX_op_negsetcond_i64:
2901             case INDEX_op_movcond_i64:
2902             case INDEX_op_cmp_vec:
2903             case INDEX_op_cmpsel_vec:
2904                 if (op->args[k] < ARRAY_SIZE(cond_name)
2905                     && cond_name[op->args[k]]) {
2906                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2907                 } else {
2908                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2909                 }
2910                 i = 1;
2911                 break;
2912             case INDEX_op_qemu_ld_i32:
2913             case INDEX_op_qemu_st_i32:
2914             case INDEX_op_qemu_st8_i32:
2915             case INDEX_op_qemu_ld_i64:
2916             case INDEX_op_qemu_st_i64:
2917             case INDEX_op_qemu_ld_i128:
2918             case INDEX_op_qemu_st_i128:
2919                 {
2920                     const char *s_al, *s_op, *s_at;
2921                     MemOpIdx oi = op->args[k++];
2922                     MemOp mop = get_memop(oi);
2923                     unsigned ix = get_mmuidx(oi);
2924 
2925                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2926                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2927                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2928                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2929 
2930                     /* If all fields are accounted for, print symbolically. */
2931                     if (!mop && s_al && s_op && s_at) {
2932                         col += ne_fprintf(f, ",%s%s%s,%u",
2933                                           s_at, s_al, s_op, ix);
2934                     } else {
2935                         mop = get_memop(oi);
2936                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2937                     }
2938                     i = 1;
2939                 }
2940                 break;
2941             case INDEX_op_bswap16_i32:
2942             case INDEX_op_bswap16_i64:
2943             case INDEX_op_bswap32_i32:
2944             case INDEX_op_bswap32_i64:
2945             case INDEX_op_bswap64_i64:
2946                 {
2947                     TCGArg flags = op->args[k];
2948                     const char *name = NULL;
2949 
2950                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2951                         name = bswap_flag_name[flags];
2952                     }
2953                     if (name) {
2954                         col += ne_fprintf(f, ",%s", name);
2955                     } else {
2956                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2957                     }
2958                     i = k = 1;
2959                 }
2960                 break;
2961 #ifdef CONFIG_PLUGIN
2962             case INDEX_op_plugin_cb:
2963                 {
2964                     TCGArg from = op->args[k++];
2965                     const char *name = NULL;
2966 
2967                     if (from < ARRAY_SIZE(plugin_from_name)) {
2968                         name = plugin_from_name[from];
2969                     }
2970                     if (name) {
2971                         col += ne_fprintf(f, "%s", name);
2972                     } else {
2973                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2974                     }
2975                     i = 1;
2976                 }
2977                 break;
2978 #endif
2979             default:
2980                 i = 0;
2981                 break;
2982             }
2983             switch (c) {
2984             case INDEX_op_set_label:
2985             case INDEX_op_br:
2986             case INDEX_op_brcond_i32:
2987             case INDEX_op_brcond_i64:
2988             case INDEX_op_brcond2_i32:
2989                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2990                                   arg_label(op->args[k])->id);
2991                 i++, k++;
2992                 break;
2993             case INDEX_op_mb:
2994                 {
2995                     TCGBar membar = op->args[k];
2996                     const char *b_op, *m_op;
2997 
2998                     switch (membar & TCG_BAR_SC) {
2999                     case 0:
3000                         b_op = "none";
3001                         break;
3002                     case TCG_BAR_LDAQ:
3003                         b_op = "acq";
3004                         break;
3005                     case TCG_BAR_STRL:
3006                         b_op = "rel";
3007                         break;
3008                     case TCG_BAR_SC:
3009                         b_op = "seq";
3010                         break;
3011                     default:
3012                         g_assert_not_reached();
3013                     }
3014 
3015                     switch (membar & TCG_MO_ALL) {
3016                     case 0:
3017                         m_op = "none";
3018                         break;
3019                     case TCG_MO_LD_LD:
3020                         m_op = "rr";
3021                         break;
3022                     case TCG_MO_LD_ST:
3023                         m_op = "rw";
3024                         break;
3025                     case TCG_MO_ST_LD:
3026                         m_op = "wr";
3027                         break;
3028                     case TCG_MO_ST_ST:
3029                         m_op = "ww";
3030                         break;
3031                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3032                         m_op = "rr+rw";
3033                         break;
3034                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3035                         m_op = "rr+wr";
3036                         break;
3037                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3038                         m_op = "rr+ww";
3039                         break;
3040                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3041                         m_op = "rw+wr";
3042                         break;
3043                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3044                         m_op = "rw+ww";
3045                         break;
3046                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3047                         m_op = "wr+ww";
3048                         break;
3049                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3050                         m_op = "rr+rw+wr";
3051                         break;
3052                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3053                         m_op = "rr+rw+ww";
3054                         break;
3055                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3056                         m_op = "rr+wr+ww";
3057                         break;
3058                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3059                         m_op = "rw+wr+ww";
3060                         break;
3061                     case TCG_MO_ALL:
3062                         m_op = "all";
3063                         break;
3064                     default:
3065                         g_assert_not_reached();
3066                     }
3067 
3068                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3069                     i++, k++;
3070                 }
3071                 break;
3072             default:
3073                 break;
3074             }
3075             for (; i < nb_cargs; i++, k++) {
3076                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3077                                   op->args[k]);
3078             }
3079         }
3080 
3081         if (have_prefs || op->life) {
3082             for (; col < 40; ++col) {
3083                 putc(' ', f);
3084             }
3085         }
3086 
3087         if (op->life) {
3088             unsigned life = op->life;
3089 
3090             if (life & (SYNC_ARG * 3)) {
3091                 ne_fprintf(f, "  sync:");
3092                 for (i = 0; i < 2; ++i) {
3093                     if (life & (SYNC_ARG << i)) {
3094                         ne_fprintf(f, " %d", i);
3095                     }
3096                 }
3097             }
3098             life /= DEAD_ARG;
3099             if (life) {
3100                 ne_fprintf(f, "  dead:");
3101                 for (i = 0; life; ++i, life >>= 1) {
3102                     if (life & 1) {
3103                         ne_fprintf(f, " %d", i);
3104                     }
3105                 }
3106             }
3107         }
3108 
3109         if (have_prefs) {
3110             for (i = 0; i < nb_oargs; ++i) {
3111                 TCGRegSet set = output_pref(op, i);
3112 
3113                 if (i == 0) {
3114                     ne_fprintf(f, "  pref=");
3115                 } else {
3116                     ne_fprintf(f, ",");
3117                 }
3118                 if (set == 0) {
3119                     ne_fprintf(f, "none");
3120                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3121                     ne_fprintf(f, "all");
3122 #ifdef CONFIG_DEBUG_TCG
3123                 } else if (tcg_regset_single(set)) {
3124                     TCGReg reg = tcg_regset_first(set);
3125                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3126 #endif
3127                 } else if (TCG_TARGET_NB_REGS <= 32) {
3128                     ne_fprintf(f, "0x%x", (uint32_t)set);
3129                 } else {
3130                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3131                 }
3132             }
3133         }
3134 
3135         putc('\n', f);
3136     }
3137 }
3138 
3139 /* Give higher priority to constraints with fewer registers. */
3140 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3141 {
3142     int n;
3143 
3144     arg_ct += k;
3145     n = ctpop64(arg_ct->regs);
3146 
3147     /*
3148      * Sort constraints of a single register first, which includes output
3149      * aliases (which must exactly match the input already allocated).
3150      */
3151     if (n == 1 || arg_ct->oalias) {
3152         return INT_MAX;
3153     }
3154 
3155     /*
3156      * Sort register pairs next, first then second immediately after.
3157      * Arbitrarily sort multiple pairs by the index of the first reg;
3158      * there shouldn't be many pairs.
3159      */
3160     switch (arg_ct->pair) {
3161     case 1:
3162     case 3:
3163         return (k + 1) * 2;
3164     case 2:
3165         return (arg_ct->pair_index + 1) * 2 - 1;
3166     }
3167 
3168     /* Finally, sort by decreasing register count. */
3169     assert(n > 1);
3170     return -n;
3171 }
3172 
3173 /* sort from highest priority to lowest */
3174 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3175 {
3176     int i, j;
3177 
3178     for (i = 0; i < n; i++) {
3179         a[start + i].sort_index = start + i;
3180     }
3181     if (n <= 1) {
3182         return;
3183     }
3184     for (i = 0; i < n - 1; i++) {
3185         for (j = i + 1; j < n; j++) {
3186             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3187             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3188             if (p1 < p2) {
3189                 int tmp = a[start + i].sort_index;
3190                 a[start + i].sort_index = a[start + j].sort_index;
3191                 a[start + j].sort_index = tmp;
3192             }
3193         }
3194     }
3195 }
3196 
3197 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3198 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3199 
3200 static void process_constraint_sets(void)
3201 {
3202     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3203         const TCGConstraintSet *tdefs = &constraint_sets[c];
3204         TCGArgConstraint *args_ct = all_cts[c];
3205         int nb_oargs = tdefs->nb_oargs;
3206         int nb_iargs = tdefs->nb_iargs;
3207         int nb_args = nb_oargs + nb_iargs;
3208         bool saw_alias_pair = false;
3209 
3210         for (int i = 0; i < nb_args; i++) {
3211             const char *ct_str = tdefs->args_ct_str[i];
3212             bool input_p = i >= nb_oargs;
3213             int o;
3214 
3215             switch (*ct_str) {
3216             case '0' ... '9':
3217                 o = *ct_str - '0';
3218                 tcg_debug_assert(input_p);
3219                 tcg_debug_assert(o < nb_oargs);
3220                 tcg_debug_assert(args_ct[o].regs != 0);
3221                 tcg_debug_assert(!args_ct[o].oalias);
3222                 args_ct[i] = args_ct[o];
3223                 /* The output sets oalias.  */
3224                 args_ct[o].oalias = 1;
3225                 args_ct[o].alias_index = i;
3226                 /* The input sets ialias. */
3227                 args_ct[i].ialias = 1;
3228                 args_ct[i].alias_index = o;
3229                 if (args_ct[i].pair) {
3230                     saw_alias_pair = true;
3231                 }
3232                 tcg_debug_assert(ct_str[1] == '\0');
3233                 continue;
3234 
3235             case '&':
3236                 tcg_debug_assert(!input_p);
3237                 args_ct[i].newreg = true;
3238                 ct_str++;
3239                 break;
3240 
3241             case 'p': /* plus */
3242                 /* Allocate to the register after the previous. */
3243                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3244                 o = i - 1;
3245                 tcg_debug_assert(!args_ct[o].pair);
3246                 tcg_debug_assert(!args_ct[o].ct);
3247                 args_ct[i] = (TCGArgConstraint){
3248                     .pair = 2,
3249                     .pair_index = o,
3250                     .regs = args_ct[o].regs << 1,
3251                     .newreg = args_ct[o].newreg,
3252                 };
3253                 args_ct[o].pair = 1;
3254                 args_ct[o].pair_index = i;
3255                 tcg_debug_assert(ct_str[1] == '\0');
3256                 continue;
3257 
3258             case 'm': /* minus */
3259                 /* Allocate to the register before the previous. */
3260                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3261                 o = i - 1;
3262                 tcg_debug_assert(!args_ct[o].pair);
3263                 tcg_debug_assert(!args_ct[o].ct);
3264                 args_ct[i] = (TCGArgConstraint){
3265                     .pair = 1,
3266                     .pair_index = o,
3267                     .regs = args_ct[o].regs >> 1,
3268                     .newreg = args_ct[o].newreg,
3269                 };
3270                 args_ct[o].pair = 2;
3271                 args_ct[o].pair_index = i;
3272                 tcg_debug_assert(ct_str[1] == '\0');
3273                 continue;
3274             }
3275 
3276             do {
3277                 switch (*ct_str) {
3278                 case 'i':
3279                     args_ct[i].ct |= TCG_CT_CONST;
3280                     break;
3281 #ifdef TCG_REG_ZERO
3282                 case 'z':
3283                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3284                     break;
3285 #endif
3286 
3287                 /* Include all of the target-specific constraints. */
3288 
3289 #undef CONST
3290 #define CONST(CASE, MASK) \
3291     case CASE: args_ct[i].ct |= MASK; break;
3292 #define REGS(CASE, MASK) \
3293     case CASE: args_ct[i].regs |= MASK; break;
3294 
3295 #include "tcg-target-con-str.h"
3296 
3297 #undef REGS
3298 #undef CONST
3299                 default:
3300                 case '0' ... '9':
3301                 case '&':
3302                 case 'p':
3303                 case 'm':
3304                     /* Typo in TCGConstraintSet constraint. */
3305                     g_assert_not_reached();
3306                 }
3307             } while (*++ct_str != '\0');
3308         }
3309 
3310         /*
3311          * Fix up output pairs that are aliased with inputs.
3312          * When we created the alias, we copied pair from the output.
3313          * There are three cases:
3314          *    (1a) Pairs of inputs alias pairs of outputs.
3315          *    (1b) One input aliases the first of a pair of outputs.
3316          *    (2)  One input aliases the second of a pair of outputs.
3317          *
3318          * Case 1a is handled by making sure that the pair_index'es are
3319          * properly updated so that they appear the same as a pair of inputs.
3320          *
3321          * Case 1b is handled by setting the pair_index of the input to
3322          * itself, simply so it doesn't point to an unrelated argument.
3323          * Since we don't encounter the "second" during the input allocation
3324          * phase, nothing happens with the second half of the input pair.
3325          *
3326          * Case 2 is handled by setting the second input to pair=3, the
3327          * first output to pair=3, and the pair_index'es to match.
3328          */
3329         if (saw_alias_pair) {
3330             for (int i = nb_oargs; i < nb_args; i++) {
3331                 int o, o2, i2;
3332 
3333                 /*
3334                  * Since [0-9pm] must be alone in the constraint string,
3335                  * the only way they can both be set is if the pair comes
3336                  * from the output alias.
3337                  */
3338                 if (!args_ct[i].ialias) {
3339                     continue;
3340                 }
3341                 switch (args_ct[i].pair) {
3342                 case 0:
3343                     break;
3344                 case 1:
3345                     o = args_ct[i].alias_index;
3346                     o2 = args_ct[o].pair_index;
3347                     tcg_debug_assert(args_ct[o].pair == 1);
3348                     tcg_debug_assert(args_ct[o2].pair == 2);
3349                     if (args_ct[o2].oalias) {
3350                         /* Case 1a */
3351                         i2 = args_ct[o2].alias_index;
3352                         tcg_debug_assert(args_ct[i2].pair == 2);
3353                         args_ct[i2].pair_index = i;
3354                         args_ct[i].pair_index = i2;
3355                     } else {
3356                         /* Case 1b */
3357                         args_ct[i].pair_index = i;
3358                     }
3359                     break;
3360                 case 2:
3361                     o = args_ct[i].alias_index;
3362                     o2 = args_ct[o].pair_index;
3363                     tcg_debug_assert(args_ct[o].pair == 2);
3364                     tcg_debug_assert(args_ct[o2].pair == 1);
3365                     if (args_ct[o2].oalias) {
3366                         /* Case 1a */
3367                         i2 = args_ct[o2].alias_index;
3368                         tcg_debug_assert(args_ct[i2].pair == 1);
3369                         args_ct[i2].pair_index = i;
3370                         args_ct[i].pair_index = i2;
3371                     } else {
3372                         /* Case 2 */
3373                         args_ct[i].pair = 3;
3374                         args_ct[o2].pair = 3;
3375                         args_ct[i].pair_index = o2;
3376                         args_ct[o2].pair_index = i;
3377                     }
3378                     break;
3379                 default:
3380                     g_assert_not_reached();
3381                 }
3382             }
3383         }
3384 
3385         /* sort the constraints (XXX: this is just a heuristic) */
3386         sort_constraints(args_ct, 0, nb_oargs);
3387         sort_constraints(args_ct, nb_oargs, nb_iargs);
3388     }
3389 }
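
/*
 * For example, a hypothetical constraint set { "r", "r", "0" } with one
 * output and two inputs parses as follows: args_ct[0] and args_ct[1]
 * receive the register mask for 'r'; the "0" input copies args_ct[0]
 * and the two become cross-linked:
 *
 *     args_ct[0].oalias = 1;  args_ct[0].alias_index = 2;
 *     args_ct[2].ialias = 1;  args_ct[2].alias_index = 0;
 *
 * get_constraint_priority then returns INT_MAX for the aliased output,
 * so sort_constraints allocates it before any other output.
 */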
3390 
3391 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3392 {
3393     TCGOpcode opc = op->opc;
3394     TCGType type = TCGOP_TYPE(op);
3395     unsigned flags = TCGOP_FLAGS(op);
3396     const TCGOpDef *def = &tcg_op_defs[opc];
3397     const TCGOutOp *outop = all_outop[opc];
3398     TCGConstraintSetIndex con_set;
3399 
3400     if (def->flags & TCG_OPF_NOT_PRESENT) {
3401         return empty_cts;
3402     }
3403 
3404     if (outop) {
3405         con_set = outop->static_constraint;
3406         if (con_set == C_Dynamic) {
3407             con_set = outop->dynamic_constraint(type, flags);
3408         }
3409     } else {
3410         con_set = tcg_target_op_def(opc, type, flags);
3411     }
3412     tcg_debug_assert(con_set >= 0);
3413     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3414 
3415     /* The constraint arguments must match TCGOpcode arguments. */
3416     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3417     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3418 
3419     return all_cts[con_set];
3420 }
3421 
3422 static void remove_label_use(TCGOp *op, int idx)
3423 {
3424     TCGLabel *label = arg_label(op->args[idx]);
3425     TCGLabelUse *use;
3426 
3427     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3428         if (use->op == op) {
3429             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3430             return;
3431         }
3432     }
3433     g_assert_not_reached();
3434 }
3435 
3436 void tcg_op_remove(TCGContext *s, TCGOp *op)
3437 {
3438     switch (op->opc) {
3439     case INDEX_op_br:
3440         remove_label_use(op, 0);
3441         break;
3442     case INDEX_op_brcond_i32:
3443     case INDEX_op_brcond_i64:
3444         remove_label_use(op, 3);
3445         break;
3446     case INDEX_op_brcond2_i32:
3447         remove_label_use(op, 5);
3448         break;
3449     default:
3450         break;
3451     }
3452 
3453     QTAILQ_REMOVE(&s->ops, op, link);
3454     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3455     s->nb_ops--;
3456 }
3457 
3458 void tcg_remove_ops_after(TCGOp *op)
3459 {
3460     TCGContext *s = tcg_ctx;
3461 
3462     while (true) {
3463         TCGOp *last = tcg_last_op();
3464         if (last == op) {
3465             return;
3466         }
3467         tcg_op_remove(s, last);
3468     }
3469 }
3470 
3471 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3472 {
3473     TCGContext *s = tcg_ctx;
3474     TCGOp *op = NULL;
3475 
3476     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3477         QTAILQ_FOREACH(op, &s->free_ops, link) {
3478             if (nargs <= op->nargs) {
3479                 QTAILQ_REMOVE(&s->free_ops, op, link);
3480                 nargs = op->nargs;
3481                 goto found;
3482             }
3483         }
3484     }
3485 
3486     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3487     nargs = MAX(4, nargs);
3488     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3489 
3490  found:
3491     memset(op, 0, offsetof(TCGOp, link));
3492     op->opc = opc;
3493     op->nargs = nargs;
3494 
3495     /* Check for bitfield overflow. */
3496     tcg_debug_assert(op->nargs == nargs);
3497 
3498     s->nb_ops++;
3499     return op;
3500 }
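
/*
 * E.g. a request for a 2-argument op first searches free_ops for any
 * recycled TCGOp with nargs >= 2; failing that, the fresh allocation
 * is rounded up to 4 args so that the op can later be reused for the
 * common 3- and 4-operand opcodes.
 */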
3501 
3502 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3503 {
3504     TCGOp *op = tcg_op_alloc(opc, nargs);
3505 
3506     if (tcg_ctx->emit_before_op) {
3507         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3508     } else {
3509         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3510     }
3511     return op;
3512 }
3513 
3514 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3515                             TCGOpcode opc, TCGType type, unsigned nargs)
3516 {
3517     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3518 
3519     TCGOP_TYPE(new_op) = type;
3520     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3521     return new_op;
3522 }
3523 
3524 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3525                            TCGOpcode opc, TCGType type, unsigned nargs)
3526 {
3527     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3528 
3529     TCGOP_TYPE(new_op) = type;
3530     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3531     return new_op;
3532 }
3533 
3534 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3535 {
3536     TCGLabelUse *u;
3537 
3538     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3539         TCGOp *op = u->op;
3540         switch (op->opc) {
3541         case INDEX_op_br:
3542             op->args[0] = label_arg(to);
3543             break;
3544         case INDEX_op_brcond_i32:
3545         case INDEX_op_brcond_i64:
3546             op->args[3] = label_arg(to);
3547             break;
3548         case INDEX_op_brcond2_i32:
3549             op->args[5] = label_arg(to);
3550             break;
3551         default:
3552             g_assert_not_reached();
3553         }
3554     }
3555 
3556     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3557 }
3558 
3559 /* Reachability analysis: remove unreachable code.  */
3560 static void __attribute__((noinline))
3561 reachable_code_pass(TCGContext *s)
3562 {
3563     TCGOp *op, *op_next, *op_prev;
3564     bool dead = false;
3565 
3566     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3567         bool remove = dead;
3568         TCGLabel *label;
3569 
3570         switch (op->opc) {
3571         case INDEX_op_set_label:
3572             label = arg_label(op->args[0]);
3573 
3574             /*
3575              * Note that the first op in the TB is always a load,
3576              * so there is always something before a label.
3577              */
3578             op_prev = QTAILQ_PREV(op, link);
3579 
3580             /*
3581              * If we find two sequential labels, move all branches to
3582              * reference the second label and remove the first label.
3583              * Do this before branch to next optimization, so that the
3584              * middle label is out of the way.
3585              */
3586             if (op_prev->opc == INDEX_op_set_label) {
3587                 move_label_uses(label, arg_label(op_prev->args[0]));
3588                 tcg_op_remove(s, op_prev);
3589                 op_prev = QTAILQ_PREV(op, link);
3590             }
3591 
3592             /*
3593              * Optimization can fold conditional branches to unconditional.
3594              * If we find a label which is preceded by an unconditional
3595              * branch to next, remove the branch.  We couldn't do this when
3596              * processing the branch because any dead code between the branch
3597              * and label had not yet been removed.
3598              */
3599             if (op_prev->opc == INDEX_op_br &&
3600                 label == arg_label(op_prev->args[0])) {
3601                 tcg_op_remove(s, op_prev);
3602                 /* Fall through means insns become live again.  */
3603                 dead = false;
3604             }
3605 
3606             if (QSIMPLEQ_EMPTY(&label->branches)) {
3607                 /*
3608                  * While there is an occasional backward branch, virtually
3609                  * all branches generated by the translators are forward.
3610                  * Which means that, by the time we reach a label, we will
3611                  * generally have already removed all references to it, and
3612                  * there is little to be gained by iterating.
3613                  */
3614                 remove = true;
3615             } else {
3616                 /* Once we see a label, insns become live again.  */
3617                 dead = false;
3618                 remove = false;
3619             }
3620             break;
3621 
3622         case INDEX_op_br:
3623         case INDEX_op_exit_tb:
3624         case INDEX_op_goto_ptr:
3625             /* Unconditional branches; everything following is dead.  */
3626             dead = true;
3627             break;
3628 
3629         case INDEX_op_call:
3630             /* Notice noreturn helper calls, such as those raising exceptions.  */
3631             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3632                 dead = true;
3633             }
3634             break;
3635 
3636         case INDEX_op_insn_start:
3637             /* Never remove -- we need to keep these for unwind.  */
3638             remove = false;
3639             break;
3640 
3641         default:
3642             break;
3643         }
3644 
3645         if (remove) {
3646             tcg_op_remove(s, op);
3647         }
3648     }
3649 }
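
/*
 * Schematic example: once optimization folds "brcond ... $L0" into an
 * unconditional "br $L0", every op between the branch and the matching
 * "set_label $L0" is dead and removed (insn_start excepted).  The
 * branch then immediately precedes its own label and is removed as
 * branch-to-next; if that was the label's last use, the set_label
 * itself is removed in the same visit.
 */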
3650 
3651 #define TS_DEAD  1
3652 #define TS_MEM   2
3653 
3654 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3655 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3656 
3657 /* For liveness_pass_1, the register preferences for a given temp.  */
3658 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3659 {
3660     return ts->state_ptr;
3661 }
3662 
3663 /* For liveness_pass_1, reset the preferences for a given temp to the
3664  * maximal regset for its type.
3665  */
3666 static inline void la_reset_pref(TCGTemp *ts)
3667 {
3668     *la_temp_pref(ts)
3669         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3670 }
3671 
3672 /* liveness analysis: end of function: all temps are dead, and globals
3673    should be in memory. */
3674 static void la_func_end(TCGContext *s, int ng, int nt)
3675 {
3676     int i;
3677 
3678     for (i = 0; i < ng; ++i) {
3679         s->temps[i].state = TS_DEAD | TS_MEM;
3680         la_reset_pref(&s->temps[i]);
3681     }
3682     for (i = ng; i < nt; ++i) {
3683         s->temps[i].state = TS_DEAD;
3684         la_reset_pref(&s->temps[i]);
3685     }
3686 }
3687 
3688 /* liveness analysis: end of basic block: all temps are dead, globals
3689    and local temps should be in memory. */
3690 static void la_bb_end(TCGContext *s, int ng, int nt)
3691 {
3692     int i;
3693 
3694     for (i = 0; i < nt; ++i) {
3695         TCGTemp *ts = &s->temps[i];
3696         int state;
3697 
3698         switch (ts->kind) {
3699         case TEMP_FIXED:
3700         case TEMP_GLOBAL:
3701         case TEMP_TB:
3702             state = TS_DEAD | TS_MEM;
3703             break;
3704         case TEMP_EBB:
3705         case TEMP_CONST:
3706             state = TS_DEAD;
3707             break;
3708         default:
3709             g_assert_not_reached();
3710         }
3711         ts->state = state;
3712         la_reset_pref(ts);
3713     }
3714 }
3715 
3716 /* liveness analysis: sync globals back to memory.  */
3717 static void la_global_sync(TCGContext *s, int ng)
3718 {
3719     int i;
3720 
3721     for (i = 0; i < ng; ++i) {
3722         int state = s->temps[i].state;
3723         s->temps[i].state = state | TS_MEM;
3724         if (state == TS_DEAD) {
3725             /* If the global was previously dead, reset prefs.  */
3726             la_reset_pref(&s->temps[i]);
3727         }
3728     }
3729 }
3730 
3731 /*
3732  * liveness analysis: conditional branch: all temps are dead unless
3733  * explicitly live-across-conditional-branch, globals and local temps
3734  * should be synced.
3735  */
3736 static void la_bb_sync(TCGContext *s, int ng, int nt)
3737 {
3738     la_global_sync(s, ng);
3739 
3740     for (int i = ng; i < nt; ++i) {
3741         TCGTemp *ts = &s->temps[i];
3742         int state;
3743 
3744         switch (ts->kind) {
3745         case TEMP_TB:
3746             state = ts->state;
3747             ts->state = state | TS_MEM;
3748             if (state != TS_DEAD) {
3749                 continue;
3750             }
3751             break;
3752         case TEMP_EBB:
3753         case TEMP_CONST:
3754             continue;
3755         default:
3756             g_assert_not_reached();
3757         }
3758         la_reset_pref(&s->temps[i]);
3759     }
3760 }
3761 
3762 /* liveness analysis: sync globals back to memory and kill.  */
3763 static void la_global_kill(TCGContext *s, int ng)
3764 {
3765     int i;
3766 
3767     for (i = 0; i < ng; i++) {
3768         s->temps[i].state = TS_DEAD | TS_MEM;
3769         la_reset_pref(&s->temps[i]);
3770     }
3771 }
3772 
3773 /* liveness analysis: remove call-clobbered regs from prefs of live temps.  */
3774 static void la_cross_call(TCGContext *s, int nt)
3775 {
3776     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3777     int i;
3778 
3779     for (i = 0; i < nt; i++) {
3780         TCGTemp *ts = &s->temps[i];
3781         if (!(ts->state & TS_DEAD)) {
3782             TCGRegSet *pset = la_temp_pref(ts);
3783             TCGRegSet set = *pset;
3784 
3785             set &= mask;
3786             /* If the combination is not possible, restart.  */
3787             if (set == 0) {
3788                 set = tcg_target_available_regs[ts->type] & mask;
3789             }
3790             *pset = set;
3791         }
3792     }
3793 }
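
/*
 * E.g. a temp whose preference set contained only call-clobbered
 * registers ends up with set & mask == 0 across a call; rather than
 * keep an unsatisfiable empty set, its preference restarts from all
 * call-saved registers available for its type.
 */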
3794 
3795 /*
3796  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3797  * to TEMP_EBB, if possible.
3798  */
3799 static void __attribute__((noinline))
3800 liveness_pass_0(TCGContext *s)
3801 {
3802     void * const multiple_ebb = (void *)(uintptr_t)-1;
3803     int nb_temps = s->nb_temps;
3804     TCGOp *op, *ebb;
3805 
3806     for (int i = s->nb_globals; i < nb_temps; ++i) {
3807         s->temps[i].state_ptr = NULL;
3808     }
3809 
3810     /*
3811      * Represent each EBB by the op at which it begins.  In the case of
3812      * the first EBB, this is the first op, otherwise it is a label.
3813      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3814      * within a single EBB, else MULTIPLE_EBB.
3815      */
3816     ebb = QTAILQ_FIRST(&s->ops);
3817     QTAILQ_FOREACH(op, &s->ops, link) {
3818         const TCGOpDef *def;
3819         int nb_oargs, nb_iargs;
3820 
3821         switch (op->opc) {
3822         case INDEX_op_set_label:
3823             ebb = op;
3824             continue;
3825         case INDEX_op_discard:
3826             continue;
3827         case INDEX_op_call:
3828             nb_oargs = TCGOP_CALLO(op);
3829             nb_iargs = TCGOP_CALLI(op);
3830             break;
3831         default:
3832             def = &tcg_op_defs[op->opc];
3833             nb_oargs = def->nb_oargs;
3834             nb_iargs = def->nb_iargs;
3835             break;
3836         }
3837 
3838         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3839             TCGTemp *ts = arg_temp(op->args[i]);
3840 
3841             if (ts->kind != TEMP_TB) {
3842                 continue;
3843             }
3844             if (ts->state_ptr == NULL) {
3845                 ts->state_ptr = ebb;
3846             } else if (ts->state_ptr != ebb) {
3847                 ts->state_ptr = multiple_ebb;
3848             }
3849         }
3850     }
3851 
3852     /*
3853      * For TEMP_TB that turned out not to be used beyond one EBB,
3854      * reduce the liveness to TEMP_EBB.
3855      */
3856     for (int i = s->nb_globals; i < nb_temps; ++i) {
3857         TCGTemp *ts = &s->temps[i];
3858         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3859             ts->kind = TEMP_EBB;
3860         }
3861     }
3862 }
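
/*
 * For example, a TEMP_TB temp that is written and read only between
 * one pair of labels has all of its uses recorded against the same
 * EBB head op, so its state_ptr never becomes MULTIPLE_EBB and its
 * kind is reduced to TEMP_EBB, sparing the cross-branch sync that
 * la_bb_sync performs for TEMP_TB.
 */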
3863 
3864 /* Liveness analysis: update the opc_arg_life array to tell if a
3865    given input argument is dead. Instructions updating dead
3866    temporaries are removed. */
3867 static void __attribute__((noinline))
3868 liveness_pass_1(TCGContext *s)
3869 {
3870     int nb_globals = s->nb_globals;
3871     int nb_temps = s->nb_temps;
3872     TCGOp *op, *op_prev;
3873     TCGRegSet *prefs;
3874     int i;
3875 
3876     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3877     for (i = 0; i < nb_temps; ++i) {
3878         s->temps[i].state_ptr = prefs + i;
3879     }
3880 
3881     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3882     la_func_end(s, nb_globals, nb_temps);
3883 
3884     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3885         int nb_iargs, nb_oargs;
3886         TCGOpcode opc_new, opc_new2;
3887         bool have_opc_new2;
3888         TCGLifeData arg_life = 0;
3889         TCGTemp *ts;
3890         TCGOpcode opc = op->opc;
3891         const TCGOpDef *def = &tcg_op_defs[opc];
3892         const TCGArgConstraint *args_ct;
3893 
3894         switch (opc) {
3895         case INDEX_op_call:
3896             {
3897                 const TCGHelperInfo *info = tcg_call_info(op);
3898                 int call_flags = tcg_call_flags(op);
3899 
3900                 nb_oargs = TCGOP_CALLO(op);
3901                 nb_iargs = TCGOP_CALLI(op);
3902 
3903                 /* pure functions can be removed if their result is unused */
3904                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3905                     for (i = 0; i < nb_oargs; i++) {
3906                         ts = arg_temp(op->args[i]);
3907                         if (ts->state != TS_DEAD) {
3908                             goto do_not_remove_call;
3909                         }
3910                     }
3911                     goto do_remove;
3912                 }
3913             do_not_remove_call:
3914 
3915                 /* Output args are dead.  */
3916                 for (i = 0; i < nb_oargs; i++) {
3917                     ts = arg_temp(op->args[i]);
3918                     if (ts->state & TS_DEAD) {
3919                         arg_life |= DEAD_ARG << i;
3920                     }
3921                     if (ts->state & TS_MEM) {
3922                         arg_life |= SYNC_ARG << i;
3923                     }
3924                     ts->state = TS_DEAD;
3925                     la_reset_pref(ts);
3926                 }
3927 
3928                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3929                 memset(op->output_pref, 0, sizeof(op->output_pref));
3930 
3931                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3932                                     TCG_CALL_NO_READ_GLOBALS))) {
3933                     la_global_kill(s, nb_globals);
3934                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3935                     la_global_sync(s, nb_globals);
3936                 }
3937 
3938                 /* Record arguments that die in this helper.  */
3939                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3940                     ts = arg_temp(op->args[i]);
3941                     if (ts->state & TS_DEAD) {
3942                         arg_life |= DEAD_ARG << i;
3943                     }
3944                 }
3945 
3946                 /* For all live registers, remove call-clobbered prefs.  */
3947                 la_cross_call(s, nb_temps);
3948 
3949                 /*
3950                  * Input arguments are live for preceding opcodes.
3951                  *
3952                  * For those arguments that die, and will be allocated in
3953                  * registers, clear the register set for that arg, to be
3954                  * filled in below.  For args that will be on the stack,
3955                  * reset to any available reg.  Process arguments in reverse
3956                  * order so that if a temp is used more than once, the stack
3957                  * reset to max happens before the register reset to 0.
3958                  */
3959                 for (i = nb_iargs - 1; i >= 0; i--) {
3960                     const TCGCallArgumentLoc *loc = &info->in[i];
3961                     ts = arg_temp(op->args[nb_oargs + i]);
3962 
3963                     if (ts->state & TS_DEAD) {
3964                         switch (loc->kind) {
3965                         case TCG_CALL_ARG_NORMAL:
3966                         case TCG_CALL_ARG_EXTEND_U:
3967                         case TCG_CALL_ARG_EXTEND_S:
3968                             if (arg_slot_reg_p(loc->arg_slot)) {
3969                                 *la_temp_pref(ts) = 0;
3970                                 break;
3971                             }
3972                             /* fall through */
3973                         default:
3974                             *la_temp_pref(ts) =
3975                                 tcg_target_available_regs[ts->type];
3976                             break;
3977                         }
3978                         ts->state &= ~TS_DEAD;
3979                     }
3980                 }
3981 
3982                 /*
3983                  * For each input argument, add its input register to prefs.
3984                  * If a temp is used once, this produces a single set bit;
3985                  * if a temp is used multiple times, this produces a set.
3986                  */
3987                 for (i = 0; i < nb_iargs; i++) {
3988                     const TCGCallArgumentLoc *loc = &info->in[i];
3989                     ts = arg_temp(op->args[nb_oargs + i]);
3990 
3991                     switch (loc->kind) {
3992                     case TCG_CALL_ARG_NORMAL:
3993                     case TCG_CALL_ARG_EXTEND_U:
3994                     case TCG_CALL_ARG_EXTEND_S:
3995                         if (arg_slot_reg_p(loc->arg_slot)) {
3996                             tcg_regset_set_reg(*la_temp_pref(ts),
3997                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3998                         }
3999                         break;
4000                     default:
4001                         break;
4002                     }
4003                 }
4004             }
4005             break;
4006         case INDEX_op_insn_start:
4007             break;
4008         case INDEX_op_discard:
4009             /* mark the temporary as dead */
4010             ts = arg_temp(op->args[0]);
4011             ts->state = TS_DEAD;
4012             la_reset_pref(ts);
4013             break;
4014 
4015         case INDEX_op_add2_i32:
4016         case INDEX_op_add2_i64:
4017             opc_new = INDEX_op_add;
4018             goto do_addsub2;
4019         case INDEX_op_sub2_i32:
4020             opc_new = INDEX_op_sub_i32;
4021             goto do_addsub2;
4022         case INDEX_op_sub2_i64:
4023             opc_new = INDEX_op_sub_i64;
4024         do_addsub2:
4025             nb_iargs = 4;
4026             nb_oargs = 2;
4027             /* Test if the high part of the operation is dead, but not
4028                the low part.  The result can be optimized to a simple
4029                add or sub.  This often happens for an x86_64 guest when
4030                the cpu mode is set to 32 bit.  */
4031             if (arg_temp(op->args[1])->state == TS_DEAD) {
4032                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4033                     goto do_remove;
4034                 }
4035                 /* Replace the opcode and adjust the args in place,
4036                    leaving 3 unused args at the end.  */
4037                 op->opc = opc = opc_new;
4038                 op->args[1] = op->args[2];
4039                 op->args[2] = op->args[4];
4040                 /* Fall through and mark the single-word operation live.  */
4041                 nb_iargs = 2;
4042                 nb_oargs = 1;
4043             }
4044             goto do_not_remove;
4045 
4046         case INDEX_op_mulu2_i32:
4047             opc_new = INDEX_op_mul_i32;
4048             opc_new2 = INDEX_op_muluh_i32;
4049             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4050             goto do_mul2;
4051         case INDEX_op_muls2_i32:
4052             opc_new = INDEX_op_mul_i32;
4053             opc_new2 = INDEX_op_mulsh_i32;
4054             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4055             goto do_mul2;
4056         case INDEX_op_mulu2_i64:
4057             opc_new = INDEX_op_mul_i64;
4058             opc_new2 = INDEX_op_muluh_i64;
4059             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4060             goto do_mul2;
4061         case INDEX_op_muls2_i64:
4062             opc_new = INDEX_op_mul_i64;
4063             opc_new2 = INDEX_op_mulsh_i64;
4064             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4065             goto do_mul2;
4066         do_mul2:
4067             nb_iargs = 2;
4068             nb_oargs = 2;
4069             if (arg_temp(op->args[1])->state == TS_DEAD) {
4070                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4071                     /* Both parts of the operation are dead.  */
4072                     goto do_remove;
4073                 }
4074                 /* The high part of the operation is dead; generate the low. */
4075                 op->opc = opc = opc_new;
4076                 op->args[1] = op->args[2];
4077                 op->args[2] = op->args[3];
4078             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4079                 /* The low part of the operation is dead; generate the high. */
4080                 op->opc = opc = opc_new2;
4081                 op->args[0] = op->args[1];
4082                 op->args[1] = op->args[2];
4083                 op->args[2] = op->args[3];
4084             } else {
4085                 goto do_not_remove;
4086             }
4087             /* Mark the single-word operation live.  */
4088             nb_oargs = 1;
4089             goto do_not_remove;
4090 
4091         default:
4092             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4093             nb_iargs = def->nb_iargs;
4094             nb_oargs = def->nb_oargs;
4095 
4096             /* Test if the operation can be removed because all
4097                its outputs are dead. We assume that nb_oargs == 0
4098                implies side effects.  */
4099             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4100                 for (i = 0; i < nb_oargs; i++) {
4101                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4102                         goto do_not_remove;
4103                     }
4104                 }
4105                 goto do_remove;
4106             }
4107             goto do_not_remove;
4108 
4109         do_remove:
4110             tcg_op_remove(s, op);
4111             break;
4112 
4113         do_not_remove:
4114             for (i = 0; i < nb_oargs; i++) {
4115                 ts = arg_temp(op->args[i]);
4116 
4117                 /* Remember the preference of the uses that followed.  */
4118                 if (i < ARRAY_SIZE(op->output_pref)) {
4119                     op->output_pref[i] = *la_temp_pref(ts);
4120                 }
4121 
4122                 /* Output args are dead.  */
4123                 if (ts->state & TS_DEAD) {
4124                     arg_life |= DEAD_ARG << i;
4125                 }
4126                 if (ts->state & TS_MEM) {
4127                     arg_life |= SYNC_ARG << i;
4128                 }
4129                 ts->state = TS_DEAD;
4130                 la_reset_pref(ts);
4131             }
4132 
4133             /* If end of basic block, update.  */
4134             if (def->flags & TCG_OPF_BB_EXIT) {
4135                 la_func_end(s, nb_globals, nb_temps);
4136             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4137                 la_bb_sync(s, nb_globals, nb_temps);
4138             } else if (def->flags & TCG_OPF_BB_END) {
4139                 la_bb_end(s, nb_globals, nb_temps);
4140             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4141                 la_global_sync(s, nb_globals);
4142                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4143                     la_cross_call(s, nb_temps);
4144                 }
4145             }
4146 
4147             /* Record arguments that die in this opcode.  */
4148             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4149                 ts = arg_temp(op->args[i]);
4150                 if (ts->state & TS_DEAD) {
4151                     arg_life |= DEAD_ARG << i;
4152                 }
4153             }
4154 
4155             /* Input arguments are live for preceding opcodes.  */
4156             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4157                 ts = arg_temp(op->args[i]);
4158                 if (ts->state & TS_DEAD) {
4159                     /* For operands that were dead, initially allow
4160                        all regs for the type.  */
4161                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4162                     ts->state &= ~TS_DEAD;
4163                 }
4164             }
4165 
4166             /* Incorporate constraints for this operand.  */
4167             switch (opc) {
4168             case INDEX_op_mov:
4169                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4170                    have proper constraints.  That said, special case
4171                    moves to propagate preferences backward.  */
4172                 if (IS_DEAD_ARG(1)) {
4173                     *la_temp_pref(arg_temp(op->args[0]))
4174                         = *la_temp_pref(arg_temp(op->args[1]));
4175                 }
4176                 break;
4177 
4178             default:
4179                 args_ct = opcode_args_ct(op);
4180                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4181                     const TCGArgConstraint *ct = &args_ct[i];
4182                     TCGRegSet set, *pset;
4183 
4184                     ts = arg_temp(op->args[i]);
4185                     pset = la_temp_pref(ts);
4186                     set = *pset;
4187 
4188                     set &= ct->regs;
4189                     if (ct->ialias) {
4190                         set &= output_pref(op, ct->alias_index);
4191                     }
4192                     /* If the combination is not possible, restart.  */
4193                     if (set == 0) {
4194                         set = ct->regs;
4195                     }
4196                     *pset = set;
4197                 }
4198                 break;
4199             }
4200             break;
4201         }
4202         op->life = arg_life;
4203     }
4204 }
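
/*
 * Example of the do_addsub2 rewrite above: given
 *
 *     add2_i32 t0, t1, a0, a1, b0, b1
 *
 * with the high output t1 dead but t0 live, the op is rewritten in
 * place to "add t0, a0, b0" by shifting args[2] and args[4] down,
 * and is then processed as a live single-word operation.
 */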
4205 
4206 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4207 static bool __attribute__((noinline))
4208 liveness_pass_2(TCGContext *s)
4209 {
4210     int nb_globals = s->nb_globals;
4211     int nb_temps, i;
4212     bool changes = false;
4213     TCGOp *op, *op_next;
4214 
4215     /* Create a temporary for each indirect global.  */
4216     for (i = 0; i < nb_globals; ++i) {
4217         TCGTemp *its = &s->temps[i];
4218         if (its->indirect_reg) {
4219             TCGTemp *dts = tcg_temp_alloc(s);
4220             dts->type = its->type;
4221             dts->base_type = its->base_type;
4222             dts->temp_subindex = its->temp_subindex;
4223             dts->kind = TEMP_EBB;
4224             its->state_ptr = dts;
4225         } else {
4226             its->state_ptr = NULL;
4227         }
4228         /* All globals begin dead.  */
4229         its->state = TS_DEAD;
4230     }
4231     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4232         TCGTemp *its = &s->temps[i];
4233         its->state_ptr = NULL;
4234         its->state = TS_DEAD;
4235     }
4236 
4237     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4238         TCGOpcode opc = op->opc;
4239         const TCGOpDef *def = &tcg_op_defs[opc];
4240         TCGLifeData arg_life = op->life;
4241         int nb_iargs, nb_oargs, call_flags;
4242         TCGTemp *arg_ts, *dir_ts;
4243 
4244         if (opc == INDEX_op_call) {
4245             nb_oargs = TCGOP_CALLO(op);
4246             nb_iargs = TCGOP_CALLI(op);
4247             call_flags = tcg_call_flags(op);
4248         } else {
4249             nb_iargs = def->nb_iargs;
4250             nb_oargs = def->nb_oargs;
4251 
4252             /* Set flags similar to how calls require.  */
4253             if (def->flags & TCG_OPF_COND_BRANCH) {
4254                 /* Like reading globals: sync_globals */
4255                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4256             } else if (def->flags & TCG_OPF_BB_END) {
4257                 /* Like writing globals: save_globals */
4258                 call_flags = 0;
4259             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4260                 /* Like reading globals: sync_globals */
4261                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4262             } else {
4263                 /* No effect on globals.  */
4264                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4265                               TCG_CALL_NO_WRITE_GLOBALS);
4266             }
4267         }
4268 
4269         /* Make sure that input arguments are available.  */
4270         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4271             arg_ts = arg_temp(op->args[i]);
4272             dir_ts = arg_ts->state_ptr;
4273             if (dir_ts && arg_ts->state == TS_DEAD) {
4274                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4275                                   ? INDEX_op_ld_i32
4276                                   : INDEX_op_ld_i64);
4277                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4278                                                   arg_ts->type, 3);
4279 
4280                 lop->args[0] = temp_arg(dir_ts);
4281                 lop->args[1] = temp_arg(arg_ts->mem_base);
4282                 lop->args[2] = arg_ts->mem_offset;
4283 
4284                 /* Loaded, but synced with memory.  */
4285                 arg_ts->state = TS_MEM;
4286             }
4287         }
4288 
4289         /* Perform input replacement, and mark inputs that became dead.
4290            No action is required except keeping temp_state up to date
4291            so that we reload when needed.  */
4292         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4293             arg_ts = arg_temp(op->args[i]);
4294             dir_ts = arg_ts->state_ptr;
4295             if (dir_ts) {
4296                 op->args[i] = temp_arg(dir_ts);
4297                 changes = true;
4298                 if (IS_DEAD_ARG(i)) {
4299                     arg_ts->state = TS_DEAD;
4300                 }
4301             }
4302         }
4303 
4304         /* Liveness analysis should ensure that the following are
4305            all correct, for call sites and basic block end points.  */
4306         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4307             /* Nothing to do */
4308         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4309             for (i = 0; i < nb_globals; ++i) {
4310                 /* Liveness should see that globals are synced back,
4311                    that is, either TS_DEAD or TS_MEM.  */
4312                 arg_ts = &s->temps[i];
4313                 tcg_debug_assert(arg_ts->state_ptr == 0
4314                                  || arg_ts->state != 0);
4315             }
4316         } else {
4317             for (i = 0; i < nb_globals; ++i) {
4318                 /* Liveness should see that globals are saved back,
4319                    that is, TS_DEAD, waiting to be reloaded.  */
4320                 arg_ts = &s->temps[i];
4321                 tcg_debug_assert(arg_ts->state_ptr == 0
4322                                  || arg_ts->state == TS_DEAD);
4323             }
4324         }
4325 
4326         /* Outputs become available.  */
4327         if (opc == INDEX_op_mov) {
4328             arg_ts = arg_temp(op->args[0]);
4329             dir_ts = arg_ts->state_ptr;
4330             if (dir_ts) {
4331                 op->args[0] = temp_arg(dir_ts);
4332                 changes = true;
4333 
4334                 /* The output is now live and modified.  */
4335                 arg_ts->state = 0;
4336 
4337                 if (NEED_SYNC_ARG(0)) {
4338                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4339                                       ? INDEX_op_st_i32
4340                                       : INDEX_op_st_i64);
4341                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4342                                                      arg_ts->type, 3);
4343                     TCGTemp *out_ts = dir_ts;
4344 
4345                     if (IS_DEAD_ARG(0)) {
4346                         out_ts = arg_temp(op->args[1]);
4347                         arg_ts->state = TS_DEAD;
4348                         tcg_op_remove(s, op);
4349                     } else {
4350                         arg_ts->state = TS_MEM;
4351                     }
4352 
4353                     sop->args[0] = temp_arg(out_ts);
4354                     sop->args[1] = temp_arg(arg_ts->mem_base);
4355                     sop->args[2] = arg_ts->mem_offset;
4356                 } else {
4357                     tcg_debug_assert(!IS_DEAD_ARG(0));
4358                 }
4359             }
4360         } else {
4361             for (i = 0; i < nb_oargs; i++) {
4362                 arg_ts = arg_temp(op->args[i]);
4363                 dir_ts = arg_ts->state_ptr;
4364                 if (!dir_ts) {
4365                     continue;
4366                 }
4367                 op->args[i] = temp_arg(dir_ts);
4368                 changes = true;
4369 
4370                 /* The output is now live and modified.  */
4371                 arg_ts->state = 0;
4372 
4373                 /* Sync outputs upon their last write.  */
4374                 if (NEED_SYNC_ARG(i)) {
4375                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4376                                       ? INDEX_op_st_i32
4377                                       : INDEX_op_st_i64);
4378                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4379                                                      arg_ts->type, 3);
4380 
4381                     sop->args[0] = temp_arg(dir_ts);
4382                     sop->args[1] = temp_arg(arg_ts->mem_base);
4383                     sop->args[2] = arg_ts->mem_offset;
4384 
4385                     arg_ts->state = TS_MEM;
4386                 }
4387                 /* Drop outputs that are dead.  */
4388                 if (IS_DEAD_ARG(i)) {
4389                     arg_ts->state = TS_DEAD;
4390                 }
4391             }
4392         }
4393     }
4394 
4395     return changes;
4396 }
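
/*
 * For example, for a hypothetical indirect global G shadowed by the
 * direct temp D created above: a read of G while G's state is TS_DEAD
 * has "ld D, mem_base, mem_offset" inserted before the op, and a
 * write to G for which liveness set NEED_SYNC_ARG has a matching "st"
 * inserted after it, with G's state tracking whether D is coherent
 * with memory.
 */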
4397 
4398 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4399 {
4400     intptr_t off;
4401     int size, align;
4402 
4403     /* When allocating an object, look at the full type. */
4404     size = tcg_type_size(ts->base_type);
4405     switch (ts->base_type) {
4406     case TCG_TYPE_I32:
4407         align = 4;
4408         break;
4409     case TCG_TYPE_I64:
4410     case TCG_TYPE_V64:
4411         align = 8;
4412         break;
4413     case TCG_TYPE_I128:
4414     case TCG_TYPE_V128:
4415     case TCG_TYPE_V256:
4416         /*
4417          * Note that we do not require aligned storage for V256,
4418          * and that we provide alignment for I128 to match V128,
4419          * even if that's above what the host ABI requires.
4420          */
4421         align = 16;
4422         break;
4423     default:
4424         g_assert_not_reached();
4425     }
4426 
4427     /*
4428      * Assume the stack is sufficiently aligned.
4429      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4430      * and do not require 16 byte vector alignment.  This seems slightly
4431      * easier than fully parameterizing the above switch statement.
4432      */
4433     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4434     off = ROUND_UP(s->current_frame_offset, align);
4435 
4436     /* If we've exhausted the stack frame, restart with a smaller TB. */
4437     if (off + size > s->frame_end) {
4438         tcg_raise_tb_overflow(s);
4439     }
4440     s->current_frame_offset = off + size;
4441 #if defined(__sparc__)
4442     off += TCG_TARGET_STACK_BIAS;
4443 #endif
4444 
4445     /* If the object was subdivided, assign memory to all the parts. */
4446     if (ts->base_type != ts->type) {
4447         int part_size = tcg_type_size(ts->type);
4448         int part_count = size / part_size;
4449 
4450         /*
4451          * Each part is allocated sequentially in tcg_temp_new_internal.
4452          * Jump back to the first part by subtracting the current index.
4453          */
4454         ts -= ts->temp_subindex;
4455         for (int i = 0; i < part_count; ++i) {
4456             ts[i].mem_offset = off + i * part_size;
4457             ts[i].mem_base = s->frame_temp;
4458             ts[i].mem_allocated = 1;
4459         }
4460     } else {
4461         ts->mem_offset = off;
4462         ts->mem_base = s->frame_temp;
4463         ts->mem_allocated = 1;
4464     }
4465 }
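
/*
 * For example, an I128 temp subdivided into two I64 parts has size 16
 * and align 16 (capped by TCG_TARGET_STACK_ALIGN); the parts receive
 * mem_offset values off and off + 8 against frame_temp, starting from
 * the part with temp_subindex 0.
 */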
4466 
4467 /* Assign @reg to @ts, and update reg_to_temp[]. */
4468 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4469 {
4470     if (ts->val_type == TEMP_VAL_REG) {
4471         TCGReg old = ts->reg;
4472         tcg_debug_assert(s->reg_to_temp[old] == ts);
4473         if (old == reg) {
4474             return;
4475         }
4476         s->reg_to_temp[old] = NULL;
4477     }
4478     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4479     s->reg_to_temp[reg] = ts;
4480     ts->val_type = TEMP_VAL_REG;
4481     ts->reg = reg;
4482 }
4483 
4484 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4485 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4486 {
4487     tcg_debug_assert(type != TEMP_VAL_REG);
4488     if (ts->val_type == TEMP_VAL_REG) {
4489         TCGReg reg = ts->reg;
4490         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4491         s->reg_to_temp[reg] = NULL;
4492     }
4493     ts->val_type = type;
4494 }
4495 
4496 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4497 
4498 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4499    mark it free; otherwise mark it dead.  */
4500 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4501 {
4502     TCGTempVal new_type;
4503 
4504     switch (ts->kind) {
4505     case TEMP_FIXED:
4506         return;
4507     case TEMP_GLOBAL:
4508     case TEMP_TB:
4509         new_type = TEMP_VAL_MEM;
4510         break;
4511     case TEMP_EBB:
4512         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4513         break;
4514     case TEMP_CONST:
4515         new_type = TEMP_VAL_CONST;
4516         break;
4517     default:
4518         g_assert_not_reached();
4519     }
4520     set_temp_val_nonreg(s, ts, new_type);
4521 }
4522 
4523 /* Mark a temporary as dead.  */
4524 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4525 {
4526     temp_free_or_dead(s, ts, 1);
4527 }
4528 
4529 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4530    register needs to be allocated to store a constant.  If 'free_or_dead'
4531    is non-zero, subsequently release the temporary; if it is positive, the
4532    temp is dead; if it is negative, the temp is free.  */
4533 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4534                       TCGRegSet preferred_regs, int free_or_dead)
4535 {
4536     if (!temp_readonly(ts) && !ts->mem_coherent) {
4537         if (!ts->mem_allocated) {
4538             temp_allocate_frame(s, ts);
4539         }
4540         switch (ts->val_type) {
4541         case TEMP_VAL_CONST:
4542             /* If we're going to free the temp immediately, then we won't
4543                require it later in a register, so attempt to store the
4544                constant to memory directly.  */
4545             if (free_or_dead
4546                 && tcg_out_sti(s, ts->type, ts->val,
4547                                ts->mem_base->reg, ts->mem_offset)) {
4548                 break;
4549             }
4550             temp_load(s, ts, tcg_target_available_regs[ts->type],
4551                       allocated_regs, preferred_regs);
4552             /* fallthrough */
4553 
4554         case TEMP_VAL_REG:
4555             tcg_out_st(s, ts->type, ts->reg,
4556                        ts->mem_base->reg, ts->mem_offset);
4557             break;
4558 
4559         case TEMP_VAL_MEM:
4560             break;
4561 
4562         case TEMP_VAL_DEAD:
4563         default:
4564             g_assert_not_reached();
4565         }
4566         ts->mem_coherent = 1;
4567     }
4568     if (free_or_dead) {
4569         temp_free_or_dead(s, ts, free_or_dead);
4570     }
4571 }
4572 
4573 /* free register 'reg' by spilling the corresponding temporary if necessary */
4574 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4575 {
4576     TCGTemp *ts = s->reg_to_temp[reg];
4577     if (ts != NULL) {
4578         temp_sync(s, ts, allocated_regs, 0, -1);
4579     }
4580 }
4581 
4582 /**
4583  * tcg_reg_alloc:
4584  * @required_regs: Set of registers in which we must allocate.
4585  * @allocated_regs: Set of registers which must be avoided.
4586  * @preferred_regs: Set of registers we should prefer.
4587  * @rev: True if we search the registers in "indirect" order.
4588  *
4589  * The allocated register must be in @required_regs & ~@allocated_regs,
4590  * but if we can put it in @preferred_regs we may save a move later.
4591  */
4592 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4593                             TCGRegSet allocated_regs,
4594                             TCGRegSet preferred_regs, bool rev)
4595 {
4596     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4597     TCGRegSet reg_ct[2];
4598     const int *order;
4599 
4600     reg_ct[1] = required_regs & ~allocated_regs;
4601     tcg_debug_assert(reg_ct[1] != 0);
4602     reg_ct[0] = reg_ct[1] & preferred_regs;
4603 
4604     /* Skip the preferred_regs option if it cannot be satisfied,
4605        or if the preference made no difference.  */
4606     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4607 
4608     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4609 
4610     /* Try free registers, preferences first.  */
4611     for (j = f; j < 2; j++) {
4612         TCGRegSet set = reg_ct[j];
4613 
4614         if (tcg_regset_single(set)) {
4615             /* One register in the set.  */
4616             TCGReg reg = tcg_regset_first(set);
4617             if (s->reg_to_temp[reg] == NULL) {
4618                 return reg;
4619             }
4620         } else {
4621             for (i = 0; i < n; i++) {
4622                 TCGReg reg = order[i];
4623                 if (s->reg_to_temp[reg] == NULL &&
4624                     tcg_regset_test_reg(set, reg)) {
4625                     return reg;
4626                 }
4627             }
4628         }
4629     }
4630 
4631     /* We must spill something.  */
4632     for (j = f; j < 2; j++) {
4633         TCGRegSet set = reg_ct[j];
4634 
4635         if (tcg_regset_single(set)) {
4636             /* One register in the set.  */
4637             TCGReg reg = tcg_regset_first(set);
4638             tcg_reg_free(s, reg, allocated_regs);
4639             return reg;
4640         } else {
4641             for (i = 0; i < n; i++) {
4642                 TCGReg reg = order[i];
4643                 if (tcg_regset_test_reg(set, reg)) {
4644                     tcg_reg_free(s, reg, allocated_regs);
4645                     return reg;
4646                 }
4647             }
4648         }
4649     }
4650 
4651     g_assert_not_reached();
4652 }
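
/*
 * E.g. when the preferred subset is non-empty and strictly smaller
 * than the allowed set, the j == 0 pass scans only free registers in
 * that subset, possibly saving a move later; only when every allowed
 * register is occupied do we fall back to spilling, again trying the
 * preferred subset first.
 */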
4653 
4654 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4655                                  TCGRegSet allocated_regs,
4656                                  TCGRegSet preferred_regs, bool rev)
4657 {
4658     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4659     TCGRegSet reg_ct[2];
4660     const int *order;
4661 
4662     /* Keep reg I as a candidate only if neither I nor I+1 is allocated. */
4663     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4664     tcg_debug_assert(reg_ct[1] != 0);
4665     reg_ct[0] = reg_ct[1] & preferred_regs;
4666 
4667     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4668 
4669     /*
4670      * Skip the preferred_regs option if it cannot be satisfied,
4671      * or if the preference made no difference.
4672      */
4673     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4674 
4675     /*
4676      * Minimize the number of flushes by looking for 2 free registers first,
4677      * then a single flush, then two flushes.
4678      */
4679     for (fmin = 2; fmin >= 0; fmin--) {
4680         for (j = k; j < 2; j++) {
4681             TCGRegSet set = reg_ct[j];
4682 
4683             for (i = 0; i < n; i++) {
4684                 TCGReg reg = order[i];
4685 
4686                 if (tcg_regset_test_reg(set, reg)) {
4687                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4688                     if (f >= fmin) {
4689                         tcg_reg_free(s, reg, allocated_regs);
4690                         tcg_reg_free(s, reg + 1, allocated_regs);
4691                         return reg;
4692                     }
4693                 }
4694             }
4695         }
4696     }
4697     g_assert_not_reached();
4698 }
4699 
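/*
 * Worked example of the pair mask in tcg_reg_alloc_pair() (illustrative):
 * with only r2 in allocated_regs, allocated_regs >> 1 contributes r1, so
 * the union excludes both r2 (itself busy) and r1 (whose partner r2 is
 * busy) as base registers.  r0 remains a valid base precisely because
 * both r0 and its partner r1 are free.
 */
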
4700 /* Make sure the temporary is in a register.  If needed, allocate the register
4701    from DESIRED while avoiding ALLOCATED.  */
4702 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4703                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4704 {
4705     TCGReg reg;
4706 
4707     switch (ts->val_type) {
4708     case TEMP_VAL_REG:
4709         return;
4710     case TEMP_VAL_CONST:
4711         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4712                             preferred_regs, ts->indirect_base);
4713         if (ts->type <= TCG_TYPE_I64) {
4714             tcg_out_movi(s, ts->type, reg, ts->val);
4715         } else {
4716             uint64_t val = ts->val;
4717             MemOp vece = MO_64;
4718 
4719             /*
4720              * Find the minimal vector element that matches the constant.
4721              * The targets will, in general, have to do this search anyway,
4722              * so do it generically here.
4723              */
4724             if (val == dup_const(MO_8, val)) {
4725                 vece = MO_8;
4726             } else if (val == dup_const(MO_16, val)) {
4727                 vece = MO_16;
4728             } else if (val == dup_const(MO_32, val)) {
4729                 vece = MO_32;
4730             }
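            /*
             * E.g. (illustrative): val == 0x7f7f7f7f7f7f7f7f already
             * matches at MO_8, while 0x00ff00ff00ff00ff first matches
             * at MO_16, so the latter is emitted as a 16-bit dup.
             */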
4731 
4732             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4733         }
4734         ts->mem_coherent = 0;
4735         break;
4736     case TEMP_VAL_MEM:
4737         if (!ts->mem_allocated) {
4738             temp_allocate_frame(s, ts);
4739         }
4740         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4741                             preferred_regs, ts->indirect_base);
4742         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4743         ts->mem_coherent = 1;
4744         break;
4745     case TEMP_VAL_DEAD:
4746     default:
4747         g_assert_not_reached();
4748     }
4749     set_temp_val_reg(s, ts, reg);
4750 }
4751 
4752 /* Save a temporary to memory. 'allocated_regs' is used in case a
4753    temporary register needs to be allocated to store a constant.  */
4754 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4755 {
4756     /* The liveness analysis already ensures that globals are back
4757        in memory. Keep a tcg_debug_assert for safety. */
4758     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4759 }
4760 
4761 /* save globals to their canonical location and assume they can be
4762    modified by the following code. 'allocated_regs' is used in case a
4763    temporary register needs to be allocated to store a constant. */
4764 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4765 {
4766     int i, n;
4767 
4768     for (i = 0, n = s->nb_globals; i < n; i++) {
4769         temp_save(s, &s->temps[i], allocated_regs);
4770     }
4771 }
4772 
4773 /* sync globals to their canonical location and assume they can be
4774    read by the following code. 'allocated_regs' is used in case a
4775    temporary register needs to be allocated to store a constant. */
4776 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4777 {
4778     int i, n;
4779 
4780     for (i = 0, n = s->nb_globals; i < n; i++) {
4781         TCGTemp *ts = &s->temps[i];
4782         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4783                          || ts->kind == TEMP_FIXED
4784                          || ts->mem_coherent);
4785     }
4786 }
4787 
4788 /* at the end of a basic block, we assume all temporaries are dead and
4789    all globals are stored at their canonical location. */
4790 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4791 {
4792     int i;
4793 
4794     for (i = s->nb_globals; i < s->nb_temps; i++) {
4795         TCGTemp *ts = &s->temps[i];
4796 
4797         switch (ts->kind) {
4798         case TEMP_TB:
4799             temp_save(s, ts, allocated_regs);
4800             break;
4801         case TEMP_EBB:
4802             /* The liveness analysis already ensures that temps are dead.
4803                Keep a tcg_debug_assert for safety. */
4804             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4805             break;
4806         case TEMP_CONST:
4807             /* Similarly, we should have freed any allocated register. */
4808             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4809             break;
4810         default:
4811             g_assert_not_reached();
4812         }
4813     }
4814 
4815     save_globals(s, allocated_regs);
4816 }
4817 
4818 /*
4819  * At a conditional branch, we assume all temporaries are dead unless
4820  * explicitly live-across-conditional-branch; all globals and local
4821  * temps are synced to their location.
4822  */
4823 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4824 {
4825     sync_globals(s, allocated_regs);
4826 
4827     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4828         TCGTemp *ts = &s->temps[i];
4829         /*
4830          * The liveness analysis already ensures that temps are dead.
4831          * Keep tcg_debug_asserts for safety.
4832          */
4833         switch (ts->kind) {
4834         case TEMP_TB:
4835             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4836             break;
4837         case TEMP_EBB:
4838         case TEMP_CONST:
4839             break;
4840         default:
4841             g_assert_not_reached();
4842         }
4843     }
4844 }
4845 
4846 /*
4847  * Specialized code generation for INDEX_op_mov_* with a constant.
4848  */
4849 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4850                                   tcg_target_ulong val, TCGLifeData arg_life,
4851                                   TCGRegSet preferred_regs)
4852 {
4853     /* ENV should not be modified.  */
4854     tcg_debug_assert(!temp_readonly(ots));
4855 
4856     /* The movi is not explicitly generated here.  */
4857     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4858     ots->val = val;
4859     ots->mem_coherent = 0;
4860     if (NEED_SYNC_ARG(0)) {
4861         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4862     } else if (IS_DEAD_ARG(0)) {
4863         temp_dead(s, ots);
4864     }
4865 }
4866 
4867 /*
4868  * Specialized code generation for INDEX_op_mov_*.
4869  */
4870 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4871 {
4872     const TCGLifeData arg_life = op->life;
4873     TCGRegSet allocated_regs, preferred_regs;
4874     TCGTemp *ts, *ots;
4875     TCGType otype, itype;
4876     TCGReg oreg, ireg;
4877 
4878     allocated_regs = s->reserved_regs;
4879     preferred_regs = output_pref(op, 0);
4880     ots = arg_temp(op->args[0]);
4881     ts = arg_temp(op->args[1]);
4882 
4883     /* ENV should not be modified.  */
4884     tcg_debug_assert(!temp_readonly(ots));
4885 
4886     /* Note that otype != itype for no-op truncation.  */
4887     otype = ots->type;
4888     itype = ts->type;
4889 
4890     if (ts->val_type == TEMP_VAL_CONST) {
4891         /* propagate constant or generate sti */
4892         tcg_target_ulong val = ts->val;
4893         if (IS_DEAD_ARG(1)) {
4894             temp_dead(s, ts);
4895         }
4896         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4897         return;
4898     }
4899 
4900     /* If the source value is in memory we're going to be forced
4901        to have it in a register in order to perform the copy.  Copy
4902        the SOURCE value into its own register first, that way we
4903        don't have to reload SOURCE the next time it is used. */
4904     if (ts->val_type == TEMP_VAL_MEM) {
4905         temp_load(s, ts, tcg_target_available_regs[itype],
4906                   allocated_regs, preferred_regs);
4907     }
4908     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4909     ireg = ts->reg;
4910 
4911     if (IS_DEAD_ARG(0)) {
4912         /* mov to a non-saved dead register makes no sense (even with
4913            liveness analysis disabled). */
4914         tcg_debug_assert(NEED_SYNC_ARG(0));
4915         if (!ots->mem_allocated) {
4916             temp_allocate_frame(s, ots);
4917         }
4918         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4919         if (IS_DEAD_ARG(1)) {
4920             temp_dead(s, ts);
4921         }
4922         temp_dead(s, ots);
4923         return;
4924     }
4925 
4926     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4927         /*
4928          * The mov can be suppressed.  Kill input first, so that it
4929          * is unlinked from reg_to_temp, then set the output to the
4930          * reg that we saved from the input.
4931          */
4932         temp_dead(s, ts);
4933         oreg = ireg;
4934     } else {
4935         if (ots->val_type == TEMP_VAL_REG) {
4936             oreg = ots->reg;
4937         } else {
4938             /* Make sure to not spill the input register during allocation. */
4939             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4940                                  allocated_regs | ((TCGRegSet)1 << ireg),
4941                                  preferred_regs, ots->indirect_base);
4942         }
4943         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4944             /*
4945              * Cross register class move not supported.
4946              * Store the source register into the destination slot
4947              * and leave the destination temp as TEMP_VAL_MEM.
4948              */
4949             assert(!temp_readonly(ots));
4950             if (!ots->mem_allocated) {
4951                 temp_allocate_frame(s, ots);
4952             }
4953             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4954             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4955             ots->mem_coherent = 1;
4956             return;
4957         }
4958     }
4959     set_temp_val_reg(s, ots, oreg);
4960     ots->mem_coherent = 0;
4961 
4962     if (NEED_SYNC_ARG(0)) {
4963         temp_sync(s, ots, allocated_regs, 0, 0);
4964     }
4965 }
4966 
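/*
 * Note the suppressed-mov path above: when the input temp dies and is
 * not TEMP_FIXED, no register-to-register copy is emitted at all; the
 * output temp simply inherits the input's register via set_temp_val_reg().
 */
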
4967 /*
4968  * Specialized code generation for INDEX_op_dup_vec.
4969  */
4970 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4971 {
4972     const TCGLifeData arg_life = op->life;
4973     TCGRegSet dup_out_regs, dup_in_regs;
4974     const TCGArgConstraint *dup_args_ct;
4975     TCGTemp *its, *ots;
4976     TCGType itype, vtype;
4977     unsigned vece;
4978     int lowpart_ofs;
4979     bool ok;
4980 
4981     ots = arg_temp(op->args[0]);
4982     its = arg_temp(op->args[1]);
4983 
4984     /* ENV should not be modified.  */
4985     tcg_debug_assert(!temp_readonly(ots));
4986 
4987     itype = its->type;
4988     vece = TCGOP_VECE(op);
4989     vtype = TCGOP_TYPE(op);
4990 
4991     if (its->val_type == TEMP_VAL_CONST) {
4992         /* Propagate constant via movi -> dupi.  */
4993         tcg_target_ulong val = its->val;
4994         if (IS_DEAD_ARG(1)) {
4995             temp_dead(s, its);
4996         }
4997         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4998         return;
4999     }
5000 
5001     dup_args_ct = opcode_args_ct(op);
5002     dup_out_regs = dup_args_ct[0].regs;
5003     dup_in_regs = dup_args_ct[1].regs;
5004 
5005     /* Allocate the output register now.  */
5006     if (ots->val_type != TEMP_VAL_REG) {
5007         TCGRegSet allocated_regs = s->reserved_regs;
5008         TCGReg oreg;
5009 
5010         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5011             /* Make sure to not spill the input register. */
5012             tcg_regset_set_reg(allocated_regs, its->reg);
5013         }
5014         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5015                              output_pref(op, 0), ots->indirect_base);
5016         set_temp_val_reg(s, ots, oreg);
5017     }
5018 
5019     switch (its->val_type) {
5020     case TEMP_VAL_REG:
5021         /*
5022          * The dup constraints must be broad, covering all possible VECE.
5023          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5024          * to fail, indicating that extra moves are required for that case.
5025          */
5026         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5027             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5028                 goto done;
5029             }
5030             /* Try again from memory or a vector input register.  */
5031         }
5032         if (!its->mem_coherent) {
5033             /*
5034              * The input register is not synced, and so an extra store
5035              * would be required to use memory.  Attempt an integer-vector
5036              * register move first.  We do not have a TCGRegSet for this.
5037              */
5038             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5039                 break;
5040             }
5041             /* Sync the temp back to its slot and load from there.  */
5042             temp_sync(s, its, s->reserved_regs, 0, 0);
5043         }
5044         /* fall through */
5045 
5046     case TEMP_VAL_MEM:
5047         lowpart_ofs = 0;
5048         if (HOST_BIG_ENDIAN) {
5049             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5050         }
5051         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5052                              its->mem_offset + lowpart_ofs)) {
5053             goto done;
5054         }
5055         /* Load the input into the destination vector register. */
5056         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5057         break;
5058 
5059     default:
5060         g_assert_not_reached();
5061     }
5062 
5063     /* We now have a vector input register, so dup must succeed. */
5064     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5065     tcg_debug_assert(ok);
5066 
5067  done:
5068     ots->mem_coherent = 0;
5069     if (IS_DEAD_ARG(1)) {
5070         temp_dead(s, its);
5071     }
5072     if (NEED_SYNC_ARG(0)) {
5073         temp_sync(s, ots, s->reserved_regs, 0, 0);
5074     }
5075     if (IS_DEAD_ARG(0)) {
5076         temp_dead(s, ots);
5077     }
5078 }
5079 
5080 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5081 {
5082     const TCGLifeData arg_life = op->life;
5083     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5084     TCGRegSet i_allocated_regs;
5085     TCGRegSet o_allocated_regs;
5086     int i, k, nb_iargs, nb_oargs;
5087     TCGReg reg;
5088     TCGArg arg;
5089     const TCGArgConstraint *args_ct;
5090     const TCGArgConstraint *arg_ct;
5091     TCGTemp *ts;
5092     TCGArg new_args[TCG_MAX_OP_ARGS];
5093     int const_args[TCG_MAX_OP_ARGS];
5094     TCGCond op_cond;
5095 
5096     nb_oargs = def->nb_oargs;
5097     nb_iargs = def->nb_iargs;
5098 
5099     /* copy constants */
5100     memcpy(new_args + nb_oargs + nb_iargs,
5101            op->args + nb_oargs + nb_iargs,
5102            sizeof(TCGArg) * def->nb_cargs);
5103 
5104     i_allocated_regs = s->reserved_regs;
5105     o_allocated_regs = s->reserved_regs;
5106 
5107     switch (op->opc) {
5108     case INDEX_op_brcond_i32:
5109     case INDEX_op_brcond_i64:
5110         op_cond = op->args[2];
5111         break;
5112     case INDEX_op_setcond_i32:
5113     case INDEX_op_setcond_i64:
5114     case INDEX_op_negsetcond_i32:
5115     case INDEX_op_negsetcond_i64:
5116     case INDEX_op_cmp_vec:
5117         op_cond = op->args[3];
5118         break;
5119     case INDEX_op_brcond2_i32:
5120         op_cond = op->args[4];
5121         break;
5122     case INDEX_op_movcond_i32:
5123     case INDEX_op_movcond_i64:
5124     case INDEX_op_setcond2_i32:
5125     case INDEX_op_cmpsel_vec:
5126         op_cond = op->args[5];
5127         break;
5128     default:
5129         /* No condition within opcode. */
5130         op_cond = TCG_COND_ALWAYS;
5131         break;
5132     }
5133 
5134     args_ct = opcode_args_ct(op);
5135 
5136     /* satisfy input constraints */
5137     for (k = 0; k < nb_iargs; k++) {
5138         TCGRegSet i_preferred_regs, i_required_regs;
5139         bool allocate_new_reg, copyto_new_reg;
5140         TCGTemp *ts2;
5141         int i1, i2;
5142 
5143         i = args_ct[nb_oargs + k].sort_index;
5144         arg = op->args[i];
5145         arg_ct = &args_ct[i];
5146         ts = arg_temp(arg);
5147 
5148         if (ts->val_type == TEMP_VAL_CONST) {
5149 #ifdef TCG_REG_ZERO
5150             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5151                 /* Hardware zero register: indicate register via non-const. */
5152                 const_args[i] = 0;
5153                 new_args[i] = TCG_REG_ZERO;
5154                 continue;
5155             }
5156 #endif
5157 
5158             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5159                                        op_cond, TCGOP_VECE(op))) {
5160                 /* constant is OK for instruction */
5161                 const_args[i] = 1;
5162                 new_args[i] = ts->val;
5163                 continue;
5164             }
5165         }
5166 
5167         reg = ts->reg;
5168         i_preferred_regs = 0;
5169         i_required_regs = arg_ct->regs;
5170         allocate_new_reg = false;
5171         copyto_new_reg = false;
5172 
5173         switch (arg_ct->pair) {
5174         case 0: /* not paired */
5175             if (arg_ct->ialias) {
5176                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5177 
5178                 /*
5179                  * If the input is readonly, then it cannot also be an
5180                  * output and aliased to itself.  If the input is not
5181                  * dead after the instruction, we must allocate a new
5182                  * register and move it.
5183                  */
5184                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5185                     || args_ct[arg_ct->alias_index].newreg) {
5186                     allocate_new_reg = true;
5187                 } else if (ts->val_type == TEMP_VAL_REG) {
5188                     /*
5189                      * Check if the current register has already been
5190                      * allocated for another input.
5191                      */
5192                     allocate_new_reg =
5193                         tcg_regset_test_reg(i_allocated_regs, reg);
5194                 }
5195             }
5196             if (!allocate_new_reg) {
5197                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5198                           i_preferred_regs);
5199                 reg = ts->reg;
5200                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5201             }
5202             if (allocate_new_reg) {
5203                 /*
5204                  * Allocate a new register matching the constraint
5205                  * and move the temporary register into it.
5206                  */
5207                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5208                           i_allocated_regs, 0);
5209                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5210                                     i_preferred_regs, ts->indirect_base);
5211                 copyto_new_reg = true;
5212             }
5213             break;
5214 
5215         case 1:
5216             /* First of an input pair; if i1 == i2, the second is an output. */
5217             i1 = i;
5218             i2 = arg_ct->pair_index;
5219             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5220 
5221             /*
5222              * It is easier to default to allocating a new pair
5223              * and to identify a few cases where it's not required.
5224              */
5225             if (arg_ct->ialias) {
5226                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5227                 if (IS_DEAD_ARG(i1) &&
5228                     IS_DEAD_ARG(i2) &&
5229                     !temp_readonly(ts) &&
5230                     ts->val_type == TEMP_VAL_REG &&
5231                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5232                     tcg_regset_test_reg(i_required_regs, reg) &&
5233                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5234                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5235                     (ts2
5236                      ? ts2->val_type == TEMP_VAL_REG &&
5237                        ts2->reg == reg + 1 &&
5238                        !temp_readonly(ts2)
5239                      : s->reg_to_temp[reg + 1] == NULL)) {
5240                     break;
5241                 }
5242             } else {
5243                 /* Without aliasing, the pair must also be an input. */
5244                 tcg_debug_assert(ts2);
5245                 if (ts->val_type == TEMP_VAL_REG &&
5246                     ts2->val_type == TEMP_VAL_REG &&
5247                     ts2->reg == reg + 1 &&
5248                     tcg_regset_test_reg(i_required_regs, reg)) {
5249                     break;
5250                 }
5251             }
5252             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5253                                      0, ts->indirect_base);
5254             goto do_pair;
5255 
5256         case 2: /* pair second */
5257             reg = new_args[arg_ct->pair_index] + 1;
5258             goto do_pair;
5259 
5260         case 3: /* ialias with second output, no first input */
5261             tcg_debug_assert(arg_ct->ialias);
5262             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5263 
5264             if (IS_DEAD_ARG(i) &&
5265                 !temp_readonly(ts) &&
5266                 ts->val_type == TEMP_VAL_REG &&
5267                 reg > 0 &&
5268                 s->reg_to_temp[reg - 1] == NULL &&
5269                 tcg_regset_test_reg(i_required_regs, reg) &&
5270                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5271                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5272                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5273                 break;
5274             }
5275             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5276                                      i_allocated_regs, 0,
5277                                      ts->indirect_base);
5278             tcg_regset_set_reg(i_allocated_regs, reg);
5279             reg += 1;
5280             goto do_pair;
5281 
5282         do_pair:
5283             /*
5284              * If an aliased input is not dead after the instruction,
5285              * we must allocate a new register and move it.
5286              */
5287             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5288                 TCGRegSet t_allocated_regs = i_allocated_regs;
5289 
5290                 /*
5291                  * Because of the alias, and the continued life, make sure
5292                  * that the temp is somewhere *other* than the reg pair,
5293                  * and we get a copy in reg.
5294                  */
5295                 tcg_regset_set_reg(t_allocated_regs, reg);
5296                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5297                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5298                     /* If ts was already in reg, copy it somewhere else. */
5299                     TCGReg nr;
5300                     bool ok;
5301 
5302                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5303                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5304                                        t_allocated_regs, 0, ts->indirect_base);
5305                     ok = tcg_out_mov(s, ts->type, nr, reg);
5306                     tcg_debug_assert(ok);
5307 
5308                     set_temp_val_reg(s, ts, nr);
5309                 } else {
5310                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5311                               t_allocated_regs, 0);
5312                     copyto_new_reg = true;
5313                 }
5314             } else {
5315                 /* Preferably allocate to reg, otherwise copy. */
5316                 i_required_regs = (TCGRegSet)1 << reg;
5317                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5318                           i_preferred_regs);
5319                 copyto_new_reg = ts->reg != reg;
5320             }
5321             break;
5322 
5323         default:
5324             g_assert_not_reached();
5325         }
5326 
5327         if (copyto_new_reg) {
5328             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5329                 /*
5330                  * Cross register class move not supported.  Sync the
5331                  * temp back to its slot and load from there.
5332                  */
5333                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5334                 tcg_out_ld(s, ts->type, reg,
5335                            ts->mem_base->reg, ts->mem_offset);
5336             }
5337         }
5338         new_args[i] = reg;
5339         const_args[i] = 0;
5340         tcg_regset_set_reg(i_allocated_regs, reg);
5341     }
5342 
5343     /* mark dead temporaries and free the associated registers */
5344     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5345         if (IS_DEAD_ARG(i)) {
5346             temp_dead(s, arg_temp(op->args[i]));
5347         }
5348     }
5349 
5350     if (def->flags & TCG_OPF_COND_BRANCH) {
5351         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5352     } else if (def->flags & TCG_OPF_BB_END) {
5353         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5354     } else {
5355         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5356             /* XXX: permit generic clobber register list ? */
5357             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5358                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5359                     tcg_reg_free(s, i, i_allocated_regs);
5360                 }
5361             }
5362         }
5363         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5364             /* sync globals if the op has side effects and might trigger
5365                an exception. */
5366             sync_globals(s, i_allocated_regs);
5367         }
5368 
5369         /* satisfy the output constraints */
5370         for (k = 0; k < nb_oargs; k++) {
5371             i = args_ct[k].sort_index;
5372             arg = op->args[i];
5373             arg_ct = &args_ct[i];
5374             ts = arg_temp(arg);
5375 
5376             /* ENV should not be modified.  */
5377             tcg_debug_assert(!temp_readonly(ts));
5378 
5379             switch (arg_ct->pair) {
5380             case 0: /* not paired */
5381                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5382                     reg = new_args[arg_ct->alias_index];
5383                 } else if (arg_ct->newreg) {
5384                     reg = tcg_reg_alloc(s, arg_ct->regs,
5385                                         i_allocated_regs | o_allocated_regs,
5386                                         output_pref(op, k), ts->indirect_base);
5387                 } else {
5388                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5389                                         output_pref(op, k), ts->indirect_base);
5390                 }
5391                 break;
5392 
5393             case 1: /* first of pair */
5394                 if (arg_ct->oalias) {
5395                     reg = new_args[arg_ct->alias_index];
5396                 } else if (arg_ct->newreg) {
5397                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5398                                              i_allocated_regs | o_allocated_regs,
5399                                              output_pref(op, k),
5400                                              ts->indirect_base);
5401                 } else {
5402                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5403                                              output_pref(op, k),
5404                                              ts->indirect_base);
5405                 }
5406                 break;
5407 
5408             case 2: /* second of pair */
5409                 if (arg_ct->oalias) {
5410                     reg = new_args[arg_ct->alias_index];
5411                 } else {
5412                     reg = new_args[arg_ct->pair_index] + 1;
5413                 }
5414                 break;
5415 
5416             case 3: /* first of pair, aliasing with a second input */
5417                 tcg_debug_assert(!arg_ct->newreg);
5418                 reg = new_args[arg_ct->pair_index] - 1;
5419                 break;
5420 
5421             default:
5422                 g_assert_not_reached();
5423             }
5424             tcg_regset_set_reg(o_allocated_regs, reg);
5425             set_temp_val_reg(s, ts, reg);
5426             ts->mem_coherent = 0;
5427             new_args[i] = reg;
5428         }
5429     }
5430 
5431     /* emit instruction */
5432     TCGType type = TCGOP_TYPE(op);
5433     switch (op->opc) {
5434     case INDEX_op_ext_i32_i64:
5435         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5436         break;
5437     case INDEX_op_extu_i32_i64:
5438         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5439         break;
5440     case INDEX_op_extrl_i64_i32:
5441         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5442         break;
5443 
5444     case INDEX_op_add:
5445     case INDEX_op_and:
5446         {
5447             const TCGOutOpBinary *out =
5448                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5449 
5450             /* Constants should never appear in the first source operand. */
5451             tcg_debug_assert(!const_args[1]);
5452             if (const_args[2]) {
5453                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5454             } else {
5455                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5456             }
5457         }
5458         break;
5459 
5460     default:
5461         if (def->flags & TCG_OPF_VECTOR) {
5462             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5463                            TCGOP_VECE(op), new_args, const_args);
5464         } else {
5465             tcg_out_op(s, op->opc, type, new_args, const_args);
5466         }
5467         break;
5468     }
5469 
5470     /* move the outputs into the correct registers if needed */
5471     for (i = 0; i < nb_oargs; i++) {
5472         ts = arg_temp(op->args[i]);
5473 
5474         /* ENV should not be modified.  */
5475         tcg_debug_assert(!temp_readonly(ts));
5476 
5477         if (NEED_SYNC_ARG(i)) {
5478             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5479         } else if (IS_DEAD_ARG(i)) {
5480             temp_dead(s, ts);
5481         }
5482     }
5483 }
5484 
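/*
 * Example of the rri/rrr split above (illustrative): for INDEX_op_add
 * with a constant second source accepted by tcg_target_const_match(),
 * const_args[2] is set and out->out_rri() emits an add-immediate form;
 * otherwise the constant was already loaded into a register by the
 * input constraint loop and out->out_rrr() is used instead.
 */
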
5485 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5486 {
5487     const TCGLifeData arg_life = op->life;
5488     TCGTemp *ots, *itsl, *itsh;
5489     TCGType vtype = TCGOP_TYPE(op);
5490 
5491     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5492     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5493     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5494 
5495     ots = arg_temp(op->args[0]);
5496     itsl = arg_temp(op->args[1]);
5497     itsh = arg_temp(op->args[2]);
5498 
5499     /* ENV should not be modified.  */
5500     tcg_debug_assert(!temp_readonly(ots));
5501 
5502     /* Allocate the output register now.  */
5503     if (ots->val_type != TEMP_VAL_REG) {
5504         TCGRegSet allocated_regs = s->reserved_regs;
5505         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5506         TCGReg oreg;
5507 
5508         /* Make sure to not spill the input registers. */
5509         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5510             tcg_regset_set_reg(allocated_regs, itsl->reg);
5511         }
5512         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5513             tcg_regset_set_reg(allocated_regs, itsh->reg);
5514         }
5515 
5516         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5517                              output_pref(op, 0), ots->indirect_base);
5518         set_temp_val_reg(s, ots, oreg);
5519     }
5520 
5521     /* Promote dup2 of immediates to dupi_vec. */
5522     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5523         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5524         MemOp vece = MO_64;
5525 
5526         if (val == dup_const(MO_8, val)) {
5527             vece = MO_8;
5528         } else if (val == dup_const(MO_16, val)) {
5529             vece = MO_16;
5530         } else if (val == dup_const(MO_32, val)) {
5531             vece = MO_32;
5532         }
5533 
5534         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5535         goto done;
5536     }
5537 
5538     /* If the two inputs form one 64-bit value, try dupm_vec. */
5539     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5540         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5541         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5542         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5543 
5544         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5545         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5546 
5547         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5548                              its->mem_base->reg, its->mem_offset)) {
5549             goto done;
5550         }
5551     }
5552 
5553     /* Fall back to generic expansion. */
5554     return false;
5555 
5556  done:
5557     ots->mem_coherent = 0;
5558     if (IS_DEAD_ARG(1)) {
5559         temp_dead(s, itsl);
5560     }
5561     if (IS_DEAD_ARG(2)) {
5562         temp_dead(s, itsh);
5563     }
5564     if (NEED_SYNC_ARG(0)) {
5565         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5566     } else if (IS_DEAD_ARG(0)) {
5567         temp_dead(s, ots);
5568     }
5569     return true;
5570 }
5571 
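/*
 * Example of the dup2 promotion above (illustrative): constant inputs
 * lo = hi = 0x00000001 combine to 0x0000000100000001, which first
 * matches dup_const() at MO_32, so a single 32-bit dupi_vec suffices.
 */
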
5572 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5573                          TCGRegSet allocated_regs)
5574 {
5575     if (ts->val_type == TEMP_VAL_REG) {
5576         if (ts->reg != reg) {
5577             tcg_reg_free(s, reg, allocated_regs);
5578             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5579                 /*
5580                  * Cross register class move not supported.  Sync the
5581                  * temp back to its slot and load from there.
5582                  */
5583                 temp_sync(s, ts, allocated_regs, 0, 0);
5584                 tcg_out_ld(s, ts->type, reg,
5585                            ts->mem_base->reg, ts->mem_offset);
5586             }
5587         }
5588     } else {
5589         TCGRegSet arg_set = 0;
5590 
5591         tcg_reg_free(s, reg, allocated_regs);
5592         tcg_regset_set_reg(arg_set, reg);
5593         temp_load(s, ts, arg_set, allocated_regs, 0);
5594     }
5595 }
5596 
5597 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5598                          TCGRegSet allocated_regs)
5599 {
5600     /*
5601      * When the destination is on the stack, load up the temp and store.
5602      * If there are many call-saved registers, the temp might live to
5603      * see another use; otherwise it'll be discarded.
5604      */
5605     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5606     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5607                arg_slot_stk_ofs(arg_slot));
5608 }
5609 
5610 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5611                             TCGTemp *ts, TCGRegSet *allocated_regs)
5612 {
5613     if (arg_slot_reg_p(l->arg_slot)) {
5614         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5615         load_arg_reg(s, reg, ts, *allocated_regs);
5616         tcg_regset_set_reg(*allocated_regs, reg);
5617     } else {
5618         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5619     }
5620 }
5621 
5622 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5623                          intptr_t ref_off, TCGRegSet *allocated_regs)
5624 {
5625     TCGReg reg;
5626 
5627     if (arg_slot_reg_p(arg_slot)) {
5628         reg = tcg_target_call_iarg_regs[arg_slot];
5629         tcg_reg_free(s, reg, *allocated_regs);
5630         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5631         tcg_regset_set_reg(*allocated_regs, reg);
5632     } else {
5633         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5634                             *allocated_regs, 0, false);
5635         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5636         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5637                    arg_slot_stk_ofs(arg_slot));
5638     }
5639 }
5640 
5641 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5642 {
5643     const int nb_oargs = TCGOP_CALLO(op);
5644     const int nb_iargs = TCGOP_CALLI(op);
5645     const TCGLifeData arg_life = op->life;
5646     const TCGHelperInfo *info = tcg_call_info(op);
5647     TCGRegSet allocated_regs = s->reserved_regs;
5648     int i;
5649 
5650     /*
5651      * Move inputs into place in reverse order,
5652      * so that we place stacked arguments first.
5653      */
5654     for (i = nb_iargs - 1; i >= 0; --i) {
5655         const TCGCallArgumentLoc *loc = &info->in[i];
5656         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5657 
5658         switch (loc->kind) {
5659         case TCG_CALL_ARG_NORMAL:
5660         case TCG_CALL_ARG_EXTEND_U:
5661         case TCG_CALL_ARG_EXTEND_S:
5662             load_arg_normal(s, loc, ts, &allocated_regs);
5663             break;
5664         case TCG_CALL_ARG_BY_REF:
5665             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5666             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5667                          arg_slot_stk_ofs(loc->ref_slot),
5668                          &allocated_regs);
5669             break;
5670         case TCG_CALL_ARG_BY_REF_N:
5671             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5672             break;
5673         default:
5674             g_assert_not_reached();
5675         }
5676     }
5677 
5678     /* Mark dead temporaries and free the associated registers.  */
5679     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5680         if (IS_DEAD_ARG(i)) {
5681             temp_dead(s, arg_temp(op->args[i]));
5682         }
5683     }
5684 
5685     /* Clobber call registers.  */
5686     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5687         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5688             tcg_reg_free(s, i, allocated_regs);
5689         }
5690     }
5691 
5692     /*
5693      * Save globals if they might be written by the helper,
5694      * sync them if they might be read.
5695      */
5696     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5697         /* Nothing to do */
5698     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5699         sync_globals(s, allocated_regs);
5700     } else {
5701         save_globals(s, allocated_regs);
5702     }
5703 
5704     /*
5705      * If the ABI passes a pointer to the returned struct as the first
5706      * argument, load that now.  Pass a pointer to the output home slot.
5707      */
5708     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5709         TCGTemp *ts = arg_temp(op->args[0]);
5710 
5711         if (!ts->mem_allocated) {
5712             temp_allocate_frame(s, ts);
5713         }
5714         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5715     }
5716 
5717     tcg_out_call(s, tcg_call_func(op), info);
5718 
5719     /* Assign output registers and emit moves if needed.  */
5720     switch (info->out_kind) {
5721     case TCG_CALL_RET_NORMAL:
5722         for (i = 0; i < nb_oargs; i++) {
5723             TCGTemp *ts = arg_temp(op->args[i]);
5724             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5725 
5726             /* ENV should not be modified.  */
5727             tcg_debug_assert(!temp_readonly(ts));
5728 
5729             set_temp_val_reg(s, ts, reg);
5730             ts->mem_coherent = 0;
5731         }
5732         break;
5733 
5734     case TCG_CALL_RET_BY_VEC:
5735         {
5736             TCGTemp *ts = arg_temp(op->args[0]);
5737 
5738             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5739             tcg_debug_assert(ts->temp_subindex == 0);
5740             if (!ts->mem_allocated) {
5741                 temp_allocate_frame(s, ts);
5742             }
5743             tcg_out_st(s, TCG_TYPE_V128,
5744                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5745                        ts->mem_base->reg, ts->mem_offset);
5746         }
5747         /* fall through to mark all parts in memory */
5748 
5749     case TCG_CALL_RET_BY_REF:
5750         /* The callee has performed a write through the reference. */
5751         for (i = 0; i < nb_oargs; i++) {
5752             TCGTemp *ts = arg_temp(op->args[i]);
5753             ts->val_type = TEMP_VAL_MEM;
5754         }
5755         break;
5756 
5757     default:
5758         g_assert_not_reached();
5759     }
5760 
5761     /* Flush or discard output registers as needed. */
5762     for (i = 0; i < nb_oargs; i++) {
5763         TCGTemp *ts = arg_temp(op->args[i]);
5764         if (NEED_SYNC_ARG(i)) {
5765             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5766         } else if (IS_DEAD_ARG(i)) {
5767             temp_dead(s, ts);
5768         }
5769     }
5770 }
5771 
5772 /**
5773  * atom_and_align_for_opc:
5774  * @s: tcg context
5775  * @opc: memory operation code
5776  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5777  * @allow_two_ops: true if we are prepared to issue two operations
5778  *
5779  * Return the alignment and atomicity to use for the inline fast path
5780  * for the given memory operation.  The alignment may be larger than
5781  * that specified in @opc, and the correct alignment will be diagnosed
5782  * by the slow path helper.
5783  *
5784  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5785  * and issue two loads or stores for subalignment.
5786  */
5787 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5788                                            MemOp host_atom, bool allow_two_ops)
5789 {
5790     MemOp align = memop_alignment_bits(opc);
5791     MemOp size = opc & MO_SIZE;
5792     MemOp half = size ? size - 1 : 0;
5793     MemOp atom = opc & MO_ATOM_MASK;
5794     MemOp atmax;
5795 
5796     switch (atom) {
5797     case MO_ATOM_NONE:
5798         /* The operation requires no specific atomicity. */
5799         atmax = MO_8;
5800         break;
5801 
5802     case MO_ATOM_IFALIGN:
5803         atmax = size;
5804         break;
5805 
5806     case MO_ATOM_IFALIGN_PAIR:
5807         atmax = half;
5808         break;
5809 
5810     case MO_ATOM_WITHIN16:
5811         atmax = size;
5812         if (size == MO_128) {
5813             /* Misalignment implies !within16, and therefore no atomicity. */
5814         } else if (host_atom != MO_ATOM_WITHIN16) {
5815             /* The host does not implement within16, so require alignment. */
5816             align = MAX(align, size);
5817         }
5818         break;
5819 
5820     case MO_ATOM_WITHIN16_PAIR:
5821         atmax = size;
5822         /*
5823          * Misalignment implies !within16, and therefore half atomicity.
5824          * Any host prepared for two operations can implement this with
5825          * half alignment.
5826          */
5827         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5828             align = MAX(align, half);
5829         }
5830         break;
5831 
5832     case MO_ATOM_SUBALIGN:
5833         atmax = size;
5834         if (host_atom != MO_ATOM_SUBALIGN) {
5835             /* If unaligned but not odd, there are subobjects up to half. */
5836             if (allow_two_ops) {
5837                 align = MAX(align, half);
5838             } else {
5839                 align = MAX(align, size);
5840             }
5841         }
5842         break;
5843 
5844     default:
5845         g_assert_not_reached();
5846     }
5847 
5848     return (TCGAtomAlign){ .atom = atmax, .align = align };
5849 }
5850 
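/*
 * Worked example (illustrative): an MO_64 access tagged MO_ATOM_SUBALIGN
 * on a host whose model is MO_ATOM_IFALIGN yields atmax = MO_64; with
 * allow_two_ops the required alignment rises only to MO_32 (the half
 * size), otherwise it rises to the full MO_64.
 */
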
5851 /*
5852  * Similarly for qemu_ld/st slow path helpers.
5853  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5854  * using only the provided backend tcg_out_* functions.
5855  */
5856 
5857 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5858 {
5859     int ofs = arg_slot_stk_ofs(slot);
5860 
5861     /*
5862      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5863      * require extension to uint64_t, adjust the address for uint32_t.
5864      */
5865     if (HOST_BIG_ENDIAN &&
5866         TCG_TARGET_REG_BITS == 64 &&
5867         type == TCG_TYPE_I32) {
5868         ofs += 4;
5869     }
5870     return ofs;
5871 }
5872 
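/*
 * Example (illustrative): on a 64-bit big-endian host, a TCG_TYPE_I32
 * argument occupies the high-address half of its 8-byte stack slot,
 * hence the +4 adjustment above.
 */
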
5873 static void tcg_out_helper_load_slots(TCGContext *s,
5874                                       unsigned nmov, TCGMovExtend *mov,
5875                                       const TCGLdstHelperParam *parm)
5876 {
5877     unsigned i;
5878     TCGReg dst3;
5879 
5880     /*
5881      * Start from the end, storing to the stack first.
5882      * This frees those registers, so we need not consider overlap.
5883      */
5884     for (i = nmov; i-- > 0; ) {
5885         unsigned slot = mov[i].dst;
5886 
5887         if (arg_slot_reg_p(slot)) {
5888             goto found_reg;
5889         }
5890 
5891         TCGReg src = mov[i].src;
5892         TCGType dst_type = mov[i].dst_type;
5893         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5894 
5895         /* The argument is going onto the stack; extend into scratch. */
5896         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5897             tcg_debug_assert(parm->ntmp != 0);
5898             mov[i].dst = src = parm->tmp[0];
5899             tcg_out_movext1(s, &mov[i]);
5900         }
5901 
5902         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5903                    tcg_out_helper_stk_ofs(dst_type, slot));
5904     }
5905     return;
5906 
5907  found_reg:
5908     /*
5909      * The remaining arguments are in registers.
5910      * Convert slot numbers to argument registers.
5911      */
5912     nmov = i + 1;
5913     for (i = 0; i < nmov; ++i) {
5914         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5915     }
5916 
5917     switch (nmov) {
5918     case 4:
5919         /* The backend must have provided enough temps for the worst case. */
5920         tcg_debug_assert(parm->ntmp >= 2);
5921 
5922         dst3 = mov[3].dst;
5923         for (unsigned j = 0; j < 3; ++j) {
5924             if (dst3 == mov[j].src) {
5925                 /*
5926                  * Conflict. Copy the source to a temporary, perform the
5927                  * remaining moves, then the extension from our scratch
5928                  * on the way out.
5929                  */
5930                 TCGReg scratch = parm->tmp[1];
5931 
5932                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5933                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5934                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5935                 return;
5936             }
5937         }
5938 
5939         /* No conflicts: perform this move and continue. */
5940         tcg_out_movext1(s, &mov[3]);
5941         /* fall through */
5942 
5943     case 3:
5944         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5945                         parm->ntmp ? parm->tmp[0] : -1);
5946         break;
5947     case 2:
5948         tcg_out_movext2(s, mov, mov + 1,
5949                         parm->ntmp ? parm->tmp[0] : -1);
5950         break;
5951     case 1:
5952         tcg_out_movext1(s, mov);
5953         break;
5954     default:
5955         g_assert_not_reached();
5956     }
5957 }
5958 
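/*
 * The 4-move conflict check above handles e.g. mov[3].dst aliasing
 * mov[0].src: mov[3] must then run last, so its source is first parked
 * in parm->tmp[1] (in case the earlier moves overwrite it) and the
 * final extension is performed from that scratch register.
 */
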
5959 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5960                                     TCGType type, tcg_target_long imm,
5961                                     const TCGLdstHelperParam *parm)
5962 {
5963     if (arg_slot_reg_p(slot)) {
5964         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5965     } else {
5966         int ofs = tcg_out_helper_stk_ofs(type, slot);
5967         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5968             tcg_debug_assert(parm->ntmp != 0);
5969             tcg_out_movi(s, type, parm->tmp[0], imm);
5970             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5971         }
5972     }
5973 }
5974 
5975 static void tcg_out_helper_load_common_args(TCGContext *s,
5976                                             const TCGLabelQemuLdst *ldst,
5977                                             const TCGLdstHelperParam *parm,
5978                                             const TCGHelperInfo *info,
5979                                             unsigned next_arg)
5980 {
5981     TCGMovExtend ptr_mov = {
5982         .dst_type = TCG_TYPE_PTR,
5983         .src_type = TCG_TYPE_PTR,
5984         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5985     };
5986     const TCGCallArgumentLoc *loc = &info->in[0];
5987     TCGType type;
5988     unsigned slot;
5989     tcg_target_ulong imm;
5990 
5991     /*
5992      * Handle env, which is always first.
5993      */
5994     ptr_mov.dst = loc->arg_slot;
5995     ptr_mov.src = TCG_AREG0;
5996     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5997 
5998     /*
5999      * Handle oi.
6000      */
6001     imm = ldst->oi;
6002     loc = &info->in[next_arg];
6003     type = TCG_TYPE_I32;
6004     switch (loc->kind) {
6005     case TCG_CALL_ARG_NORMAL:
6006         break;
6007     case TCG_CALL_ARG_EXTEND_U:
6008     case TCG_CALL_ARG_EXTEND_S:
6009         /* No extension required for MemOpIdx. */
6010         tcg_debug_assert(imm <= INT32_MAX);
6011         type = TCG_TYPE_REG;
6012         break;
6013     default:
6014         g_assert_not_reached();
6015     }
6016     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6017     next_arg++;
6018 
6019     /*
6020      * Handle ra.
6021      */
6022     loc = &info->in[next_arg];
6023     slot = loc->arg_slot;
6024     if (parm->ra_gen) {
6025         int arg_reg = -1;
6026         TCGReg ra_reg;
6027 
6028         if (arg_slot_reg_p(slot)) {
6029             arg_reg = tcg_target_call_iarg_regs[slot];
6030         }
6031         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6032 
6033         ptr_mov.dst = slot;
6034         ptr_mov.src = ra_reg;
6035         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6036     } else {
6037         imm = (uintptr_t)ldst->raddr;
6038         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6039     }
6040 }
6041 
6042 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6043                                        const TCGCallArgumentLoc *loc,
6044                                        TCGType dst_type, TCGType src_type,
6045                                        TCGReg lo, TCGReg hi)
6046 {
6047     MemOp reg_mo;
6048 
6049     if (dst_type <= TCG_TYPE_REG) {
6050         MemOp src_ext;
6051 
6052         switch (loc->kind) {
6053         case TCG_CALL_ARG_NORMAL:
6054             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6055             break;
6056         case TCG_CALL_ARG_EXTEND_U:
6057             dst_type = TCG_TYPE_REG;
6058             src_ext = MO_UL;
6059             break;
6060         case TCG_CALL_ARG_EXTEND_S:
6061             dst_type = TCG_TYPE_REG;
6062             src_ext = MO_SL;
6063             break;
6064         default:
6065             g_assert_not_reached();
6066         }
6067 
6068         mov[0].dst = loc->arg_slot;
6069         mov[0].dst_type = dst_type;
6070         mov[0].src = lo;
6071         mov[0].src_type = src_type;
6072         mov[0].src_ext = src_ext;
6073         return 1;
6074     }
6075 
6076     if (TCG_TARGET_REG_BITS == 32) {
6077         assert(dst_type == TCG_TYPE_I64);
6078         reg_mo = MO_32;
6079     } else {
6080         assert(dst_type == TCG_TYPE_I128);
6081         reg_mo = MO_64;
6082     }
6083 
6084     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6085     mov[0].src = lo;
6086     mov[0].dst_type = TCG_TYPE_REG;
6087     mov[0].src_type = TCG_TYPE_REG;
6088     mov[0].src_ext = reg_mo;
6089 
6090     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6091     mov[1].src = hi;
6092     mov[1].dst_type = TCG_TYPE_REG;
6093     mov[1].src_type = TCG_TYPE_REG;
6094     mov[1].src_ext = reg_mo;
6095 
6096     return 2;
6097 }
6098 
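/*
 * Example (illustrative): a TCG_TYPE_I64 argument on a 32-bit host
 * expands to two TCG_TYPE_REG moves; with HOST_BIG_ENDIAN the low half
 * (lo) targets loc[1].arg_slot and the high half (hi) loc[0].arg_slot,
 * matching the in-memory order of the two halves.
 */
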
6099 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6100                                    const TCGLdstHelperParam *parm)
6101 {
6102     const TCGHelperInfo *info;
6103     const TCGCallArgumentLoc *loc;
6104     TCGMovExtend mov[2];
6105     unsigned next_arg, nmov;
6106     MemOp mop = get_memop(ldst->oi);
6107 
6108     switch (mop & MO_SIZE) {
6109     case MO_8:
6110     case MO_16:
6111     case MO_32:
6112         info = &info_helper_ld32_mmu;
6113         break;
6114     case MO_64:
6115         info = &info_helper_ld64_mmu;
6116         break;
6117     case MO_128:
6118         info = &info_helper_ld128_mmu;
6119         break;
6120     default:
6121         g_assert_not_reached();
6122     }
6123 
6124     /* Defer env argument. */
6125     next_arg = 1;
6126 
6127     loc = &info->in[next_arg];
6128     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6129         /*
6130          * 32-bit host with 32-bit guest: zero-extend the guest address
6131          * to 64 bits for the helper by storing the low part, then
6132          * load a zero for the high part.
6133          */
6134         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6135                                TCG_TYPE_I32, TCG_TYPE_I32,
6136                                ldst->addr_reg, -1);
6137         tcg_out_helper_load_slots(s, 1, mov, parm);
6138 
6139         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6140                                 TCG_TYPE_I32, 0, parm);
6141         next_arg += 2;
6142     } else {
6143         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6144                                       ldst->addr_reg, -1);
6145         tcg_out_helper_load_slots(s, nmov, mov, parm);
6146         next_arg += nmov;
6147     }
6148 
6149     switch (info->out_kind) {
6150     case TCG_CALL_RET_NORMAL:
6151     case TCG_CALL_RET_BY_VEC:
6152         break;
6153     case TCG_CALL_RET_BY_REF:
6154         /*
6155          * The return reference is in the first argument slot.
6156          * We need memory in which to return: re-use the top of stack.
6157          */
6158         {
6159             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6160 
6161             if (arg_slot_reg_p(0)) {
6162                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6163                                  TCG_REG_CALL_STACK, ofs_slot0);
6164             } else {
6165                 tcg_debug_assert(parm->ntmp != 0);
6166                 tcg_out_addi_ptr(s, parm->tmp[0],
6167                                  TCG_REG_CALL_STACK, ofs_slot0);
6168                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6169                            TCG_REG_CALL_STACK, ofs_slot0);
6170             }
6171         }
6172         break;
6173     default:
6174         g_assert_not_reached();
6175     }
6176 
6177     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6178 }
6179 
6180 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6181                                   bool load_sign,
6182                                   const TCGLdstHelperParam *parm)
6183 {
6184     MemOp mop = get_memop(ldst->oi);
6185     TCGMovExtend mov[2];
6186     int ofs_slot0;
6187 
6188     switch (ldst->type) {
6189     case TCG_TYPE_I64:
6190         if (TCG_TARGET_REG_BITS == 32) {
6191             break;
6192         }
6193         /* fall through */
6194 
6195     case TCG_TYPE_I32:
6196         mov[0].dst = ldst->datalo_reg;
6197         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6198         mov[0].dst_type = ldst->type;
6199         mov[0].src_type = TCG_TYPE_REG;
6200 
6201         /*
6202          * If load_sign, then we allowed the helper to perform the
6203          * appropriate sign extension to tcg_target_ulong, and all
6204          * we need now is a plain move.
6205          *
6206          * If not, then we expect the relevant extension
6207          * instruction to be no more expensive than a move, and
6208          * we thus save the icache etc by only using one of two
6209          * helper functions.
6210          */
6211         if (load_sign || !(mop & MO_SIGN)) {
6212             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6213                 mov[0].src_ext = MO_32;
6214             } else {
6215                 mov[0].src_ext = MO_64;
6216             }
6217         } else {
6218             mov[0].src_ext = mop & MO_SSIZE;
6219         }
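              /*
               * Example: for a signed byte load (MO_SB) with load_sign
               * clear, the helper returned the value zero-extended;
               * src_ext = MO_SB makes the movext below supply the 8-bit
               * sign extension.
               */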
6220         tcg_out_movext1(s, mov);
6221         return;
6222 
6223     case TCG_TYPE_I128:
6224         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6225         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6226         switch (TCG_TARGET_CALL_RET_I128) {
6227         case TCG_CALL_RET_NORMAL:
6228             break;
6229         case TCG_CALL_RET_BY_VEC:
6230             tcg_out_st(s, TCG_TYPE_V128,
6231                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6232                        TCG_REG_CALL_STACK, ofs_slot0);
6233             /* fall through */
6234         case TCG_CALL_RET_BY_REF:
6235             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6236                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6237             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6238                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6239             return;
6240         default:
6241             g_assert_not_reached();
6242         }
6243         break;
6244 
6245     default:
6246         g_assert_not_reached();
6247     }
6248 
6249     mov[0].dst = ldst->datalo_reg;
6250     mov[0].src =
6251         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6252     mov[0].dst_type = TCG_TYPE_REG;
6253     mov[0].src_type = TCG_TYPE_REG;
6254     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6255 
6256     mov[1].dst = ldst->datahi_reg;
6257     mov[1].src =
6258         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6259     mov[1].dst_type = TCG_TYPE_REG;
6260     mov[1].src_type = TCG_TYPE_REG;
6261     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6262 
6263     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6264 }
6265 
6266 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6267                                    const TCGLdstHelperParam *parm)
6268 {
6269     const TCGHelperInfo *info;
6270     const TCGCallArgumentLoc *loc;
6271     TCGMovExtend mov[4];
6272     TCGType data_type;
6273     unsigned next_arg, nmov, n;
6274     MemOp mop = get_memop(ldst->oi);
6275 
6276     switch (mop & MO_SIZE) {
6277     case MO_8:
6278     case MO_16:
6279     case MO_32:
6280         info = &info_helper_st32_mmu;
6281         data_type = TCG_TYPE_I32;
6282         break;
6283     case MO_64:
6284         info = &info_helper_st64_mmu;
6285         data_type = TCG_TYPE_I64;
6286         break;
6287     case MO_128:
6288         info = &info_helper_st128_mmu;
6289         data_type = TCG_TYPE_I128;
6290         break;
6291     default:
6292         g_assert_not_reached();
6293     }
6294 
6295     /* Defer env argument. */
6296     next_arg = 1;
6297     nmov = 0;
6298 
6299     /* Handle addr argument. */
6300     loc = &info->in[next_arg];
6301     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6302     if (TCG_TARGET_REG_BITS == 32) {
6303         /*
6304          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6305          * to 64-bits for the helper by storing the low part.  Later,
6306          * after we have processed the register inputs, we will load a
6307          * zero for the high part.
6308          */
6309         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6310                                TCG_TYPE_I32, TCG_TYPE_I32,
6311                                ldst->addr_reg, -1);
6312         next_arg += 2;
6313         nmov += 1;
6314     } else {
6315         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6316                                    ldst->addr_reg, -1);
6317         next_arg += n;
6318         nmov += n;
6319     }
6320 
6321     /* Handle data argument. */
6322     loc = &info->in[next_arg];
6323     switch (loc->kind) {
6324     case TCG_CALL_ARG_NORMAL:
6325     case TCG_CALL_ARG_EXTEND_U:
6326     case TCG_CALL_ARG_EXTEND_S:
6327         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6328                                    ldst->datalo_reg, ldst->datahi_reg);
6329         next_arg += n;
6330         nmov += n;
6331         tcg_out_helper_load_slots(s, nmov, mov, parm);
6332         break;
6333 
6334     case TCG_CALL_ARG_BY_REF:
6335         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6336         tcg_debug_assert(data_type == TCG_TYPE_I128);
6337         tcg_out_st(s, TCG_TYPE_I64,
6338                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6339                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6340         tcg_out_st(s, TCG_TYPE_I64,
6341                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6342                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6343 
6344         tcg_out_helper_load_slots(s, nmov, mov, parm);
6345 
6346         if (arg_slot_reg_p(loc->arg_slot)) {
6347             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6348                              TCG_REG_CALL_STACK,
6349                              arg_slot_stk_ofs(loc->ref_slot));
6350         } else {
6351             tcg_debug_assert(parm->ntmp != 0);
6352             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6353                              arg_slot_stk_ofs(loc->ref_slot));
6354             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6355                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6356         }
6357         next_arg += 2;
6358         break;
6359 
6360     default:
6361         g_assert_not_reached();
6362     }
6363 
6364     if (TCG_TARGET_REG_BITS == 32) {
6365         /* Zero extend the address by loading a zero for the high part. */
6366         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6367         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6368     }
6369 
6370     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6371 }
6372 
6373 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6374 {
6375     int i, start_words, num_insns;
6376     TCGOp *op;
6377 
6378     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6379                  && qemu_log_in_addr_range(pc_start))) {
6380         FILE *logfile = qemu_log_trylock();
6381         if (logfile) {
6382             fprintf(logfile, "OP:\n");
6383             tcg_dump_ops(s, logfile, false);
6384             fprintf(logfile, "\n");
6385             qemu_log_unlock(logfile);
6386         }
6387     }
6388 
6389 #ifdef CONFIG_DEBUG_TCG
6390     /* Ensure all labels referenced have been emitted.  */
6391     {
6392         TCGLabel *l;
6393         bool error = false;
6394 
6395         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6396             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6397                 qemu_log_mask(CPU_LOG_TB_OP,
6398                               "$L%d referenced but not present.\n", l->id);
6399                 error = true;
6400             }
6401         }
6402         assert(!error);
6403     }
6404 #endif
6405 
6406     /* Do not reuse any EBB that may be allocated within the TB. */
6407     tcg_temp_ebb_reset_freed(s);
6408 
6409     tcg_optimize(s);
6410 
6411     reachable_code_pass(s);
6412     liveness_pass_0(s);
6413     liveness_pass_1(s);
6414 
6415     if (s->nb_indirects > 0) {
6416         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6417                      && qemu_log_in_addr_range(pc_start))) {
6418             FILE *logfile = qemu_log_trylock();
6419             if (logfile) {
6420                 fprintf(logfile, "OP before indirect lowering:\n");
6421                 tcg_dump_ops(s, logfile, false);
6422                 fprintf(logfile, "\n");
6423                 qemu_log_unlock(logfile);
6424             }
6425         }
6426 
6427         /* Replace indirect temps with direct temps.  */
6428         if (liveness_pass_2(s)) {
6429             /* If changes were made, re-run liveness.  */
6430             liveness_pass_1(s);
6431         }
6432     }
6433 
6434     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6435                  && qemu_log_in_addr_range(pc_start))) {
6436         FILE *logfile = qemu_log_trylock();
6437         if (logfile) {
6438             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6439             tcg_dump_ops(s, logfile, true);
6440             fprintf(logfile, "\n");
6441             qemu_log_unlock(logfile);
6442         }
6443     }
6444 
6445     /* Initialize goto_tb jump offsets. */
6446     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6447     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6448     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6449     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6450 
6451     tcg_reg_alloc_start(s);
6452 
6453     /*
6454      * Reset the buffer pointers when restarting after overflow.
6455      * TODO: Move this into translate-all.c with the rest of the
6456      * buffer management.  Having only this done here is confusing.
6457      */
6458     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6459     s->code_ptr = s->code_buf;
6460     s->data_gen_ptr = NULL;
6461 
6462     QSIMPLEQ_INIT(&s->ldst_labels);
6463     s->pool_labels = NULL;
6464 
6465     start_words = s->insn_start_words;
6466     s->gen_insn_data =
6467         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6468 
6469     tcg_out_tb_start(s);
6470 
6471     num_insns = -1;
6472     QTAILQ_FOREACH(op, &s->ops, link) {
6473         TCGOpcode opc = op->opc;
6474 
6475         switch (opc) {
6476         case INDEX_op_mov:
6477         case INDEX_op_mov_vec:
6478             tcg_reg_alloc_mov(s, op);
6479             break;
6480         case INDEX_op_dup_vec:
6481             tcg_reg_alloc_dup(s, op);
6482             break;
6483         case INDEX_op_insn_start:
6484             if (num_insns >= 0) {
6485                 size_t off = tcg_current_code_size(s);
6486                 s->gen_insn_end_off[num_insns] = off;
6487                 /* Assert that we do not overflow our stored offset.  */
6488                 assert(s->gen_insn_end_off[num_insns] == off);
6489             }
6490             num_insns++;
6491             for (i = 0; i < start_words; ++i) {
6492                 s->gen_insn_data[num_insns * start_words + i] =
6493                     tcg_get_insn_start_param(op, i);
6494             }
6495             break;
6496         case INDEX_op_discard:
6497             temp_dead(s, arg_temp(op->args[0]));
6498             break;
6499         case INDEX_op_set_label:
6500             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6501             tcg_out_label(s, arg_label(op->args[0]));
6502             break;
6503         case INDEX_op_call:
6504             tcg_reg_alloc_call(s, op);
6505             break;
6506         case INDEX_op_exit_tb:
6507             tcg_out_exit_tb(s, op->args[0]);
6508             break;
6509         case INDEX_op_goto_tb:
6510             tcg_out_goto_tb(s, op->args[0]);
6511             break;
6512         case INDEX_op_dup2_vec:
6513             if (tcg_reg_alloc_dup2(s, op)) {
6514                 break;
6515             }
6516             /* fall through */
6517         default:
6518             /* Sanity check that we've not introduced any unhandled opcodes. */
6519             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6520                                               TCGOP_FLAGS(op)));
6521             /* Note: it would be faster to have specialized
6522                register allocator functions for some common
6523                argument patterns.  */
6524             tcg_reg_alloc_op(s, op);
6525             break;
6526         }
6527         /* Test for (pending) buffer overflow.  The assumption is that any
6528            one operation beginning below the high water mark cannot overrun
6529            the buffer completely.  Thus we can test for overflow after
6530            generating code without having to check during generation.  */
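              /* (The reserved margin is TCG_HIGHWATER bytes; see the
                 region allocation code.)  */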
6531         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6532             return -1;
6533         }
6534         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6535         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6536             return -2;
6537         }
6538     }
6539     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6540     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6541 
6542     /* Generate TB finalization at the end of block */
6543     i = tcg_out_ldst_finalize(s);
6544     if (i < 0) {
6545         return i;
6546     }
6547     i = tcg_out_pool_finalize(s);
6548     if (i < 0) {
6549         return i;
6550     }
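          /* A relocation out of range is reported like the size overflow
             above: the caller retries with fewer guest instructions.  */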
6551     if (!tcg_resolve_relocs(s)) {
6552         return -2;
6553     }
6554 
6555 #ifndef CONFIG_TCG_INTERPRETER
6556     /* flush instruction cache */
6557     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6558                         (uintptr_t)s->code_buf,
6559                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6560 #endif
6561 
6562     return tcg_current_code_size(s);
6563 }
6564 
6565 #ifdef ELF_HOST_MACHINE
6566 /* In order to use this feature, the backend needs to do three things:
6567 
6568    (1) Define ELF_HOST_MACHINE to indicate both what value to
6569        put into the ELF image and to indicate support for the feature.
6570 
6571    (2) Define tcg_register_jit.  This should create a buffer containing
6572        the contents of a .debug_frame section that describes the post-
6573        prologue unwind info for the tcg machine.
6574 
6575    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6576 */
6577 
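     /* For illustration only: a typical backend (the i386 port, for one)
        keeps a statically initialized DebugFrame structure and simply
        forwards it:

            void tcg_register_jit(const void *buf, size_t buf_size)
            {
                tcg_register_jit_int(buf, buf_size,
                                     &debug_frame, sizeof(debug_frame));
            }

        where debug_frame holds that backend's CIE/FDE contents; the name
        follows the i386 backend and may differ elsewhere.  */
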
6578 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6579 typedef enum {
6580     JIT_NOACTION = 0,
6581     JIT_REGISTER_FN,
6582     JIT_UNREGISTER_FN
6583 } jit_actions_t;
6584 
6585 struct jit_code_entry {
6586     struct jit_code_entry *next_entry;
6587     struct jit_code_entry *prev_entry;
6588     const void *symfile_addr;
6589     uint64_t symfile_size;
6590 };
6591 
6592 struct jit_descriptor {
6593     uint32_t version;
6594     uint32_t action_flag;
6595     struct jit_code_entry *relevant_entry;
6596     struct jit_code_entry *first_entry;
6597 };
6598 
6599 void __jit_debug_register_code(void) __attribute__((noinline));
6600 void __jit_debug_register_code(void)
6601 {
6602     asm("");
6603 }
6604 
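     /* GDB sets a breakpoint on the noinline function above; each call
        signals it to re-read __jit_debug_descriptor.  */
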
6605 /* Must statically initialize the version, because GDB may check
6606    the version before we can set it.  */
6607 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6608 
6609 /* End GDB interface.  */
6610 
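     /* Return the offset of STR within STRTAB.  The caller must ensure
        the string is present, as there is no failure path: e.g.
        find_string(img->str, ".text") yields the sh_name offset used
        below.  */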
6611 static int find_string(const char *strtab, const char *str)
6612 {
6613     const char *p = strtab + 1;
6614 
6615     while (1) {
6616         if (strcmp(p, str) == 0) {
6617             return p - strtab;
6618         }
6619         p += strlen(p) + 1;
6620     }
6621 }
6622 
6623 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6624                                  const void *debug_frame,
6625                                  size_t debug_frame_size)
6626 {
6627     struct __attribute__((packed)) DebugInfo {
6628         uint32_t  len;
6629         uint16_t  version;
6630         uint32_t  abbrev;
6631         uint8_t   ptr_size;
6632         uint8_t   cu_die;
6633         uint16_t  cu_lang;
6634         uintptr_t cu_low_pc;
6635         uintptr_t cu_high_pc;
6636         uint8_t   fn_die;
6637         char      fn_name[16];
6638         uintptr_t fn_low_pc;
6639         uintptr_t fn_high_pc;
6640         uint8_t   cu_eoc;
6641     };
6642 
6643     struct ElfImage {
6644         ElfW(Ehdr) ehdr;
6645         ElfW(Phdr) phdr;
6646         ElfW(Shdr) shdr[7];
6647         ElfW(Sym)  sym[2];
6648         struct DebugInfo di;
6649         uint8_t    da[24];
6650         char       str[80];
6651     };
6652 
6653     struct ElfImage *img;
6654 
6655     static const struct ElfImage img_template = {
6656         .ehdr = {
6657             .e_ident[EI_MAG0] = ELFMAG0,
6658             .e_ident[EI_MAG1] = ELFMAG1,
6659             .e_ident[EI_MAG2] = ELFMAG2,
6660             .e_ident[EI_MAG3] = ELFMAG3,
6661             .e_ident[EI_CLASS] = ELF_CLASS,
6662             .e_ident[EI_DATA] = ELF_DATA,
6663             .e_ident[EI_VERSION] = EV_CURRENT,
6664             .e_type = ET_EXEC,
6665             .e_machine = ELF_HOST_MACHINE,
6666             .e_version = EV_CURRENT,
6667             .e_phoff = offsetof(struct ElfImage, phdr),
6668             .e_shoff = offsetof(struct ElfImage, shdr),
6669             .e_ehsize = sizeof(ElfW(Ehdr)),
6670             .e_phentsize = sizeof(ElfW(Phdr)),
6671             .e_phnum = 1,
6672             .e_shentsize = sizeof(ElfW(Shdr)),
6673             .e_shnum = ARRAY_SIZE(img->shdr),
6674             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6675 #ifdef ELF_HOST_FLAGS
6676             .e_flags = ELF_HOST_FLAGS,
6677 #endif
6678 #ifdef ELF_OSABI
6679             .e_ident[EI_OSABI] = ELF_OSABI,
6680 #endif
6681         },
6682         .phdr = {
6683             .p_type = PT_LOAD,
6684             .p_flags = PF_X,
6685         },
6686         .shdr = {
6687             [0] = { .sh_type = SHT_NULL },
6688             /* Trick: The contents of code_gen_buffer are not present in
6689                this fake ELF file; that got allocated elsewhere.  Therefore
6690                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6691                will not look for contents.  We can record any address.  */
6692             [1] = { /* .text */
6693                 .sh_type = SHT_NOBITS,
6694                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6695             },
6696             [2] = { /* .debug_info */
6697                 .sh_type = SHT_PROGBITS,
6698                 .sh_offset = offsetof(struct ElfImage, di),
6699                 .sh_size = sizeof(struct DebugInfo),
6700             },
6701             [3] = { /* .debug_abbrev */
6702                 .sh_type = SHT_PROGBITS,
6703                 .sh_offset = offsetof(struct ElfImage, da),
6704                 .sh_size = sizeof(img->da),
6705             },
6706             [4] = { /* .debug_frame */
6707                 .sh_type = SHT_PROGBITS,
6708                 .sh_offset = sizeof(struct ElfImage),
6709             },
6710             [5] = { /* .symtab */
6711                 .sh_type = SHT_SYMTAB,
6712                 .sh_offset = offsetof(struct ElfImage, sym),
6713                 .sh_size = sizeof(img->sym),
6714                 .sh_info = 1,
6715                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6716                 .sh_entsize = sizeof(ElfW(Sym)),
6717             },
6718             [6] = { /* .strtab */
6719                 .sh_type = SHT_STRTAB,
6720                 .sh_offset = offsetof(struct ElfImage, str),
6721                 .sh_size = sizeof(img->str),
6722             }
6723         },
6724         .sym = {
6725             [1] = { /* code_gen_buffer */
6726                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6727                 .st_shndx = 1,
6728             }
6729         },
6730         .di = {
6731             .len = sizeof(struct DebugInfo) - 4,
6732             .version = 2,
6733             .ptr_size = sizeof(void *),
6734             .cu_die = 1,
6735             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6736             .fn_die = 2,
6737             .fn_name = "code_gen_buffer"
6738         },
6739         .da = {
6740             1,          /* abbrev number (the cu) */
6741             0x11, 1,    /* DW_TAG_compile_unit, has children */
6742             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6743             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6744             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6745             0, 0,       /* end of abbrev */
6746             2,          /* abbrev number (the fn) */
6747             0x2e, 0,    /* DW_TAG_subprogram, no children */
6748             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6749             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6750             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6751             0, 0,       /* end of abbrev */
6752             0           /* no more abbrev */
6753         },
6754         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6755                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6756     };
6757 
6758     /* We only need a single jit entry; statically allocate it.  */
6759     static struct jit_code_entry one_entry;
6760 
6761     uintptr_t buf = (uintptr_t)buf_ptr;
6762     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6763     DebugFrameHeader *dfh;
6764 
6765     img = g_malloc(img_size);
6766     *img = img_template;
6767 
6768     img->phdr.p_vaddr = buf;
6769     img->phdr.p_paddr = buf;
6770     img->phdr.p_memsz = buf_size;
6771 
6772     img->shdr[1].sh_name = find_string(img->str, ".text");
6773     img->shdr[1].sh_addr = buf;
6774     img->shdr[1].sh_size = buf_size;
6775 
6776     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6777     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6778 
6779     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6780     img->shdr[4].sh_size = debug_frame_size;
6781 
6782     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6783     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6784 
6785     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6786     img->sym[1].st_value = buf;
6787     img->sym[1].st_size = buf_size;
6788 
6789     img->di.cu_low_pc = buf;
6790     img->di.cu_high_pc = buf + buf_size;
6791     img->di.fn_low_pc = buf;
6792     img->di.fn_high_pc = buf + buf_size;
6793 
6794     dfh = (DebugFrameHeader *)(img + 1);
6795     memcpy(dfh, debug_frame, debug_frame_size);
6796     dfh->fde.func_start = buf;
6797     dfh->fde.func_len = buf_size;
6798 
6799 #ifdef DEBUG_JIT
6800     /* Enable this block to debug the creation of the ELF image file.
6801        One can use readelf, objdump, or other inspection utilities.  */
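          /* For example (the actual path depends on g_get_tmp_dir()):
                 readelf --all /tmp/qemu.jit  */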
6802     {
6803         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6804         FILE *f = fopen(jit, "w+b");
6805         if (f) {
6806             if (fwrite(img, img_size, 1, f) != 1) {
6807                 /* Avoid stupid unused return value warning for fwrite.  */
6808             }
6809             fclose(f);
6810         }
6811     }
6812 #endif
6813 
6814     one_entry.symfile_addr = img;
6815     one_entry.symfile_size = img_size;
6816 
6817     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6818     __jit_debug_descriptor.relevant_entry = &one_entry;
6819     __jit_debug_descriptor.first_entry = &one_entry;
6820     __jit_debug_register_code();
6821 }
6822 #else
6823 /* No support for the feature.  Provide the entry point expected by exec.c,
6824    and implement the internal function we declared earlier.  */
6825 
6826 static void tcg_register_jit_int(const void *buf, size_t size,
6827                                  const void *debug_frame,
6828                                  size_t debug_frame_size)
6829 {
6830 }
6831 
6832 void tcg_register_jit(const void *buf, size_t buf_size)
6833 {
6834 }
6835 #endif /* ELF_HOST_MACHINE */
6836 
6837 #if !TCG_TARGET_MAYBE_vec
6838 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6839 {
6840     g_assert_not_reached();
6841 }
6842 #endif
6843