xref: /openbmc/qemu/tcg/tcg.c (revision f441b4d19b289f55a378b8d033994f45a333b581)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to jump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */

/*
 * DWARF Common Information Entry header, used when building the in-memory
 * debug_frame data that tcg_register_jit_int() hands to GDB.
 */
typedef struct {
    /* Align the record so pointer-sized fields that follow it stay aligned. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* DWARF Frame Description Entry header describing one generated region. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;   /* offset back to the owning CIE */
    uintptr_t func_start;  /* start address of the described code */
    uintptr_t func_len;    /* length in bytes of the described code */
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE, emitted as one packed blob. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * Description of one deferred qemu_ld/qemu_st slow path, queued on
 * TCGContext.ldst_labels and emitted by tcg_out_ldst_finalize().
 */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};
111 
112 static void tcg_register_jit_int(const void *buf, size_t size,
113                                  const void *debug_frame,
114                                  size_t debug_frame_size)
115     __attribute__((unused));
116 
117 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
118 static void tcg_out_tb_start(TCGContext *s);
119 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
120                        intptr_t arg2);
121 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
122 static void tcg_out_movi(TCGContext *s, TCGType type,
123                          TCGReg ret, tcg_target_long arg);
124 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
133 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
134 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
135 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
136 static void tcg_out_goto_tb(TCGContext *s, int which);
137 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
138                        const TCGArg args[TCG_MAX_OP_ARGS],
139                        const int const_args[TCG_MAX_OP_ARGS]);
140 #if TCG_TARGET_MAYBE_vec
141 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
142                             TCGReg dst, TCGReg src);
143 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
144                              TCGReg dst, TCGReg base, intptr_t offset);
145 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
146                              TCGReg dst, int64_t arg);
147 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
148                            unsigned vecl, unsigned vece,
149                            const TCGArg args[TCG_MAX_OP_ARGS],
150                            const int const_args[TCG_MAX_OP_ARGS]);
151 #else
152 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
153                                    TCGReg dst, TCGReg src)
154 {
155     g_assert_not_reached();
156 }
157 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
158                                     TCGReg dst, TCGReg base, intptr_t offset)
159 {
160     g_assert_not_reached();
161 }
162 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
163                                     TCGReg dst, int64_t arg)
164 {
165     g_assert_not_reached();
166 }
167 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
168                                   unsigned vecl, unsigned vece,
169                                   const TCGArg args[TCG_MAX_OP_ARGS],
170                                   const int const_args[TCG_MAX_OP_ARGS])
171 {
172     g_assert_not_reached();
173 }
174 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
175 {
176     return 0;
177 }
178 #endif
179 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
180                        intptr_t arg2);
181 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
182                         TCGReg base, intptr_t ofs);
183 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
184                          const TCGHelperInfo *info);
185 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
186 static bool tcg_target_const_match(int64_t val, int ct,
187                                    TCGType type, TCGCond cond, int vece);
188 
189 #ifndef CONFIG_USER_ONLY
190 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
191 #endif
192 
/*
 * Backend-supplied parameters for building the argument list of a
 * qemu_ld/st helper call (used by tcg_out_ld_helper_args and friends).
 */
typedef struct TCGLdstHelperParam {
    /* Optional callback producing the return-address argument register;
       NULL presumably means use l->raddr directly -- TODO confirm. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* scratch registers available to the backend */
} TCGLdstHelperParam;
198 
199 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
200                                    const TCGLdstHelperParam *p)
201     __attribute__((unused));
202 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
203                                   bool load_sign, const TCGLdstHelperParam *p)
204     __attribute__((unused));
205 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
206                                    const TCGLdstHelperParam *p)
207     __attribute__((unused));
208 
/*
 * Softmmu load helpers, indexed by MemOp size+sign.  The 64-bit-only
 * entries exist because MO_SL and MO_128 results do not fit the 32-bit
 * host calling convention used by TCG_TARGET_REG_BITS == 32 backends.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
221 
/* Softmmu store helpers, indexed by MemOp size (no sign distinction). */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
231 
232 typedef struct {
233     MemOp atom;   /* lg2 bits of atomicity required */
234     MemOp align;  /* lg2 bits of alignment to use */
235 } TCGAtomAlign;
236 
237 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
238                                            MemOp host_atom, bool allow_two_ops)
239     __attribute__((unused));
240 
#ifdef CONFIG_USER_ONLY
/* Whether user-mode emulation uses the softmmu slow path. */
bool tcg_use_softmmu;
#endif

/* The statically-allocated context used for one-time global init. */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context in use by that translation thread. */
__thread TCGContext *tcg_ctx;

/* Array of all contexts, for iteration in MTTCG; bounded by tcg_max_ctxs. */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
/* Entry point for returning from generated code to the epilogue. */
const void *tcg_code_gen_epilogue;
/* Difference between the RX and RW views of a split-wx code buffer. */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Registers usable for each TCGType, as reported by tcg_target_init. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
/* Registers clobbered across a call, per the host ABI. */
static TCGRegSet tcg_target_call_clobber_regs;
261 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the code stream and advance the write pointer. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite a previously emitted byte at @p (relocation patching). */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
274 
275 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
276 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
277 {
278     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
279         *s->code_ptr++ = v;
280     } else {
281         tcg_insn_unit *p = s->code_ptr;
282         memcpy(p, &v, sizeof(v));
283         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
284     }
285 }
286 
287 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
288                                                        uint16_t v)
289 {
290     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
291         *p = v;
292     } else {
293         memcpy(p, &v, sizeof(v));
294     }
295 }
296 #endif
297 
298 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
299 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
300 {
301     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
302         *s->code_ptr++ = v;
303     } else {
304         tcg_insn_unit *p = s->code_ptr;
305         memcpy(p, &v, sizeof(v));
306         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
307     }
308 }
309 
310 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
311                                                        uint32_t v)
312 {
313     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
314         *p = v;
315     } else {
316         memcpy(p, &v, sizeof(v));
317     }
318 }
319 #endif
320 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the code stream and advance the write pointer. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        /* One insn unit holds exactly 64 bits. */
        *s->code_ptr++ = v;
    } else {
        /* Narrower units: copy bytewise and step past the units consumed. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 64-bit value at @p (relocation patching). */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
343 
344 /* label relocation processing */
345 
346 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
347                           TCGLabel *l, intptr_t addend)
348 {
349     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
350 
351     r->type = type;
352     r->ptr = code_ptr;
353     r->addend = addend;
354     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
355 }
356 
/*
 * Bind label @l to the current output position.  The stored value is the
 * read-execute alias of code_ptr, for use by pc-relative references.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
363 
364 TCGLabel *gen_new_label(void)
365 {
366     TCGContext *s = tcg_ctx;
367     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
368 
369     memset(l, 0, sizeof(TCGLabel));
370     l->id = s->nb_labels++;
371     QSIMPLEQ_INIT(&l->branches);
372     QSIMPLEQ_INIT(&l->relocs);
373 
374     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
375 
376     return l;
377 }
378 
379 static bool tcg_resolve_relocs(TCGContext *s)
380 {
381     TCGLabel *l;
382 
383     QSIMPLEQ_FOREACH(l, &s->labels, next) {
384         TCGRelocation *r;
385         uintptr_t value = l->u.value;
386 
387         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
388             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
389                 return false;
390             }
391         }
392     }
393     return true;
394 }
395 
/* Record the current code offset as goto_tb reset point @which of this TB. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
404 
/* Record the current code offset as the patchable goto_tb insn @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
413 
/* Address of the indirect jump-target slot @which, in the RX view. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
422 
/*
 * Offset of the softmmu fast-path descriptor tlb.f[which], expressed
 * relative to the end of CPUNegativeOffsetState -- hence negative.
 * NOTE(review): usable as an env-relative offset only because
 * CPUNegativeOffsetState is laid out immediately before the arch
 * CPU state -- confirm against cpu layout if touching this.
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
429 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp in the translator; -2 marks "buffer full,
       retry with a smaller TB". */
    siglongjmp(s->jmp_trans, -2);
}
436 
/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate a argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination: TCGReg, or argument slot (see above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
453 
/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        /* 32-bit source data: dispatch on the type pairing. */
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                /* i32 -> i32: plain move, no extension possible. */
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                /* i64 -> i32: truncate to the low half. */
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* i32 -> i64: widen with the requested signedness. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* i64 -> i64: re-extend the low 32 bits in place. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* 64-bit data only exists in registers on 64-bit hosts. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
515 
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, with the description taken from @i but @src
   overriding i->src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
522 
/* As tcg_out_movext, with the description taken entirely from @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
527 
/**
 * tcg_out_movext2 -- move and extend two pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* No overlap: i1's destination does not clobber i2's source. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /* i1->dst == src2.  If the clobber is mutual, we have a swap. */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No host xchg: preserve src1 in the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* Emit i2 first so that its source is consumed before i1 clobbers it. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
567 
/**
 * tcg_out_movext3 -- move and extend three pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If any one move clobbers no remaining source, emit it first and
       reduce to the two-move problem. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: break the cycle via the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: break the cycle via the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
643 
644 /*
645  * Allocate a new TCGLabelQemuLdst entry.
646  */
647 
648 __attribute__((unused))
649 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
650 {
651     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
652 
653     memset(l, 0, sizeof(*l));
654     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
655 
656     return l;
657 }
658 
/*
 * Allocate new constant pool entries.
 */

/* One pending constant-pool entry, kept on a sorted singly-linked list. */
typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;     /* instruction to patch against the pool slot */
    intptr_t addend;          /* relocation addend */
    int rtype;                /* relocation type for patch_reloc */
    unsigned nlong;           /* number of host words in data[] */
    tcg_target_ulong data[];  /* flexible array of constant words */
} TCGLabelPoolData;
671 
672 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
673                                         tcg_insn_unit *label, intptr_t addend)
674 {
675     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
676                                      + sizeof(tcg_target_ulong) * nlong);
677 
678     n->label = label;
679     n->addend = addend;
680     n->rtype = rtype;
681     n->nlong = nlong;
682     return n;
683 }
684 
/*
 * Insert @n into s->pool_labels, keeping the list sorted by descending
 * nlong, then by descending data.  Equal-data entries thereby become
 * adjacent, which lets tcg_out_pool_finalize share a single pool slot
 * among duplicates.
 */
static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
705 
706 /* The "usual" for generic integer code.  */
707 __attribute__((unused))
708 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
709                            tcg_insn_unit *label, intptr_t addend)
710 {
711     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
712     n->data[0] = d;
713     new_pool_insert(s, n);
714 }
715 
716 /* For v64 or v128, depending on the host.  */
717 __attribute__((unused))
718 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
719                         intptr_t addend, tcg_target_ulong d0,
720                         tcg_target_ulong d1)
721 {
722     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
723     n->data[0] = d0;
724     n->data[1] = d1;
725     new_pool_insert(s, n);
726 }
727 
728 /* For v128 or v256, depending on the host.  */
729 __attribute__((unused))
730 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
731                         intptr_t addend, tcg_target_ulong d0,
732                         tcg_target_ulong d1, tcg_target_ulong d2,
733                         tcg_target_ulong d3)
734 {
735     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
736     n->data[0] = d0;
737     n->data[1] = d1;
738     n->data[2] = d2;
739     n->data[3] = d3;
740     new_pool_insert(s, n);
741 }
742 
743 /* For v256, for 32-bit host.  */
744 __attribute__((unused))
745 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
746                         intptr_t addend, tcg_target_ulong d0,
747                         tcg_target_ulong d1, tcg_target_ulong d2,
748                         tcg_target_ulong d3, tcg_target_ulong d4,
749                         tcg_target_ulong d5, tcg_target_ulong d6,
750                         tcg_target_ulong d7)
751 {
752     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
753     n->data[0] = d0;
754     n->data[1] = d1;
755     n->data[2] = d2;
756     n->data[3] = d3;
757     n->data[4] = d4;
758     n->data[5] = d5;
759     n->data[6] = d6;
760     n->data[7] = d7;
761     new_pool_insert(s, n);
762 }
763 
/*
 * Generate TB finalization at the end of block
 */

/*
 * Emit all queued qemu_ld/st slow paths for the current TB.
 * Returns 0 on success, -1 on (pending) code-buffer overflow,
 * -2 if a slow-path emitter itself failed.
 */
static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}
792 
/*
 * Write out the constant pool for the current TB and patch all
 * instructions that reference it.  Adjacent duplicate entries (the list
 * is sorted by new_pool_insert) share a single pool slot.
 * Returns 0 on success, -1 on buffer overflow, -2 on relocation failure.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;   /* last entry actually written */
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    /* Align the pool start to the size of its largest (first) entry. */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /* Write the data only if it differs from the previous entry;
           otherwise reuse the slot just written. */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            /* NB: void* arithmetic -- a GCC/Clang extension QEMU relies on. */
            a += size;
            l = p;
        }

        /* Patch against the RX alias of the slot's start address. */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
834 
835 #define C_PFX1(P, A)                    P##A
836 #define C_PFX2(P, A, B)                 P##A##_##B
837 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
838 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
839 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
840 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
841 
842 /* Define an enumeration for the various combinations. */
843 
844 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
845 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
846 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
847 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
848 
849 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
850 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
851 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
852 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
853 
854 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
855 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
856 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
857 
858 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
859 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
860 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
861 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
862 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
863 
864 typedef enum {
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
890 typedef struct TCGConstraintSet {
891     uint8_t nb_oargs, nb_iargs;
892     const char *args_ct_str[TCG_MAX_OP_ARGS];
893 } TCGConstraintSet;
894 
895 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
896 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
897 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
898 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
899 
900 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
901 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
902 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
903 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
904 
905 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
906 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
907 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
908 
909 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
910 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
911 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
914 
915 static const TCGConstraintSet constraint_sets[] = {
916 #include "tcg-target-con-set.h"
917 };
918 
919 #undef C_O0_I1
920 #undef C_O0_I2
921 #undef C_O0_I3
922 #undef C_O0_I4
923 #undef C_O1_I1
924 #undef C_O1_I2
925 #undef C_O1_I3
926 #undef C_O1_I4
927 #undef C_N1_I2
928 #undef C_N1O1_I1
929 #undef C_N2_I1
930 #undef C_O2_I1
931 #undef C_O2_I2
932 #undef C_O2_I3
933 #undef C_O2_I4
934 #undef C_N1_O1_I4
935 
936 /* Expand the enumerator to be returned from tcg_target_op_def(). */
937 
938 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
939 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
940 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
941 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
942 
943 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
944 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
945 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
946 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
947 
948 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
949 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
950 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
951 
952 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
953 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
954 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
955 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
956 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
958 #include "tcg-target.c.inc"
959 
960 #ifndef CONFIG_TCG_INTERPRETER
961 /* Validate CPUTLBDescFast placement. */
962 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
963                         sizeof(CPUNegativeOffsetState))
964                   < MIN_TLB_MASK_TABLE_OFS);
965 #endif
966 
967 /*
968  * All TCG threads except the parent (i.e. the one that called tcg_context_init
969  * and registered the target's TCG globals) must register with this function
970  * before initiating translation.
971  *
972  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
973  * of tcg_region_init() for the reasoning behind this.
974  *
975  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
977  * is not used anymore for translation once this function is called.
978  *
979  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
981  * modes.
982  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a bitwise copy of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /*
             * The copied pointer still targets the parent's temps[];
             * re-point it at the same index within our own copy.
             */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* NOTE(review): slot 0 appears to reuse the parent's region
       allocation; only later registrants allocate here — confirm
       against tcg_region_init(). */
    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
1017 
1018 /* pool based memory allocation */
1019 void *tcg_malloc_internal(TCGContext *s, int size)
1020 {
1021     TCGPool *p;
1022     int pool_size;
1023 
1024     if (size > TCG_POOL_CHUNK_SIZE) {
1025         /* big malloc: insert a new pool (XXX: could optimize) */
1026         p = g_malloc(sizeof(TCGPool) + size);
1027         p->size = size;
1028         p->next = s->pool_first_large;
1029         s->pool_first_large = p;
1030         return p->data;
1031     } else {
1032         p = s->pool_current;
1033         if (!p) {
1034             p = s->pool_first;
1035             if (!p)
1036                 goto new_pool;
1037         } else {
1038             if (!p->next) {
1039             new_pool:
1040                 pool_size = TCG_POOL_CHUNK_SIZE;
1041                 p = g_malloc(sizeof(TCGPool) + pool_size);
1042                 p->size = pool_size;
1043                 p->next = NULL;
1044                 if (s->pool_current) {
1045                     s->pool_current->next = p;
1046                 } else {
1047                     s->pool_first = p;
1048                 }
1049             } else {
1050                 p = p->next;
1051             }
1052         }
1053     }
1054     s->pool_current = p;
1055     s->pool_cur = p->data + size;
1056     s->pool_end = p->data + p->size;
1057     return p->data;
1058 }
1059 
1060 void tcg_pool_reset(TCGContext *s)
1061 {
1062     TCGPool *p, *t;
1063     for (p = s->pool_first_large; p; p = t) {
1064         t = p->next;
1065         g_free(p);
1066     }
1067     s->pool_first_large = NULL;
1068     s->pool_cur = s->pool_end = NULL;
1069     s->pool_current = NULL;
1070 }
1071 
1072 /*
1073  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1074  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1075  * We only use these for layout in tcg_out_ld_helper_ret and
1076  * tcg_out_st_helper_args, and share them between several of
1077  * the helpers, with the end result that it's easier to build manually.
1078  */
1079 
/* Typecode for tcg_target_ulong: tracks the host register width. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

/* Load helpers: (env, addr, oi, ra) -> value. */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Store helpers: (env, addr, data, oi, ra) -> void. */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
1142 
1143 #ifdef CONFIG_TCG_INTERPRETER
1144 static ffi_type *typecode_to_ffi(int argmask)
1145 {
1146     /*
1147      * libffi does not support __int128_t, so we have forced Int128
1148      * to use the structure definition instead of the builtin type.
1149      */
1150     static ffi_type *ffi_type_i128_elements[3] = {
1151         &ffi_type_uint64,
1152         &ffi_type_uint64,
1153         NULL
1154     };
1155     static ffi_type ffi_type_i128 = {
1156         .size = 16,
1157         .alignment = __alignof__(Int128),
1158         .type = FFI_TYPE_STRUCT,
1159         .elements = ffi_type_i128_elements,
1160     };
1161 
1162     switch (argmask) {
1163     case dh_typecode_void:
1164         return &ffi_type_void;
1165     case dh_typecode_i32:
1166         return &ffi_type_uint32;
1167     case dh_typecode_s32:
1168         return &ffi_type_sint32;
1169     case dh_typecode_i64:
1170         return &ffi_type_uint64;
1171     case dh_typecode_s64:
1172         return &ffi_type_sint64;
1173     case dh_typecode_ptr:
1174         return &ffi_type_pointer;
1175     case dh_typecode_i128:
1176         return &ffi_type_i128;
1177     }
1178     g_assert_not_reached();
1179 }
1180 
/*
 * Build the libffi call descriptor (cif) for a helper, decoding the
 * packed 3-bits-per-entry typemask.  The descriptor and its argument
 * array are allocated together and returned for permanent use.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];  /* flexible array: one entry per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);  /* 3 typemask bits per argument */
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);  /* low 3 bits: return */
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1214 
/* With TCI, the per-helper init word is its ffi_cif pointer ... */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
/* ... otherwise it is a plain flag, with 1 meaning "initialized". */
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1221 
1222 static inline bool arg_slot_reg_p(unsigned arg_slot)
1223 {
1224     /*
1225      * Split the sizeof away from the comparison to avoid Werror from
1226      * "unsigned < 0 is always false", when iarg_regs is empty.
1227      */
1228     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1229     return arg_slot < nreg;
1230 }
1231 
1232 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1233 {
1234     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1235     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1236 
1237     tcg_debug_assert(stk_slot < max);
1238     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1239 }
1240 
/* Running counters while laying out a helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1247 
1248 static void layout_arg_even(TCGCumulativeArgs *cum)
1249 {
1250     cum->arg_slot += cum->arg_slot & 1;
1251 }
1252 
1253 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1254                          TCGCallArgumentKind kind)
1255 {
1256     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1257 
1258     *loc = (TCGCallArgumentLoc){
1259         .kind = kind,
1260         .arg_idx = cum->arg_idx,
1261         .arg_slot = cum->arg_slot,
1262     };
1263     cum->info_in_idx++;
1264     cum->arg_slot++;
1265 }
1266 
1267 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1268                                 TCGHelperInfo *info, int n)
1269 {
1270     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1271 
1272     for (int i = 0; i < n; ++i) {
1273         /* Layout all using the same arg_idx, adjusting the subindex. */
1274         loc[i] = (TCGCallArgumentLoc){
1275             .kind = TCG_CALL_ARG_NORMAL,
1276             .arg_idx = cum->arg_idx,
1277             .tmp_subindex = i,
1278             .arg_slot = cum->arg_slot + i,
1279         };
1280     }
1281     cum->info_in_idx += n;
1282     cum->arg_slot += n;
1283 }
1284 
/*
 * Pass a 128-bit argument by reference: a pointer in a normal slot,
 * plus a private stack copy of the data in the "ref_slot" area.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1316 
/*
 * Decode @info->typemask and compute the register/stack placement of
 * the return value and every argument, per the target's calling
 * convention.  Results are written back into @info: nr_out, out_kind,
 * in[] and nr_in.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Odd typecodes are the signed variants. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1497 
1498 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1499 static void process_constraint_sets(void);
1500 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1501                                             TCGReg reg, const char *name);
1502 
/*
 * One-time process-wide initialization of the parent TCG context:
 * helper call layouts, backend registers and constraints, the register
 * allocation order, the tcg_ctxs[] tracking array, and the "env" global.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Precompute the ABI layout of the ldst slow-path helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* The first n entries are call-saved: reverse them ... */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* ... and keep the call-clobbered tail in its original order. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1557 
/*
 * Public entry point: initialize the parent context, then carve the
 * code generation buffer of TB_SIZE bytes into per-thread regions.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1563 
1564 /*
1565  * Allocate TBs right before their corresponding translated code, making
1566  * sure that TBs and code are on different cache lines.
1567  */
1568 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1569 {
1570     uintptr_t align = qemu_icache_linesize;
1571     TranslationBlock *tb;
1572     void *next;
1573 
1574  retry:
1575     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1576     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1577 
1578     if (unlikely(next > s->code_gen_highwater)) {
1579         if (tcg_region_alloc(s)) {
1580             return NULL;
1581         }
1582         goto retry;
1583     }
1584     qatomic_set(&s->code_gen_ptr, next);
1585     return tb;
1586 }
1587 
/*
 * Generate the host prologue/epilogue at the start of the code buffer
 * and publish it as tcg_qemu_tb_exec.  Optionally disassembles the
 * result when CPU_LOG_TB_OUT_ASM logging is enabled.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    /* Emit at the very beginning of the generated-code buffer. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Execution happens via the read-execute alias of the buffer. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written code visible to the execution alias. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by a constant-pool data section:
                   disassemble the code, then dump the data words. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1664 
/* Reset per-translation state before generating a new TB. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Discard all non-global temporaries from the previous TB. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    /* Sanity-check that the frontend configured the context. */
    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

    tcg_debug_assert(s->insn_start_words > 0);
}
1698 
1699 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1700 {
1701     int n = s->nb_temps++;
1702 
1703     if (n >= TCG_MAX_TEMPS) {
1704         tcg_raise_tb_overflow(s);
1705     }
1706     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1707 }
1708 
1709 static TCGTemp *tcg_global_alloc(TCGContext *s)
1710 {
1711     TCGTemp *ts;
1712 
1713     tcg_debug_assert(s->nb_globals == s->nb_temps);
1714     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1715     s->nb_globals++;
1716     ts = tcg_temp_alloc(s);
1717     ts->kind = TEMP_GLOBAL;
1718 
1719     return ts;
1720 }
1721 
1722 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1723                                             TCGReg reg, const char *name)
1724 {
1725     TCGTemp *ts;
1726 
1727     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1728 
1729     ts = tcg_global_alloc(s);
1730     ts->base_type = type;
1731     ts->type = type;
1732     ts->kind = TEMP_FIXED;
1733     ts->reg = reg;
1734     ts->name = name;
1735     tcg_regset_set_reg(s->reserved_regs, reg);
1736 
1737     return ts;
1738 }
1739 
1740 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1741 {
1742     s->frame_start = start;
1743     s->frame_end = start + size;
1744     s->frame_temp
1745         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1746 }
1747 
/*
 * Create a global temp backed by memory at BASE + OFFSET.  On 32-bit
 * hosts a 64-bit global is split into two adjacent 32-bit halves.
 * Returns the (first) TCGTemp.
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split into low ("_0") and high ("_1") 32-bit halves. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        /* NOTE(review): strdup'ed names are never freed here; globals
           appear to live for the process lifetime — confirm. */
        ts->name = strdup(buf);

        /* The two halves must occupy consecutive temps[] slots. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1807 
1808 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1809 {
1810     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1811     return temp_tcgv_i32(ts);
1812 }
1813 
1814 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1815 {
1816     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1817     return temp_tcgv_i64(ts);
1818 }
1819 
1820 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1821 {
1822     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1823     return temp_tcgv_ptr(ts);
1824 }
1825 
/*
 * Allocate a temporary of TYPE with lifetime KIND (TEMP_EBB or TEMP_TB).
 * Freed EBB temps are recycled through a per-type free list.  Types wider
 * than a host register are built from consecutive TCG_TYPE_REG pieces
 * distinguished by temp_subindex; the first piece is returned.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed to hold TYPE. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* Pieces must be consecutive in the temps[] array. */
            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1889 
1890 TCGv_i32 tcg_temp_new_i32(void)
1891 {
1892     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1893 }
1894 
1895 TCGv_i32 tcg_temp_ebb_new_i32(void)
1896 {
1897     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1898 }
1899 
1900 TCGv_i64 tcg_temp_new_i64(void)
1901 {
1902     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1903 }
1904 
1905 TCGv_i64 tcg_temp_ebb_new_i64(void)
1906 {
1907     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1908 }
1909 
1910 TCGv_ptr tcg_temp_new_ptr(void)
1911 {
1912     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1913 }
1914 
1915 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1916 {
1917     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1918 }
1919 
1920 TCGv_i128 tcg_temp_new_i128(void)
1921 {
1922     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1923 }
1924 
1925 TCGv_i128 tcg_temp_ebb_new_i128(void)
1926 {
1927     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1928 }
1929 
1930 TCGv_vec tcg_temp_new_vec(TCGType type)
1931 {
1932     TCGTemp *t;
1933 
1934 #ifdef CONFIG_DEBUG_TCG
1935     switch (type) {
1936     case TCG_TYPE_V64:
1937         assert(TCG_TARGET_HAS_v64);
1938         break;
1939     case TCG_TYPE_V128:
1940         assert(TCG_TARGET_HAS_v128);
1941         break;
1942     case TCG_TYPE_V256:
1943         assert(TCG_TARGET_HAS_v256);
1944         break;
1945     default:
1946         g_assert_not_reached();
1947     }
1948 #endif
1949 
1950     t = tcg_temp_new_internal(type, TEMP_EBB);
1951     return temp_tcgv_vec(t);
1952 }
1953 
1954 /* Create a new temp of the same type as an existing temp.  */
1955 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1956 {
1957     TCGTemp *t = tcgv_vec_temp(match);
1958 
1959     tcg_debug_assert(t->temp_allocated != 0);
1960 
1961     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1962     return temp_tcgv_vec(t);
1963 }
1964 
1965 void tcg_temp_free_internal(TCGTemp *ts)
1966 {
1967     TCGContext *s = tcg_ctx;
1968 
1969     switch (ts->kind) {
1970     case TEMP_CONST:
1971     case TEMP_TB:
1972         /* Silently ignore free. */
1973         break;
1974     case TEMP_EBB:
1975         tcg_debug_assert(ts->temp_allocated != 0);
1976         ts->temp_allocated = 0;
1977         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1978         break;
1979     default:
1980         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1981         g_assert_not_reached();
1982     }
1983 }
1984 
1985 void tcg_temp_free_i32(TCGv_i32 arg)
1986 {
1987     tcg_temp_free_internal(tcgv_i32_temp(arg));
1988 }
1989 
1990 void tcg_temp_free_i64(TCGv_i64 arg)
1991 {
1992     tcg_temp_free_internal(tcgv_i64_temp(arg));
1993 }
1994 
1995 void tcg_temp_free_i128(TCGv_i128 arg)
1996 {
1997     tcg_temp_free_internal(tcgv_i128_temp(arg));
1998 }
1999 
2000 void tcg_temp_free_ptr(TCGv_ptr arg)
2001 {
2002     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2003 }
2004 
2005 void tcg_temp_free_vec(TCGv_vec arg)
2006 {
2007     tcg_temp_free_internal(tcgv_vec_temp(arg));
2008 }
2009 
/*
 * Return the interned TEMP_CONST temp for (TYPE, VAL), creating it on
 * first use.  Constants are deduplicated per type through a hash table
 * whose key is a pointer to the 64-bit value stored inside the temp
 * itself, so the key remains valid for the life of the context.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first constant. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 32-bit host: the I64 constant occupies two I32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2064 
2065 TCGv_i32 tcg_constant_i32(int32_t val)
2066 {
2067     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2068 }
2069 
2070 TCGv_i64 tcg_constant_i64(int64_t val)
2071 {
2072     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2073 }
2074 
2075 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2076 {
2077     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2078 }
2079 
2080 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2081 {
2082     val = dup_const(vece, val);
2083     return temp_tcgv_vec(tcg_constant_internal(type, val));
2084 }
2085 
2086 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2087 {
2088     TCGTemp *t = tcgv_vec_temp(match);
2089 
2090     tcg_debug_assert(t->temp_allocated != 0);
2091     return tcg_constant_vec(t->base_type, vece, val);
2092 }
2093 
2094 #ifdef CONFIG_DEBUG_TCG
/* Return TS's index within tcg_ctx->temps[], bounds-checked (debug). */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}
2101 
/*
 * Decode an opaque TCGv_i32 handle (a byte offset from the start of
 * TCGContext) back to its TCGTemp.  The debug asserts verify the offset
 * falls within the temps[] array and is TCGTemp-aligned.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
2111 #endif /* CONFIG_DEBUG_TCG */
2112 
2113 /*
2114  * Return true if OP may appear in the opcode stream with TYPE.
2115  * Test the runtime variable that controls each opcode.
2116  */
2117 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2118 {
2119     bool has_type;
2120 
2121     switch (type) {
2122     case TCG_TYPE_I32:
2123         has_type = true;
2124         break;
2125     case TCG_TYPE_I64:
2126         has_type = TCG_TARGET_REG_BITS == 64;
2127         break;
2128     case TCG_TYPE_V64:
2129         has_type = TCG_TARGET_HAS_v64;
2130         break;
2131     case TCG_TYPE_V128:
2132         has_type = TCG_TARGET_HAS_v128;
2133         break;
2134     case TCG_TYPE_V256:
2135         has_type = TCG_TARGET_HAS_v256;
2136         break;
2137     default:
2138         has_type = false;
2139         break;
2140     }
2141 
2142     switch (op) {
2143     case INDEX_op_discard:
2144     case INDEX_op_set_label:
2145     case INDEX_op_call:
2146     case INDEX_op_br:
2147     case INDEX_op_mb:
2148     case INDEX_op_insn_start:
2149     case INDEX_op_exit_tb:
2150     case INDEX_op_goto_tb:
2151     case INDEX_op_goto_ptr:
2152     case INDEX_op_qemu_ld_a32_i32:
2153     case INDEX_op_qemu_ld_a64_i32:
2154     case INDEX_op_qemu_st_a32_i32:
2155     case INDEX_op_qemu_st_a64_i32:
2156     case INDEX_op_qemu_ld_a32_i64:
2157     case INDEX_op_qemu_ld_a64_i64:
2158     case INDEX_op_qemu_st_a32_i64:
2159     case INDEX_op_qemu_st_a64_i64:
2160         return true;
2161 
2162     case INDEX_op_qemu_st8_a32_i32:
2163     case INDEX_op_qemu_st8_a64_i32:
2164         return TCG_TARGET_HAS_qemu_st8_i32;
2165 
2166     case INDEX_op_qemu_ld_a32_i128:
2167     case INDEX_op_qemu_ld_a64_i128:
2168     case INDEX_op_qemu_st_a32_i128:
2169     case INDEX_op_qemu_st_a64_i128:
2170         return TCG_TARGET_HAS_qemu_ldst_i128;
2171 
2172     case INDEX_op_mov_i32:
2173     case INDEX_op_setcond_i32:
2174     case INDEX_op_brcond_i32:
2175     case INDEX_op_movcond_i32:
2176     case INDEX_op_ld8u_i32:
2177     case INDEX_op_ld8s_i32:
2178     case INDEX_op_ld16u_i32:
2179     case INDEX_op_ld16s_i32:
2180     case INDEX_op_ld_i32:
2181     case INDEX_op_st8_i32:
2182     case INDEX_op_st16_i32:
2183     case INDEX_op_st_i32:
2184     case INDEX_op_add_i32:
2185     case INDEX_op_sub_i32:
2186     case INDEX_op_neg_i32:
2187     case INDEX_op_mul_i32:
2188     case INDEX_op_and_i32:
2189     case INDEX_op_or_i32:
2190     case INDEX_op_xor_i32:
2191     case INDEX_op_shl_i32:
2192     case INDEX_op_shr_i32:
2193     case INDEX_op_sar_i32:
2194     case INDEX_op_extract_i32:
2195     case INDEX_op_sextract_i32:
2196     case INDEX_op_deposit_i32:
2197         return true;
2198 
2199     case INDEX_op_negsetcond_i32:
2200         return TCG_TARGET_HAS_negsetcond_i32;
2201     case INDEX_op_div_i32:
2202     case INDEX_op_divu_i32:
2203         return TCG_TARGET_HAS_div_i32;
2204     case INDEX_op_rem_i32:
2205     case INDEX_op_remu_i32:
2206         return TCG_TARGET_HAS_rem_i32;
2207     case INDEX_op_div2_i32:
2208     case INDEX_op_divu2_i32:
2209         return TCG_TARGET_HAS_div2_i32;
2210     case INDEX_op_rotl_i32:
2211     case INDEX_op_rotr_i32:
2212         return TCG_TARGET_HAS_rot_i32;
2213     case INDEX_op_extract2_i32:
2214         return TCG_TARGET_HAS_extract2_i32;
2215     case INDEX_op_add2_i32:
2216         return TCG_TARGET_HAS_add2_i32;
2217     case INDEX_op_sub2_i32:
2218         return TCG_TARGET_HAS_sub2_i32;
2219     case INDEX_op_mulu2_i32:
2220         return TCG_TARGET_HAS_mulu2_i32;
2221     case INDEX_op_muls2_i32:
2222         return TCG_TARGET_HAS_muls2_i32;
2223     case INDEX_op_muluh_i32:
2224         return TCG_TARGET_HAS_muluh_i32;
2225     case INDEX_op_mulsh_i32:
2226         return TCG_TARGET_HAS_mulsh_i32;
2227     case INDEX_op_ext8s_i32:
2228         return TCG_TARGET_HAS_ext8s_i32;
2229     case INDEX_op_ext16s_i32:
2230         return TCG_TARGET_HAS_ext16s_i32;
2231     case INDEX_op_ext8u_i32:
2232         return TCG_TARGET_HAS_ext8u_i32;
2233     case INDEX_op_ext16u_i32:
2234         return TCG_TARGET_HAS_ext16u_i32;
2235     case INDEX_op_bswap16_i32:
2236         return TCG_TARGET_HAS_bswap16_i32;
2237     case INDEX_op_bswap32_i32:
2238         return TCG_TARGET_HAS_bswap32_i32;
2239     case INDEX_op_not_i32:
2240         return TCG_TARGET_HAS_not_i32;
2241     case INDEX_op_andc_i32:
2242         return TCG_TARGET_HAS_andc_i32;
2243     case INDEX_op_orc_i32:
2244         return TCG_TARGET_HAS_orc_i32;
2245     case INDEX_op_eqv_i32:
2246         return TCG_TARGET_HAS_eqv_i32;
2247     case INDEX_op_nand_i32:
2248         return TCG_TARGET_HAS_nand_i32;
2249     case INDEX_op_nor_i32:
2250         return TCG_TARGET_HAS_nor_i32;
2251     case INDEX_op_clz_i32:
2252         return TCG_TARGET_HAS_clz_i32;
2253     case INDEX_op_ctz_i32:
2254         return TCG_TARGET_HAS_ctz_i32;
2255     case INDEX_op_ctpop_i32:
2256         return TCG_TARGET_HAS_ctpop_i32;
2257 
2258     case INDEX_op_brcond2_i32:
2259     case INDEX_op_setcond2_i32:
2260         return TCG_TARGET_REG_BITS == 32;
2261 
2262     case INDEX_op_mov_i64:
2263     case INDEX_op_setcond_i64:
2264     case INDEX_op_brcond_i64:
2265     case INDEX_op_movcond_i64:
2266     case INDEX_op_ld8u_i64:
2267     case INDEX_op_ld8s_i64:
2268     case INDEX_op_ld16u_i64:
2269     case INDEX_op_ld16s_i64:
2270     case INDEX_op_ld32u_i64:
2271     case INDEX_op_ld32s_i64:
2272     case INDEX_op_ld_i64:
2273     case INDEX_op_st8_i64:
2274     case INDEX_op_st16_i64:
2275     case INDEX_op_st32_i64:
2276     case INDEX_op_st_i64:
2277     case INDEX_op_add_i64:
2278     case INDEX_op_sub_i64:
2279     case INDEX_op_neg_i64:
2280     case INDEX_op_mul_i64:
2281     case INDEX_op_and_i64:
2282     case INDEX_op_or_i64:
2283     case INDEX_op_xor_i64:
2284     case INDEX_op_shl_i64:
2285     case INDEX_op_shr_i64:
2286     case INDEX_op_sar_i64:
2287     case INDEX_op_ext_i32_i64:
2288     case INDEX_op_extu_i32_i64:
2289     case INDEX_op_extract_i64:
2290     case INDEX_op_sextract_i64:
2291     case INDEX_op_deposit_i64:
2292         return TCG_TARGET_REG_BITS == 64;
2293 
2294     case INDEX_op_negsetcond_i64:
2295         return TCG_TARGET_HAS_negsetcond_i64;
2296     case INDEX_op_div_i64:
2297     case INDEX_op_divu_i64:
2298         return TCG_TARGET_HAS_div_i64;
2299     case INDEX_op_rem_i64:
2300     case INDEX_op_remu_i64:
2301         return TCG_TARGET_HAS_rem_i64;
2302     case INDEX_op_div2_i64:
2303     case INDEX_op_divu2_i64:
2304         return TCG_TARGET_HAS_div2_i64;
2305     case INDEX_op_rotl_i64:
2306     case INDEX_op_rotr_i64:
2307         return TCG_TARGET_HAS_rot_i64;
2308     case INDEX_op_extract2_i64:
2309         return TCG_TARGET_HAS_extract2_i64;
2310     case INDEX_op_extrl_i64_i32:
2311     case INDEX_op_extrh_i64_i32:
2312         return TCG_TARGET_HAS_extr_i64_i32;
2313     case INDEX_op_ext8s_i64:
2314         return TCG_TARGET_HAS_ext8s_i64;
2315     case INDEX_op_ext16s_i64:
2316         return TCG_TARGET_HAS_ext16s_i64;
2317     case INDEX_op_ext32s_i64:
2318         return TCG_TARGET_HAS_ext32s_i64;
2319     case INDEX_op_ext8u_i64:
2320         return TCG_TARGET_HAS_ext8u_i64;
2321     case INDEX_op_ext16u_i64:
2322         return TCG_TARGET_HAS_ext16u_i64;
2323     case INDEX_op_ext32u_i64:
2324         return TCG_TARGET_HAS_ext32u_i64;
2325     case INDEX_op_bswap16_i64:
2326         return TCG_TARGET_HAS_bswap16_i64;
2327     case INDEX_op_bswap32_i64:
2328         return TCG_TARGET_HAS_bswap32_i64;
2329     case INDEX_op_bswap64_i64:
2330         return TCG_TARGET_HAS_bswap64_i64;
2331     case INDEX_op_not_i64:
2332         return TCG_TARGET_HAS_not_i64;
2333     case INDEX_op_andc_i64:
2334         return TCG_TARGET_HAS_andc_i64;
2335     case INDEX_op_orc_i64:
2336         return TCG_TARGET_HAS_orc_i64;
2337     case INDEX_op_eqv_i64:
2338         return TCG_TARGET_HAS_eqv_i64;
2339     case INDEX_op_nand_i64:
2340         return TCG_TARGET_HAS_nand_i64;
2341     case INDEX_op_nor_i64:
2342         return TCG_TARGET_HAS_nor_i64;
2343     case INDEX_op_clz_i64:
2344         return TCG_TARGET_HAS_clz_i64;
2345     case INDEX_op_ctz_i64:
2346         return TCG_TARGET_HAS_ctz_i64;
2347     case INDEX_op_ctpop_i64:
2348         return TCG_TARGET_HAS_ctpop_i64;
2349     case INDEX_op_add2_i64:
2350         return TCG_TARGET_HAS_add2_i64;
2351     case INDEX_op_sub2_i64:
2352         return TCG_TARGET_HAS_sub2_i64;
2353     case INDEX_op_mulu2_i64:
2354         return TCG_TARGET_HAS_mulu2_i64;
2355     case INDEX_op_muls2_i64:
2356         return TCG_TARGET_HAS_muls2_i64;
2357     case INDEX_op_muluh_i64:
2358         return TCG_TARGET_HAS_muluh_i64;
2359     case INDEX_op_mulsh_i64:
2360         return TCG_TARGET_HAS_mulsh_i64;
2361 
2362     case INDEX_op_mov_vec:
2363     case INDEX_op_dup_vec:
2364     case INDEX_op_dupm_vec:
2365     case INDEX_op_ld_vec:
2366     case INDEX_op_st_vec:
2367     case INDEX_op_add_vec:
2368     case INDEX_op_sub_vec:
2369     case INDEX_op_and_vec:
2370     case INDEX_op_or_vec:
2371     case INDEX_op_xor_vec:
2372     case INDEX_op_cmp_vec:
2373         return has_type;
2374     case INDEX_op_dup2_vec:
2375         return has_type && TCG_TARGET_REG_BITS == 32;
2376     case INDEX_op_not_vec:
2377         return has_type && TCG_TARGET_HAS_not_vec;
2378     case INDEX_op_neg_vec:
2379         return has_type && TCG_TARGET_HAS_neg_vec;
2380     case INDEX_op_abs_vec:
2381         return has_type && TCG_TARGET_HAS_abs_vec;
2382     case INDEX_op_andc_vec:
2383         return has_type && TCG_TARGET_HAS_andc_vec;
2384     case INDEX_op_orc_vec:
2385         return has_type && TCG_TARGET_HAS_orc_vec;
2386     case INDEX_op_nand_vec:
2387         return has_type && TCG_TARGET_HAS_nand_vec;
2388     case INDEX_op_nor_vec:
2389         return has_type && TCG_TARGET_HAS_nor_vec;
2390     case INDEX_op_eqv_vec:
2391         return has_type && TCG_TARGET_HAS_eqv_vec;
2392     case INDEX_op_mul_vec:
2393         return has_type && TCG_TARGET_HAS_mul_vec;
2394     case INDEX_op_shli_vec:
2395     case INDEX_op_shri_vec:
2396     case INDEX_op_sari_vec:
2397         return has_type && TCG_TARGET_HAS_shi_vec;
2398     case INDEX_op_shls_vec:
2399     case INDEX_op_shrs_vec:
2400     case INDEX_op_sars_vec:
2401         return has_type && TCG_TARGET_HAS_shs_vec;
2402     case INDEX_op_shlv_vec:
2403     case INDEX_op_shrv_vec:
2404     case INDEX_op_sarv_vec:
2405         return has_type && TCG_TARGET_HAS_shv_vec;
2406     case INDEX_op_rotli_vec:
2407         return has_type && TCG_TARGET_HAS_roti_vec;
2408     case INDEX_op_rotls_vec:
2409         return has_type && TCG_TARGET_HAS_rots_vec;
2410     case INDEX_op_rotlv_vec:
2411     case INDEX_op_rotrv_vec:
2412         return has_type && TCG_TARGET_HAS_rotv_vec;
2413     case INDEX_op_ssadd_vec:
2414     case INDEX_op_usadd_vec:
2415     case INDEX_op_sssub_vec:
2416     case INDEX_op_ussub_vec:
2417         return has_type && TCG_TARGET_HAS_sat_vec;
2418     case INDEX_op_smin_vec:
2419     case INDEX_op_umin_vec:
2420     case INDEX_op_smax_vec:
2421     case INDEX_op_umax_vec:
2422         return has_type && TCG_TARGET_HAS_minmax_vec;
2423     case INDEX_op_bitsel_vec:
2424         return has_type && TCG_TARGET_HAS_bitsel_vec;
2425     case INDEX_op_cmpsel_vec:
2426         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2427 
2428     default:
2429         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2430         return true;
2431     }
2432 }
2433 
2434 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2435 {
2436     unsigned width;
2437 
2438     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2439     width = (type == TCG_TYPE_I32 ? 32 : 64);
2440 
2441     tcg_debug_assert(ofs < width);
2442     tcg_debug_assert(len > 0);
2443     tcg_debug_assert(len <= width - ofs);
2444 
2445     return TCG_TARGET_deposit_valid(type, ofs, len);
2446 }
2447 
2448 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2449 
/*
 * Emit an INDEX_op_call to helper FUNC described by INFO, with output
 * temp RET (NULL for void helpers) and input temps ARGS indexed per
 * INFO->in.  32-bit inputs that the call ABI requires widened to 64
 * bits are copied through transient EBB temps, freed after emission.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout once per helper, thread-safely. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* +2 slots for the function pointer and the info pointer. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-piece return: RET is the first of N consecutive temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants the 32-bit input extended to 64 bits. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* The extension temps are only needed for this one call. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2542 
/* Emit a call to helper FUNC/INFO taking no input temps. */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}
2547 
2548 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2549 {
2550     tcg_gen_callN(func, info, ret, &t1);
2551 }
2552 
2553 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2554                    TCGTemp *t1, TCGTemp *t2)
2555 {
2556     TCGTemp *args[2] = { t1, t2 };
2557     tcg_gen_callN(func, info, ret, args);
2558 }
2559 
2560 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2561                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2562 {
2563     TCGTemp *args[3] = { t1, t2, t3 };
2564     tcg_gen_callN(func, info, ret, args);
2565 }
2566 
2567 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2568                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2569 {
2570     TCGTemp *args[4] = { t1, t2, t3, t4 };
2571     tcg_gen_callN(func, info, ret, args);
2572 }
2573 
2574 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2575                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2576 {
2577     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2578     tcg_gen_callN(func, info, ret, args);
2579 }
2580 
2581 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2582                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2583                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2584 {
2585     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2586     tcg_gen_callN(func, info, ret, args);
2587 }
2588 
2589 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2590                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2591                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2592 {
2593     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2594     tcg_gen_callN(func, info, ret, args);
2595 }
2596 
/*
 * Reset the value-location state of every temp before register
 * allocation begins for a new translation block.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            /* Fixed temps permanently live in their host register. */
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            /* Globals start out in their backing memory. */
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* Frame slots for temps are (re)assigned on demand. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    /* No temp currently occupies any host register. */
    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2628 
/*
 * Format a human-readable name for TS into BUF, for opcode dumps:
 * globals print their registered name, TB temps print "locN", EBB temps
 * print "tmpN" (N relative to the globals), and constants print their
 * value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Prefix vector constants with the vector width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2668 
2669 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2670                              int buf_size, TCGArg arg)
2671 {
2672     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2673 }
2674 
/* Printable names for TCGCond values, used when dumping ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2692 
/*
 * Printable names for the size/sign/endianness portion of a MemOp,
 * indexed by (MO_BSWAP | MO_SSIZE) bits; used when dumping ops.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2710 
/* Printable prefixes for the MemOp alignment field, for op dumps. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2721 
/* Printable prefixes for the MemOp atomicity field, for op dumps. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2730 
/* Printable names for valid TCG_BSWAP_* flag combinations, for op dumps. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2738 
#ifdef CONFIG_PLUGIN
/*
 * NOTE(review): appears to name plugin callback insertion points for
 * op dumps -- confirm indexing against the plugin "from" enum.
 */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2747 
2748 static inline bool tcg_regset_single(TCGRegSet d)
2749 {
2750     return (d & (d - 1)) == 0;
2751 }
2752 
2753 static inline TCGReg tcg_regset_first(TCGRegSet d)
2754 {
2755     if (TCG_TARGET_NB_REGS <= 32) {
2756         return ctz32(d);
2757     } else {
2758         return ctz64(d);
2759     }
2760 }
2761 
/*
 * Return only the number of characters output -- no error return.
 * A negative fprintf() result (I/O error) is clamped to zero so that
 * the column counters accumulated by tcg_dump_ops() stay monotonic.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2765 
/*
 * Dump the current op list of @s to @f, one op per line, for debugging.
 * Constant arguments with known meanings (conditions, memory ops, bswap
 * flags, barriers, labels) are decoded symbolically; anything else is
 * printed in hex.  When @have_prefs is true, also print the per-output
 * register preferences; dead/sync annotations from liveness are printed
 * whenever op->life is set.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;   /* characters printed so far, for column alignment */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: print the start parameters. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Ordinary op: outputs, then inputs, then constant args. */
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                /* Vector ops also show total size and element size in bits. */
                col += ne_fprintf(f, "v%d,e%d,",
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /*
             * First pass over constant args: decode the leading constant
             * symbolically for ops where its meaning is known.  On exit,
             * 'i' counts the constant args consumed and 'k' indexes the
             * next unread op->args[] slot.
             */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                /* Comparison condition argument. */
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_a32_i32:
            case INDEX_op_qemu_ld_a64_i32:
            case INDEX_op_qemu_st_a32_i32:
            case INDEX_op_qemu_st_a64_i32:
            case INDEX_op_qemu_st8_a32_i32:
            case INDEX_op_qemu_st8_a64_i32:
            case INDEX_op_qemu_ld_a32_i64:
            case INDEX_op_qemu_ld_a64_i64:
            case INDEX_op_qemu_st_a32_i64:
            case INDEX_op_qemu_st_a64_i64:
            case INDEX_op_qemu_ld_a32_i128:
            case INDEX_op_qemu_ld_a64_i128:
            case INDEX_op_qemu_st_a32_i128:
            case INDEX_op_qemu_st_a64_i128:
                /* Guest memory access: decode the MemOpIdx argument. */
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                /* Byte-swap flags argument. */
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                /* Plugin callback origin argument. */
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /*
             * Second pass: branch targets and memory barriers, which may
             * follow the constants decoded above.
             */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                /* Decode the barrier type and load/store ordering bits. */
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* All 16 combinations of the four TCG_MO_* bits. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant args are printed in hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before the liveness/preference annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Arguments that must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Arguments whose values are dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            /* Register preferences, one set per output argument. */
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3100 
3101 /* we give more priority to constraints with less registers */
3102 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3103 {
3104     int n;
3105 
3106     arg_ct += k;
3107     n = ctpop64(arg_ct->regs);
3108 
3109     /*
3110      * Sort constraints of a single register first, which includes output
3111      * aliases (which must exactly match the input already allocated).
3112      */
3113     if (n == 1 || arg_ct->oalias) {
3114         return INT_MAX;
3115     }
3116 
3117     /*
3118      * Sort register pairs next, first then second immediately after.
3119      * Arbitrarily sort multiple pairs by the index of the first reg;
3120      * there shouldn't be many pairs.
3121      */
3122     switch (arg_ct->pair) {
3123     case 1:
3124     case 3:
3125         return (k + 1) * 2;
3126     case 2:
3127         return (arg_ct->pair_index + 1) * 2 - 1;
3128     }
3129 
3130     /* Finally, sort by decreasing register count. */
3131     assert(n > 1);
3132     return -n;
3133 }
3134 
3135 /* sort from highest priority to lowest */
3136 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3137 {
3138     int i, j;
3139 
3140     for (i = 0; i < n; i++) {
3141         a[start + i].sort_index = start + i;
3142     }
3143     if (n <= 1) {
3144         return;
3145     }
3146     for (i = 0; i < n - 1; i++) {
3147         for (j = i + 1; j < n; j++) {
3148             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3149             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3150             if (p1 < p2) {
3151                 int tmp = a[start + i].sort_index;
3152                 a[start + i].sort_index = a[start + j].sort_index;
3153                 a[start + j].sort_index = tmp;
3154             }
3155         }
3156     }
3157 }
3158 
/* Zeroed constraint set returned for opcodes with no constraints. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Processed form of constraint_sets[], filled by process_constraint_sets(). */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3161 
/*
 * Expand the textual constraint_sets[] descriptions into the binary
 * TCGArgConstraint form in all_cts[]: resolve alias digits ('0'-'9'),
 * the new-register marker ('&'), register-pair markers ('p'/'m'), and
 * the per-target register/constant constraint letters, then sort each
 * set by allocation priority.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in the argument array. */
            bool input_p = i >= nb_oargs;
            int o;

            /* Handle the single-character markers that stand alone. */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input constrained to the same register as output 'o'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate register-set and constant letters. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3347 
3348 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3349 {
3350     const TCGOpDef *def = &tcg_op_defs[op->opc];
3351     TCGConstraintSetIndex con_set;
3352 
3353 #ifdef CONFIG_DEBUG_TCG
3354     assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)));
3355 #endif
3356 
3357     if (def->flags & TCG_OPF_NOT_PRESENT) {
3358         return empty_cts;
3359     }
3360 
3361     con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op));
3362     tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets));
3363 
3364     /* The constraint arguments must match TCGOpcode arguments. */
3365     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3366     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3367 
3368     return all_cts[con_set];
3369 }
3370 
3371 static void remove_label_use(TCGOp *op, int idx)
3372 {
3373     TCGLabel *label = arg_label(op->args[idx]);
3374     TCGLabelUse *use;
3375 
3376     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3377         if (use->op == op) {
3378             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3379             return;
3380         }
3381     }
3382     g_assert_not_reached();
3383 }
3384 
3385 void tcg_op_remove(TCGContext *s, TCGOp *op)
3386 {
3387     switch (op->opc) {
3388     case INDEX_op_br:
3389         remove_label_use(op, 0);
3390         break;
3391     case INDEX_op_brcond_i32:
3392     case INDEX_op_brcond_i64:
3393         remove_label_use(op, 3);
3394         break;
3395     case INDEX_op_brcond2_i32:
3396         remove_label_use(op, 5);
3397         break;
3398     default:
3399         break;
3400     }
3401 
3402     QTAILQ_REMOVE(&s->ops, op, link);
3403     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3404     s->nb_ops--;
3405 }
3406 
3407 void tcg_remove_ops_after(TCGOp *op)
3408 {
3409     TCGContext *s = tcg_ctx;
3410 
3411     while (true) {
3412         TCGOp *last = tcg_last_op();
3413         if (last == op) {
3414             return;
3415         }
3416         tcg_op_remove(s, last);
3417     }
3418 }
3419 
3420 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3421 {
3422     TCGContext *s = tcg_ctx;
3423     TCGOp *op = NULL;
3424 
3425     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3426         QTAILQ_FOREACH(op, &s->free_ops, link) {
3427             if (nargs <= op->nargs) {
3428                 QTAILQ_REMOVE(&s->free_ops, op, link);
3429                 nargs = op->nargs;
3430                 goto found;
3431             }
3432         }
3433     }
3434 
3435     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3436     nargs = MAX(4, nargs);
3437     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3438 
3439  found:
3440     memset(op, 0, offsetof(TCGOp, link));
3441     op->opc = opc;
3442     op->nargs = nargs;
3443 
3444     /* Check for bitfield overflow. */
3445     tcg_debug_assert(op->nargs == nargs);
3446 
3447     s->nb_ops++;
3448     return op;
3449 }
3450 
3451 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3452 {
3453     TCGOp *op = tcg_op_alloc(opc, nargs);
3454 
3455     if (tcg_ctx->emit_before_op) {
3456         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3457     } else {
3458         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3459     }
3460     return op;
3461 }
3462 
3463 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3464                             TCGOpcode opc, unsigned nargs)
3465 {
3466     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3467 
3468     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3469     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3470     return new_op;
3471 }
3472 
3473 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3474                            TCGOpcode opc, unsigned nargs)
3475 {
3476     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3477 
3478     TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op);
3479     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3480     return new_op;
3481 }
3482 
3483 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3484 {
3485     TCGLabelUse *u;
3486 
3487     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3488         TCGOp *op = u->op;
3489         switch (op->opc) {
3490         case INDEX_op_br:
3491             op->args[0] = label_arg(to);
3492             break;
3493         case INDEX_op_brcond_i32:
3494         case INDEX_op_brcond_i64:
3495             op->args[3] = label_arg(to);
3496             break;
3497         case INDEX_op_brcond2_i32:
3498             op->args[5] = label_arg(to);
3499             break;
3500         default:
3501             g_assert_not_reached();
3502         }
3503     }
3504 
3505     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3506 }
3507 
/*
 * Reachable analysis : remove unreachable code.
 * A single forward walk: after an unconditional branch / exit / noreturn
 * call, ops are dead until the next referenced label.  Also merges
 * adjacent labels and removes branch-to-next.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3599 
3600 #define TS_DEAD  1
3601 #define TS_MEM   2
3602 
3603 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3604 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3605 
3606 /* For liveness_pass_1, the register preferences for a given temp.  */
3607 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3608 {
3609     return ts->state_ptr;
3610 }
3611 
3612 /* For liveness_pass_1, reset the preferences for a given temp to the
3613  * maximal regset for its type.
3614  */
3615 static inline void la_reset_pref(TCGTemp *ts)
3616 {
3617     *la_temp_pref(ts)
3618         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3619 }
3620 
3621 /* liveness analysis: end of function: all temps are dead, and globals
3622    should be in memory. */
3623 static void la_func_end(TCGContext *s, int ng, int nt)
3624 {
3625     int i;
3626 
3627     for (i = 0; i < ng; ++i) {
3628         s->temps[i].state = TS_DEAD | TS_MEM;
3629         la_reset_pref(&s->temps[i]);
3630     }
3631     for (i = ng; i < nt; ++i) {
3632         s->temps[i].state = TS_DEAD;
3633         la_reset_pref(&s->temps[i]);
3634     }
3635 }
3636 
3637 /* liveness analysis: end of basic block: all temps are dead, globals
3638    and local temps should be in memory. */
3639 static void la_bb_end(TCGContext *s, int ng, int nt)
3640 {
3641     int i;
3642 
3643     for (i = 0; i < nt; ++i) {
3644         TCGTemp *ts = &s->temps[i];
3645         int state;
3646 
3647         switch (ts->kind) {
3648         case TEMP_FIXED:
3649         case TEMP_GLOBAL:
3650         case TEMP_TB:
3651             state = TS_DEAD | TS_MEM;
3652             break;
3653         case TEMP_EBB:
3654         case TEMP_CONST:
3655             state = TS_DEAD;
3656             break;
3657         default:
3658             g_assert_not_reached();
3659         }
3660         ts->state = state;
3661         la_reset_pref(ts);
3662     }
3663 }
3664 
3665 /* liveness analysis: sync globals back to memory.  */
3666 static void la_global_sync(TCGContext *s, int ng)
3667 {
3668     int i;
3669 
3670     for (i = 0; i < ng; ++i) {
3671         int state = s->temps[i].state;
3672         s->temps[i].state = state | TS_MEM;
3673         if (state == TS_DEAD) {
3674             /* If the global was previously dead, reset prefs.  */
3675             la_reset_pref(&s->temps[i]);
3676         }
3677     }
3678 }
3679 
3680 /*
3681  * liveness analysis: conditional branch: all temps are dead unless
3682  * explicitly live-across-conditional-branch, globals and local temps
3683  * should be synced.
3684  */
3685 static void la_bb_sync(TCGContext *s, int ng, int nt)
3686 {
3687     la_global_sync(s, ng);
3688 
3689     for (int i = ng; i < nt; ++i) {
3690         TCGTemp *ts = &s->temps[i];
3691         int state;
3692 
3693         switch (ts->kind) {
3694         case TEMP_TB:
3695             state = ts->state;
3696             ts->state = state | TS_MEM;
3697             if (state != TS_DEAD) {
3698                 continue;
3699             }
3700             break;
3701         case TEMP_EBB:
3702         case TEMP_CONST:
3703             continue;
3704         default:
3705             g_assert_not_reached();
3706         }
3707         la_reset_pref(&s->temps[i]);
3708     }
3709 }
3710 
3711 /* liveness analysis: sync globals back to memory and kill.  */
3712 static void la_global_kill(TCGContext *s, int ng)
3713 {
3714     int i;
3715 
3716     for (i = 0; i < ng; i++) {
3717         s->temps[i].state = TS_DEAD | TS_MEM;
3718         la_reset_pref(&s->temps[i]);
3719     }
3720 }
3721 
3722 /* liveness analysis: note live globals crossing calls.  */
3723 static void la_cross_call(TCGContext *s, int nt)
3724 {
3725     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3726     int i;
3727 
3728     for (i = 0; i < nt; i++) {
3729         TCGTemp *ts = &s->temps[i];
3730         if (!(ts->state & TS_DEAD)) {
3731             TCGRegSet *pset = la_temp_pref(ts);
3732             TCGRegSet set = *pset;
3733 
3734             set &= mask;
3735             /* If the combination is not possible, restart.  */
3736             if (set == 0) {
3737                 set = tcg_target_available_regs[ts->type] & mask;
3738             }
3739             *pset = set;
3740         }
3741     }
3742 }
3743 
3744 /*
3745  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3746  * to TEMP_EBB, if possible.
3747  */
3748 static void __attribute__((noinline))
3749 liveness_pass_0(TCGContext *s)
3750 {
3751     void * const multiple_ebb = (void *)(uintptr_t)-1;
3752     int nb_temps = s->nb_temps;
3753     TCGOp *op, *ebb;
3754 
3755     for (int i = s->nb_globals; i < nb_temps; ++i) {
3756         s->temps[i].state_ptr = NULL;
3757     }
3758 
3759     /*
3760      * Represent each EBB by the op at which it begins.  In the case of
3761      * the first EBB, this is the first op, otherwise it is a label.
3762      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3763      * within a single EBB, else MULTIPLE_EBB.
3764      */
3765     ebb = QTAILQ_FIRST(&s->ops);
3766     QTAILQ_FOREACH(op, &s->ops, link) {
3767         const TCGOpDef *def;
3768         int nb_oargs, nb_iargs;
3769 
3770         switch (op->opc) {
3771         case INDEX_op_set_label:
3772             ebb = op;
3773             continue;
3774         case INDEX_op_discard:
3775             continue;
3776         case INDEX_op_call:
3777             nb_oargs = TCGOP_CALLO(op);
3778             nb_iargs = TCGOP_CALLI(op);
3779             break;
3780         default:
3781             def = &tcg_op_defs[op->opc];
3782             nb_oargs = def->nb_oargs;
3783             nb_iargs = def->nb_iargs;
3784             break;
3785         }
3786 
3787         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3788             TCGTemp *ts = arg_temp(op->args[i]);
3789 
3790             if (ts->kind != TEMP_TB) {
3791                 continue;
3792             }
3793             if (ts->state_ptr == NULL) {
3794                 ts->state_ptr = ebb;
3795             } else if (ts->state_ptr != ebb) {
3796                 ts->state_ptr = multiple_ebb;
3797             }
3798         }
3799     }
3800 
3801     /*
3802      * For TEMP_TB that turned out not to be used beyond one EBB,
3803      * reduce the liveness to TEMP_EBB.
3804      */
3805     for (int i = s->nb_globals; i < nb_temps; ++i) {
3806         TCGTemp *ts = &s->temps[i];
3807         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3808             ts->kind = TEMP_EBB;
3809         }
3810     }
3811 }
3812 
/*
 * Liveness analysis, pass 1: walk the op list backwards, recording in
 * op->life which arguments die at that op (DEAD_ARG) and which outputs
 * must be synced to memory (SYNC_ARG).  Ops without side effects whose
 * outputs are all dead are removed; double-word add/sub/mul ops whose
 * high half is dead are narrowed to single-word ops in place.  Register
 * preferences are accumulated per temp and saved in op->output_pref.
 *
 * Per-temp working state: ts->state is a TS_DEAD/TS_MEM bit mask and
 * ts->state_ptr points at the temp's TCGRegSet preference slot.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference set per temp, addressed via ts->state_ptr.  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                /* A call that may write globals kills them; one that may
                   only read them requires a sync.  */
                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        /* Publish the dead/sync bits for use by later passes.  */
        op->life = arg_life;
    }
}
4157 
/*
 * Liveness analysis, pass 2: convert indirect globals (indirect_reg set)
 * to direct TEMP_EBB temporaries, inserting explicit ld ops before uses
 * and st ops after last writes, guided by the op->life bits computed by
 * liveness_pass_1.  Returns true if any op argument was rewritten.
 *
 * Per-temp working state here: ts->state_ptr is the shadow direct temp
 * (or NULL), and ts->state tracks whether the shadow currently holds the
 * value (0 = live/modified, TS_MEM = synced, TS_DEAD = must reload).
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Continue from i == nb_globals: clear state for the non-globals.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Insert a load from the global's memory slot into the
                   shadow direct temp, just before this use.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after sync: store the mov source directly
                           and drop the mov itself.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4346 
/*
 * Assign a stack-frame slot to @ts.  The slot is sized and aligned for
 * ts->base_type; if the base type was subdivided into parts, memory is
 * assigned to every part.  If the frame is exhausted, the translation
 * is restarted with a smaller TB via tcg_raise_tb_overflow().
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* Apply the sparc stack bias after the overflow check, so the
       recorded offsets include the ABI's frame-pointer bias.  */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4415 
4416 /* Assign @reg to @ts, and update reg_to_temp[]. */
4417 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4418 {
4419     if (ts->val_type == TEMP_VAL_REG) {
4420         TCGReg old = ts->reg;
4421         tcg_debug_assert(s->reg_to_temp[old] == ts);
4422         if (old == reg) {
4423             return;
4424         }
4425         s->reg_to_temp[old] = NULL;
4426     }
4427     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4428     s->reg_to_temp[reg] = ts;
4429     ts->val_type = TEMP_VAL_REG;
4430     ts->reg = reg;
4431 }
4432 
4433 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4434 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4435 {
4436     tcg_debug_assert(type != TEMP_VAL_REG);
4437     if (ts->val_type == TEMP_VAL_REG) {
4438         TCGReg reg = ts->reg;
4439         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4440         s->reg_to_temp[reg] = NULL;
4441     }
4442     ts->val_type = type;
4443 }
4444 
4445 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4446 
4447 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4448    mark it free; otherwise mark it dead.  */
4449 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4450 {
4451     TCGTempVal new_type;
4452 
4453     switch (ts->kind) {
4454     case TEMP_FIXED:
4455         return;
4456     case TEMP_GLOBAL:
4457     case TEMP_TB:
4458         new_type = TEMP_VAL_MEM;
4459         break;
4460     case TEMP_EBB:
4461         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4462         break;
4463     case TEMP_CONST:
4464         new_type = TEMP_VAL_CONST;
4465         break;
4466     default:
4467         g_assert_not_reached();
4468     }
4469     set_temp_val_nonreg(s, ts, new_type);
4470 }
4471 
/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    /* Positive free_or_dead selects the "dead" transition. */
    temp_free_or_dead(s, ts, 1);
}
4477 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need writeback; coherent temps already match. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        /* Lazily allocate a frame slot on first writeback. */
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register first,
               then fall through to the register-store path.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Already resident in memory; nothing to emit. */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4521 
4522 /* free register 'reg' by spilling the corresponding temporary if necessary */
4523 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4524 {
4525     TCGTemp *ts = s->reg_to_temp[reg];
4526     if (ts != NULL) {
4527         temp_sync(s, ts, allocated_regs, 0, -1);
4528     }
4529 }
4530 
4531 /**
4532  * tcg_reg_alloc:
4533  * @required_regs: Set of registers in which we must allocate.
4534  * @allocated_regs: Set of registers which must be avoided.
4535  * @preferred_regs: Set of registers we should prefer.
4536  * @rev: True if we search the registers in "indirect" order.
4537  *
4538  * The allocated register must be in @required_regs & ~@allocated_regs,
4539  * but if we can put it in @preferred_regs we may save a move later.
4540  */
4541 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4542                             TCGRegSet allocated_regs,
4543                             TCGRegSet preferred_regs, bool rev)
4544 {
4545     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4546     TCGRegSet reg_ct[2];
4547     const int *order;
4548 
4549     reg_ct[1] = required_regs & ~allocated_regs;
4550     tcg_debug_assert(reg_ct[1] != 0);
4551     reg_ct[0] = reg_ct[1] & preferred_regs;
4552 
4553     /* Skip the preferred_regs option if it cannot be satisfied,
4554        or if the preference made no difference.  */
4555     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4556 
4557     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4558 
4559     /* Try free registers, preferences first.  */
4560     for (j = f; j < 2; j++) {
4561         TCGRegSet set = reg_ct[j];
4562 
4563         if (tcg_regset_single(set)) {
4564             /* One register in the set.  */
4565             TCGReg reg = tcg_regset_first(set);
4566             if (s->reg_to_temp[reg] == NULL) {
4567                 return reg;
4568             }
4569         } else {
4570             for (i = 0; i < n; i++) {
4571                 TCGReg reg = order[i];
4572                 if (s->reg_to_temp[reg] == NULL &&
4573                     tcg_regset_test_reg(set, reg)) {
4574                     return reg;
4575                 }
4576             }
4577         }
4578     }
4579 
4580     /* We must spill something.  */
4581     for (j = f; j < 2; j++) {
4582         TCGRegSet set = reg_ct[j];
4583 
4584         if (tcg_regset_single(set)) {
4585             /* One register in the set.  */
4586             TCGReg reg = tcg_regset_first(set);
4587             tcg_reg_free(s, reg, allocated_regs);
4588             return reg;
4589         } else {
4590             for (i = 0; i < n; i++) {
4591                 TCGReg reg = order[i];
4592                 if (tcg_regset_test_reg(set, reg)) {
4593                     tcg_reg_free(s, reg, allocated_regs);
4594                     return reg;
4595                 }
4596             }
4597         }
4598     }
4599 
4600     g_assert_not_reached();
4601 }
4602 
/*
 * As tcg_reg_alloc, but allocate an adjacent pair of registers,
 * returning the lower-numbered one.  @required_regs marks the valid
 * low registers of a pair; spills are minimized by preferring pairs
 * that are already (partly) free.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of {reg, reg+1} are free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4648 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already resident; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Scalar constant: plain move-immediate. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register now differs from (possibly unallocated) memory. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded, so register and memory agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4697 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.
   Note: the parameters are currently unused beyond the assertion;
   liveness has already done the actual writeback.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4706 
4707 /* save globals to their canonical location and assume they can be
4708    modified be the following code. 'allocated_regs' is used in case a
4709    temporary registers needs to be allocated to store a constant. */
4710 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4711 {
4712     int i, n;
4713 
4714     for (i = 0, n = s->nb_globals; i < n; i++) {
4715         temp_save(s, &s->temps[i], allocated_regs);
4716     }
4717 }
4718 
4719 /* sync globals to their canonical location and assume they can be
4720    read by the following code. 'allocated_regs' is used in case a
4721    temporary registers needs to be allocated to store a constant. */
4722 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4723 {
4724     int i, n;
4725 
4726     for (i = 0, n = s->nb_globals; i < n; i++) {
4727         TCGTemp *ts = &s->temps[i];
4728         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4729                          || ts->kind == TEMP_FIXED
4730                          || ts->mem_coherent);
4731     }
4732 }
4733 
4734 /* at the end of a basic block, we assume all temporaries are dead and
4735    all globals are stored at their canonical location. */
4736 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4737 {
4738     int i;
4739 
4740     for (i = s->nb_globals; i < s->nb_temps; i++) {
4741         TCGTemp *ts = &s->temps[i];
4742 
4743         switch (ts->kind) {
4744         case TEMP_TB:
4745             temp_save(s, ts, allocated_regs);
4746             break;
4747         case TEMP_EBB:
4748             /* The liveness analysis already ensures that temps are dead.
4749                Keep an tcg_debug_assert for safety. */
4750             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4751             break;
4752         case TEMP_CONST:
4753             /* Similarly, we should have freed any allocated register. */
4754             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4755             break;
4756         default:
4757             g_assert_not_reached();
4758         }
4759     }
4760 
4761     save_globals(s, allocated_regs);
4762 }
4763 
4764 /*
4765  * At a conditional branch, we assume all temporaries are dead unless
4766  * explicitly live-across-conditional-branch; all globals and local
4767  * temps are synced to their location.
4768  */
4769 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4770 {
4771     sync_globals(s, allocated_regs);
4772 
4773     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4774         TCGTemp *ts = &s->temps[i];
4775         /*
4776          * The liveness analysis already ensures that temps are dead.
4777          * Keep tcg_debug_asserts for safety.
4778          */
4779         switch (ts->kind) {
4780         case TEMP_TB:
4781             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4782             break;
4783         case TEMP_EBB:
4784         case TEMP_CONST:
4785             break;
4786         default:
4787             g_assert_not_reached();
4788         }
4789     }
4790 }
4791 
4792 /*
4793  * Specialized code generation for INDEX_op_mov_* with a constant.
4794  */
4795 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4796                                   tcg_target_ulong val, TCGLifeData arg_life,
4797                                   TCGRegSet preferred_regs)
4798 {
4799     /* ENV should not be modified.  */
4800     tcg_debug_assert(!temp_readonly(ots));
4801 
4802     /* The movi is not explicitly generated here.  */
4803     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4804     ots->val = val;
4805     ots->mem_coherent = 0;
4806     if (NEED_SYNC_ARG(0)) {
4807         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4808     } else if (IS_DEAD_ARG(0)) {
4809         temp_dead(s, ots);
4810     }
4811 }
4812 
4813 /*
4814  * Specialized code generation for INDEX_op_mov_*.
4815  */
4816 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4817 {
4818     const TCGLifeData arg_life = op->life;
4819     TCGRegSet allocated_regs, preferred_regs;
4820     TCGTemp *ts, *ots;
4821     TCGType otype, itype;
4822     TCGReg oreg, ireg;
4823 
4824     allocated_regs = s->reserved_regs;
4825     preferred_regs = output_pref(op, 0);
4826     ots = arg_temp(op->args[0]);
4827     ts = arg_temp(op->args[1]);
4828 
4829     /* ENV should not be modified.  */
4830     tcg_debug_assert(!temp_readonly(ots));
4831 
4832     /* Note that otype != itype for no-op truncation.  */
4833     otype = ots->type;
4834     itype = ts->type;
4835 
4836     if (ts->val_type == TEMP_VAL_CONST) {
4837         /* propagate constant or generate sti */
4838         tcg_target_ulong val = ts->val;
4839         if (IS_DEAD_ARG(1)) {
4840             temp_dead(s, ts);
4841         }
4842         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4843         return;
4844     }
4845 
4846     /* If the source value is in memory we're going to be forced
4847        to have it in a register in order to perform the copy.  Copy
4848        the SOURCE value into its own register first, that way we
4849        don't have to reload SOURCE the next time it is used. */
4850     if (ts->val_type == TEMP_VAL_MEM) {
4851         temp_load(s, ts, tcg_target_available_regs[itype],
4852                   allocated_regs, preferred_regs);
4853     }
4854     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4855     ireg = ts->reg;
4856 
4857     if (IS_DEAD_ARG(0)) {
4858         /* mov to a non-saved dead register makes no sense (even with
4859            liveness analysis disabled). */
4860         tcg_debug_assert(NEED_SYNC_ARG(0));
4861         if (!ots->mem_allocated) {
4862             temp_allocate_frame(s, ots);
4863         }
4864         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4865         if (IS_DEAD_ARG(1)) {
4866             temp_dead(s, ts);
4867         }
4868         temp_dead(s, ots);
4869         return;
4870     }
4871 
4872     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4873         /*
4874          * The mov can be suppressed.  Kill input first, so that it
4875          * is unlinked from reg_to_temp, then set the output to the
4876          * reg that we saved from the input.
4877          */
4878         temp_dead(s, ts);
4879         oreg = ireg;
4880     } else {
4881         if (ots->val_type == TEMP_VAL_REG) {
4882             oreg = ots->reg;
4883         } else {
4884             /* Make sure to not spill the input register during allocation. */
4885             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4886                                  allocated_regs | ((TCGRegSet)1 << ireg),
4887                                  preferred_regs, ots->indirect_base);
4888         }
4889         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4890             /*
4891              * Cross register class move not supported.
4892              * Store the source register into the destination slot
4893              * and leave the destination temp as TEMP_VAL_MEM.
4894              */
4895             assert(!temp_readonly(ots));
4896             if (!ts->mem_allocated) {
4897                 temp_allocate_frame(s, ots);
4898             }
4899             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4900             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4901             ots->mem_coherent = 1;
4902             return;
4903         }
4904     }
4905     set_temp_val_reg(s, ots, oreg);
4906     ots->mem_coherent = 0;
4907 
4908     if (NEED_SYNC_ARG(0)) {
4909         temp_sync(s, ots, allocated_regs, 0, 0);
4910     }
4911 }
4912 
4913 /*
4914  * Specialized code generation for INDEX_op_dup_vec.
4915  */
4916 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4917 {
4918     const TCGLifeData arg_life = op->life;
4919     TCGRegSet dup_out_regs, dup_in_regs;
4920     const TCGArgConstraint *dup_args_ct;
4921     TCGTemp *its, *ots;
4922     TCGType itype, vtype;
4923     unsigned vece;
4924     int lowpart_ofs;
4925     bool ok;
4926 
4927     ots = arg_temp(op->args[0]);
4928     its = arg_temp(op->args[1]);
4929 
4930     /* ENV should not be modified.  */
4931     tcg_debug_assert(!temp_readonly(ots));
4932 
4933     itype = its->type;
4934     vece = TCGOP_VECE(op);
4935     vtype = TCGOP_TYPE(op);
4936 
4937     if (its->val_type == TEMP_VAL_CONST) {
4938         /* Propagate constant via movi -> dupi.  */
4939         tcg_target_ulong val = its->val;
4940         if (IS_DEAD_ARG(1)) {
4941             temp_dead(s, its);
4942         }
4943         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4944         return;
4945     }
4946 
4947     dup_args_ct = opcode_args_ct(op);
4948     dup_out_regs = dup_args_ct[0].regs;
4949     dup_in_regs = dup_args_ct[1].regs;
4950 
4951     /* Allocate the output register now.  */
4952     if (ots->val_type != TEMP_VAL_REG) {
4953         TCGRegSet allocated_regs = s->reserved_regs;
4954         TCGReg oreg;
4955 
4956         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4957             /* Make sure to not spill the input register. */
4958             tcg_regset_set_reg(allocated_regs, its->reg);
4959         }
4960         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4961                              output_pref(op, 0), ots->indirect_base);
4962         set_temp_val_reg(s, ots, oreg);
4963     }
4964 
4965     switch (its->val_type) {
4966     case TEMP_VAL_REG:
4967         /*
4968          * The dup constriaints must be broad, covering all possible VECE.
4969          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4970          * to fail, indicating that extra moves are required for that case.
4971          */
4972         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4973             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4974                 goto done;
4975             }
4976             /* Try again from memory or a vector input register.  */
4977         }
4978         if (!its->mem_coherent) {
4979             /*
4980              * The input register is not synced, and so an extra store
4981              * would be required to use memory.  Attempt an integer-vector
4982              * register move first.  We do not have a TCGRegSet for this.
4983              */
4984             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4985                 break;
4986             }
4987             /* Sync the temp back to its slot and load from there.  */
4988             temp_sync(s, its, s->reserved_regs, 0, 0);
4989         }
4990         /* fall through */
4991 
4992     case TEMP_VAL_MEM:
4993         lowpart_ofs = 0;
4994         if (HOST_BIG_ENDIAN) {
4995             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4996         }
4997         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4998                              its->mem_offset + lowpart_ofs)) {
4999             goto done;
5000         }
5001         /* Load the input into the destination vector register. */
5002         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5003         break;
5004 
5005     default:
5006         g_assert_not_reached();
5007     }
5008 
5009     /* We now have a vector input register, so dup must succeed. */
5010     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5011     tcg_debug_assert(ok);
5012 
5013  done:
5014     ots->mem_coherent = 0;
5015     if (IS_DEAD_ARG(1)) {
5016         temp_dead(s, its);
5017     }
5018     if (NEED_SYNC_ARG(0)) {
5019         temp_sync(s, ots, s->reserved_regs, 0, 0);
5020     }
5021     if (IS_DEAD_ARG(0)) {
5022         temp_dead(s, ots);
5023     }
5024 }
5025 
5026 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5027 {
5028     const TCGLifeData arg_life = op->life;
5029     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5030     TCGRegSet i_allocated_regs;
5031     TCGRegSet o_allocated_regs;
5032     int i, k, nb_iargs, nb_oargs;
5033     TCGReg reg;
5034     TCGArg arg;
5035     const TCGArgConstraint *args_ct;
5036     const TCGArgConstraint *arg_ct;
5037     TCGTemp *ts;
5038     TCGArg new_args[TCG_MAX_OP_ARGS];
5039     int const_args[TCG_MAX_OP_ARGS];
5040     TCGCond op_cond;
5041 
5042     nb_oargs = def->nb_oargs;
5043     nb_iargs = def->nb_iargs;
5044 
5045     /* copy constants */
5046     memcpy(new_args + nb_oargs + nb_iargs,
5047            op->args + nb_oargs + nb_iargs,
5048            sizeof(TCGArg) * def->nb_cargs);
5049 
5050     i_allocated_regs = s->reserved_regs;
5051     o_allocated_regs = s->reserved_regs;
5052 
5053     switch (op->opc) {
5054     case INDEX_op_brcond_i32:
5055     case INDEX_op_brcond_i64:
5056         op_cond = op->args[2];
5057         break;
5058     case INDEX_op_setcond_i32:
5059     case INDEX_op_setcond_i64:
5060     case INDEX_op_negsetcond_i32:
5061     case INDEX_op_negsetcond_i64:
5062     case INDEX_op_cmp_vec:
5063         op_cond = op->args[3];
5064         break;
5065     case INDEX_op_brcond2_i32:
5066         op_cond = op->args[4];
5067         break;
5068     case INDEX_op_movcond_i32:
5069     case INDEX_op_movcond_i64:
5070     case INDEX_op_setcond2_i32:
5071     case INDEX_op_cmpsel_vec:
5072         op_cond = op->args[5];
5073         break;
5074     default:
5075         /* No condition within opcode. */
5076         op_cond = TCG_COND_ALWAYS;
5077         break;
5078     }
5079 
5080     args_ct = opcode_args_ct(op);
5081 
5082     /* satisfy input constraints */
5083     for (k = 0; k < nb_iargs; k++) {
5084         TCGRegSet i_preferred_regs, i_required_regs;
5085         bool allocate_new_reg, copyto_new_reg;
5086         TCGTemp *ts2;
5087         int i1, i2;
5088 
5089         i = args_ct[nb_oargs + k].sort_index;
5090         arg = op->args[i];
5091         arg_ct = &args_ct[i];
5092         ts = arg_temp(arg);
5093 
5094         if (ts->val_type == TEMP_VAL_CONST
5095             && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5096                                       op_cond, TCGOP_VECE(op))) {
5097             /* constant is OK for instruction */
5098             const_args[i] = 1;
5099             new_args[i] = ts->val;
5100             continue;
5101         }
5102 
5103         reg = ts->reg;
5104         i_preferred_regs = 0;
5105         i_required_regs = arg_ct->regs;
5106         allocate_new_reg = false;
5107         copyto_new_reg = false;
5108 
5109         switch (arg_ct->pair) {
5110         case 0: /* not paired */
5111             if (arg_ct->ialias) {
5112                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5113 
5114                 /*
5115                  * If the input is readonly, then it cannot also be an
5116                  * output and aliased to itself.  If the input is not
5117                  * dead after the instruction, we must allocate a new
5118                  * register and move it.
5119                  */
5120                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5121                     || args_ct[arg_ct->alias_index].newreg) {
5122                     allocate_new_reg = true;
5123                 } else if (ts->val_type == TEMP_VAL_REG) {
5124                     /*
5125                      * Check if the current register has already been
5126                      * allocated for another input.
5127                      */
5128                     allocate_new_reg =
5129                         tcg_regset_test_reg(i_allocated_regs, reg);
5130                 }
5131             }
5132             if (!allocate_new_reg) {
5133                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5134                           i_preferred_regs);
5135                 reg = ts->reg;
5136                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5137             }
5138             if (allocate_new_reg) {
5139                 /*
5140                  * Allocate a new register matching the constraint
5141                  * and move the temporary register into it.
5142                  */
5143                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5144                           i_allocated_regs, 0);
5145                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5146                                     i_preferred_regs, ts->indirect_base);
5147                 copyto_new_reg = true;
5148             }
5149             break;
5150 
5151         case 1:
5152             /* First of an input pair; if i1 == i2, the second is an output. */
5153             i1 = i;
5154             i2 = arg_ct->pair_index;
5155             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5156 
5157             /*
5158              * It is easier to default to allocating a new pair
5159              * and to identify a few cases where it's not required.
5160              */
5161             if (arg_ct->ialias) {
5162                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5163                 if (IS_DEAD_ARG(i1) &&
5164                     IS_DEAD_ARG(i2) &&
5165                     !temp_readonly(ts) &&
5166                     ts->val_type == TEMP_VAL_REG &&
5167                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5168                     tcg_regset_test_reg(i_required_regs, reg) &&
5169                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5170                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5171                     (ts2
5172                      ? ts2->val_type == TEMP_VAL_REG &&
5173                        ts2->reg == reg + 1 &&
5174                        !temp_readonly(ts2)
5175                      : s->reg_to_temp[reg + 1] == NULL)) {
5176                     break;
5177                 }
5178             } else {
5179                 /* Without aliasing, the pair must also be an input. */
5180                 tcg_debug_assert(ts2);
5181                 if (ts->val_type == TEMP_VAL_REG &&
5182                     ts2->val_type == TEMP_VAL_REG &&
5183                     ts2->reg == reg + 1 &&
5184                     tcg_regset_test_reg(i_required_regs, reg)) {
5185                     break;
5186                 }
5187             }
5188             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5189                                      0, ts->indirect_base);
5190             goto do_pair;
5191 
5192         case 2: /* pair second */
5193             reg = new_args[arg_ct->pair_index] + 1;
5194             goto do_pair;
5195 
5196         case 3: /* ialias with second output, no first input */
5197             tcg_debug_assert(arg_ct->ialias);
5198             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5199 
5200             if (IS_DEAD_ARG(i) &&
5201                 !temp_readonly(ts) &&
5202                 ts->val_type == TEMP_VAL_REG &&
5203                 reg > 0 &&
5204                 s->reg_to_temp[reg - 1] == NULL &&
5205                 tcg_regset_test_reg(i_required_regs, reg) &&
5206                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5207                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5208                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5209                 break;
5210             }
5211             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5212                                      i_allocated_regs, 0,
5213                                      ts->indirect_base);
5214             tcg_regset_set_reg(i_allocated_regs, reg);
5215             reg += 1;
5216             goto do_pair;
5217 
5218         do_pair:
5219             /*
5220              * If an aliased input is not dead after the instruction,
5221              * we must allocate a new register and move it.
5222              */
5223             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5224                 TCGRegSet t_allocated_regs = i_allocated_regs;
5225 
5226                 /*
5227                  * Because of the alias, and the continued life, make sure
5228                  * that the temp is somewhere *other* than the reg pair,
5229                  * and we get a copy in reg.
5230                  */
5231                 tcg_regset_set_reg(t_allocated_regs, reg);
5232                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5233                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5234                     /* If ts was already in reg, copy it somewhere else. */
5235                     TCGReg nr;
5236                     bool ok;
5237 
5238                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5239                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5240                                        t_allocated_regs, 0, ts->indirect_base);
5241                     ok = tcg_out_mov(s, ts->type, nr, reg);
5242                     tcg_debug_assert(ok);
5243 
5244                     set_temp_val_reg(s, ts, nr);
5245                 } else {
5246                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5247                               t_allocated_regs, 0);
5248                     copyto_new_reg = true;
5249                 }
5250             } else {
5251                 /* Preferably allocate to reg, otherwise copy. */
5252                 i_required_regs = (TCGRegSet)1 << reg;
5253                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5254                           i_preferred_regs);
5255                 copyto_new_reg = ts->reg != reg;
5256             }
5257             break;
5258 
5259         default:
5260             g_assert_not_reached();
5261         }
5262 
5263         if (copyto_new_reg) {
5264             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5265                 /*
5266                  * Cross register class move not supported.  Sync the
5267                  * temp back to its slot and load from there.
5268                  */
5269                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5270                 tcg_out_ld(s, ts->type, reg,
5271                            ts->mem_base->reg, ts->mem_offset);
5272             }
5273         }
5274         new_args[i] = reg;
5275         const_args[i] = 0;
5276         tcg_regset_set_reg(i_allocated_regs, reg);
5277     }
5278 
5279     /* mark dead temporaries and free the associated registers */
5280     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5281         if (IS_DEAD_ARG(i)) {
5282             temp_dead(s, arg_temp(op->args[i]));
5283         }
5284     }
5285 
5286     if (def->flags & TCG_OPF_COND_BRANCH) {
5287         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5288     } else if (def->flags & TCG_OPF_BB_END) {
5289         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5290     } else {
5291         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5292             /* XXX: permit generic clobber register list ? */
5293             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5294                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5295                     tcg_reg_free(s, i, i_allocated_regs);
5296                 }
5297             }
5298         }
5299         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5300             /* sync globals if the op has side effects and might trigger
5301                an exception. */
5302             sync_globals(s, i_allocated_regs);
5303         }
5304 
5305         /* satisfy the output constraints */
5306         for (k = 0; k < nb_oargs; k++) {
5307             i = args_ct[k].sort_index;
5308             arg = op->args[i];
5309             arg_ct = &args_ct[i];
5310             ts = arg_temp(arg);
5311 
5312             /* ENV should not be modified.  */
5313             tcg_debug_assert(!temp_readonly(ts));
5314 
5315             switch (arg_ct->pair) {
5316             case 0: /* not paired */
5317                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5318                     reg = new_args[arg_ct->alias_index];
5319                 } else if (arg_ct->newreg) {
5320                     reg = tcg_reg_alloc(s, arg_ct->regs,
5321                                         i_allocated_regs | o_allocated_regs,
5322                                         output_pref(op, k), ts->indirect_base);
5323                 } else {
5324                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5325                                         output_pref(op, k), ts->indirect_base);
5326                 }
5327                 break;
5328 
5329             case 1: /* first of pair */
5330                 if (arg_ct->oalias) {
5331                     reg = new_args[arg_ct->alias_index];
5332                 } else if (arg_ct->newreg) {
5333                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5334                                              i_allocated_regs | o_allocated_regs,
5335                                              output_pref(op, k),
5336                                              ts->indirect_base);
5337                 } else {
5338                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5339                                              output_pref(op, k),
5340                                              ts->indirect_base);
5341                 }
5342                 break;
5343 
5344             case 2: /* second of pair */
5345                 if (arg_ct->oalias) {
5346                     reg = new_args[arg_ct->alias_index];
5347                 } else {
5348                     reg = new_args[arg_ct->pair_index] + 1;
5349                 }
5350                 break;
5351 
5352             case 3: /* first of pair, aliasing with a second input */
5353                 tcg_debug_assert(!arg_ct->newreg);
5354                 reg = new_args[arg_ct->pair_index] - 1;
5355                 break;
5356 
5357             default:
5358                 g_assert_not_reached();
5359             }
5360             tcg_regset_set_reg(o_allocated_regs, reg);
5361             set_temp_val_reg(s, ts, reg);
5362             ts->mem_coherent = 0;
5363             new_args[i] = reg;
5364         }
5365     }
5366 
5367     /* emit instruction */
5368     switch (op->opc) {
5369     case INDEX_op_ext8s_i32:
5370         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5371         break;
5372     case INDEX_op_ext8s_i64:
5373         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5374         break;
5375     case INDEX_op_ext8u_i32:
5376     case INDEX_op_ext8u_i64:
5377         tcg_out_ext8u(s, new_args[0], new_args[1]);
5378         break;
5379     case INDEX_op_ext16s_i32:
5380         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5381         break;
5382     case INDEX_op_ext16s_i64:
5383         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5384         break;
5385     case INDEX_op_ext16u_i32:
5386     case INDEX_op_ext16u_i64:
5387         tcg_out_ext16u(s, new_args[0], new_args[1]);
5388         break;
5389     case INDEX_op_ext32s_i64:
5390         tcg_out_ext32s(s, new_args[0], new_args[1]);
5391         break;
5392     case INDEX_op_ext32u_i64:
5393         tcg_out_ext32u(s, new_args[0], new_args[1]);
5394         break;
5395     case INDEX_op_ext_i32_i64:
5396         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5397         break;
5398     case INDEX_op_extu_i32_i64:
5399         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5400         break;
5401     case INDEX_op_extrl_i64_i32:
5402         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5403         break;
5404     default:
5405         if (def->flags & TCG_OPF_VECTOR) {
5406             tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64,
5407                            TCGOP_VECE(op), new_args, const_args);
5408         } else {
5409             tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args);
5410         }
5411         break;
5412     }
5413 
5414     /* move the outputs in the correct register if needed */
5415     for(i = 0; i < nb_oargs; i++) {
5416         ts = arg_temp(op->args[i]);
5417 
5418         /* ENV should not be modified.  */
5419         tcg_debug_assert(!temp_readonly(ts));
5420 
5421         if (NEED_SYNC_ARG(i)) {
5422             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5423         } else if (IS_DEAD_ARG(i)) {
5424             temp_dead(s, ts);
5425         }
5426     }
5427 }
5428 
/*
 * Allocate registers and emit code for dup2_vec: replicate a 64-bit
 * element assembled from two 32-bit inputs (args[1] = low, args[2] = high)
 * into the vector output args[0].
 *
 * Returns true if code was emitted here; false if the caller must fall
 * back to generic expansion of the opcode.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    /* arg_life is read by the IS_DEAD_ARG/NEED_SYNC_ARG macros below. */
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);    /* vector output */
    itsl = arg_temp(op->args[1]);   /* low half of the 64-bit element */
    itsh = arg_temp(op->args[2]);   /* high half of the 64-bit element */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Combine the two 32-bit constants into the 64-bit element... */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* ...and use the narrowest element size that replicates it. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    /* The inputs must be adjacent subparts of the same temp, in host order. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Both halves must be in memory for the memory-replicate insn. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5515 
5516 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5517                          TCGRegSet allocated_regs)
5518 {
5519     if (ts->val_type == TEMP_VAL_REG) {
5520         if (ts->reg != reg) {
5521             tcg_reg_free(s, reg, allocated_regs);
5522             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5523                 /*
5524                  * Cross register class move not supported.  Sync the
5525                  * temp back to its slot and load from there.
5526                  */
5527                 temp_sync(s, ts, allocated_regs, 0, 0);
5528                 tcg_out_ld(s, ts->type, reg,
5529                            ts->mem_base->reg, ts->mem_offset);
5530             }
5531         }
5532     } else {
5533         TCGRegSet arg_set = 0;
5534 
5535         tcg_reg_free(s, reg, allocated_regs);
5536         tcg_regset_set_reg(arg_set, reg);
5537         temp_load(s, ts, arg_set, allocated_regs, 0);
5538     }
5539 }
5540 
5541 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5542                          TCGRegSet allocated_regs)
5543 {
5544     /*
5545      * When the destination is on the stack, load up the temp and store.
5546      * If there are many call-saved registers, the temp might live to
5547      * see another use; otherwise it'll be discarded.
5548      */
5549     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5550     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5551                arg_slot_stk_ofs(arg_slot));
5552 }
5553 
5554 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5555                             TCGTemp *ts, TCGRegSet *allocated_regs)
5556 {
5557     if (arg_slot_reg_p(l->arg_slot)) {
5558         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5559         load_arg_reg(s, reg, ts, *allocated_regs);
5560         tcg_regset_set_reg(*allocated_regs, reg);
5561     } else {
5562         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5563     }
5564 }
5565 
5566 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5567                          intptr_t ref_off, TCGRegSet *allocated_regs)
5568 {
5569     TCGReg reg;
5570 
5571     if (arg_slot_reg_p(arg_slot)) {
5572         reg = tcg_target_call_iarg_regs[arg_slot];
5573         tcg_reg_free(s, reg, *allocated_regs);
5574         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5575         tcg_regset_set_reg(*allocated_regs, reg);
5576     } else {
5577         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5578                             *allocated_regs, 0, false);
5579         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5580         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5581                    arg_slot_stk_ofs(arg_slot));
5582     }
5583 }
5584 
/*
 * Allocate registers and emit code for a helper-call op: marshal the
 * inputs into their ABI locations, free the call-clobbered registers,
 * save/sync globals per the helper's flags, emit the call, and assign
 * the outputs from the return location described by info->out_kind.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /* arg_life is read by the IS_DEAD_ARG/NEED_SYNC_ARG macros below. */
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its reference slot, then pass a pointer
               to that slot as the actual argument. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference argument: store only;
               the pointer was passed with TCG_CALL_ARG_BY_REF. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Outputs arrive in the ABI return registers. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* A 128-bit result returned in a vector register: spill it to
           the output's home slot, then treat all parts as in-memory. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5715 
5716 /**
5717  * atom_and_align_for_opc:
5718  * @s: tcg context
5719  * @opc: memory operation code
5720  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5721  * @allow_two_ops: true if we are prepared to issue two operations
5722  *
5723  * Return the alignment and atomicity to use for the inline fast path
5724  * for the given memory operation.  The alignment may be larger than
5725  * that specified in @opc, and the correct alignment will be diagnosed
5726  * by the slow path helper.
5727  *
5728  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5729  * and issue two loads or stores for subalignment.
5730  */
5731 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5732                                            MemOp host_atom, bool allow_two_ops)
5733 {
5734     MemOp align = memop_alignment_bits(opc);
5735     MemOp size = opc & MO_SIZE;
5736     MemOp half = size ? size - 1 : 0;
5737     MemOp atom = opc & MO_ATOM_MASK;
5738     MemOp atmax;
5739 
5740     switch (atom) {
5741     case MO_ATOM_NONE:
5742         /* The operation requires no specific atomicity. */
5743         atmax = MO_8;
5744         break;
5745 
5746     case MO_ATOM_IFALIGN:
5747         atmax = size;
5748         break;
5749 
5750     case MO_ATOM_IFALIGN_PAIR:
5751         atmax = half;
5752         break;
5753 
5754     case MO_ATOM_WITHIN16:
5755         atmax = size;
5756         if (size == MO_128) {
5757             /* Misalignment implies !within16, and therefore no atomicity. */
5758         } else if (host_atom != MO_ATOM_WITHIN16) {
5759             /* The host does not implement within16, so require alignment. */
5760             align = MAX(align, size);
5761         }
5762         break;
5763 
5764     case MO_ATOM_WITHIN16_PAIR:
5765         atmax = size;
5766         /*
5767          * Misalignment implies !within16, and therefore half atomicity.
5768          * Any host prepared for two operations can implement this with
5769          * half alignment.
5770          */
5771         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5772             align = MAX(align, half);
5773         }
5774         break;
5775 
5776     case MO_ATOM_SUBALIGN:
5777         atmax = size;
5778         if (host_atom != MO_ATOM_SUBALIGN) {
5779             /* If unaligned but not odd, there are subobjects up to half. */
5780             if (allow_two_ops) {
5781                 align = MAX(align, half);
5782             } else {
5783                 align = MAX(align, size);
5784             }
5785         }
5786         break;
5787 
5788     default:
5789         g_assert_not_reached();
5790     }
5791 
5792     return (TCGAtomAlign){ .atom = atmax, .align = align };
5793 }
5794 
5795 /*
5796  * Similarly for qemu_ld/st slow path helpers.
5797  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5798  * using only the provided backend tcg_out_* functions.
5799  */
5800 
5801 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5802 {
5803     int ofs = arg_slot_stk_ofs(slot);
5804 
5805     /*
5806      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5807      * require extension to uint64_t, adjust the address for uint32_t.
5808      */
5809     if (HOST_BIG_ENDIAN &&
5810         TCG_TARGET_REG_BITS == 64 &&
5811         type == TCG_TYPE_I32) {
5812         ofs += 4;
5813     }
5814     return ofs;
5815 }
5816 
5817 static void tcg_out_helper_load_slots(TCGContext *s,
5818                                       unsigned nmov, TCGMovExtend *mov,
5819                                       const TCGLdstHelperParam *parm)
5820 {
5821     unsigned i;
5822     TCGReg dst3;
5823 
5824     /*
5825      * Start from the end, storing to the stack first.
5826      * This frees those registers, so we need not consider overlap.
5827      */
5828     for (i = nmov; i-- > 0; ) {
5829         unsigned slot = mov[i].dst;
5830 
5831         if (arg_slot_reg_p(slot)) {
5832             goto found_reg;
5833         }
5834 
5835         TCGReg src = mov[i].src;
5836         TCGType dst_type = mov[i].dst_type;
5837         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5838 
5839         /* The argument is going onto the stack; extend into scratch. */
5840         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5841             tcg_debug_assert(parm->ntmp != 0);
5842             mov[i].dst = src = parm->tmp[0];
5843             tcg_out_movext1(s, &mov[i]);
5844         }
5845 
5846         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5847                    tcg_out_helper_stk_ofs(dst_type, slot));
5848     }
5849     return;
5850 
5851  found_reg:
5852     /*
5853      * The remaining arguments are in registers.
5854      * Convert slot numbers to argument registers.
5855      */
5856     nmov = i + 1;
5857     for (i = 0; i < nmov; ++i) {
5858         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5859     }
5860 
5861     switch (nmov) {
5862     case 4:
5863         /* The backend must have provided enough temps for the worst case. */
5864         tcg_debug_assert(parm->ntmp >= 2);
5865 
5866         dst3 = mov[3].dst;
5867         for (unsigned j = 0; j < 3; ++j) {
5868             if (dst3 == mov[j].src) {
5869                 /*
5870                  * Conflict. Copy the source to a temporary, perform the
5871                  * remaining moves, then the extension from our scratch
5872                  * on the way out.
5873                  */
5874                 TCGReg scratch = parm->tmp[1];
5875 
5876                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5877                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5878                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5879                 break;
5880             }
5881         }
5882 
5883         /* No conflicts: perform this move and continue. */
5884         tcg_out_movext1(s, &mov[3]);
5885         /* fall through */
5886 
5887     case 3:
5888         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5889                         parm->ntmp ? parm->tmp[0] : -1);
5890         break;
5891     case 2:
5892         tcg_out_movext2(s, mov, mov + 1,
5893                         parm->ntmp ? parm->tmp[0] : -1);
5894         break;
5895     case 1:
5896         tcg_out_movext1(s, mov);
5897         break;
5898     default:
5899         g_assert_not_reached();
5900     }
5901 }
5902 
5903 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5904                                     TCGType type, tcg_target_long imm,
5905                                     const TCGLdstHelperParam *parm)
5906 {
5907     if (arg_slot_reg_p(slot)) {
5908         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5909     } else {
5910         int ofs = tcg_out_helper_stk_ofs(type, slot);
5911         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5912             tcg_debug_assert(parm->ntmp != 0);
5913             tcg_out_movi(s, type, parm->tmp[0], imm);
5914             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5915         }
5916     }
5917 }
5918 
/*
 * Load the arguments common to every qemu_ld/st slow-path helper:
 * env (always the first argument), the MemOpIdx 'oi', and the return
 * address 'ra'.  @next_arg indexes the 'oi' entry within info->in;
 * 'ra' follows immediately after.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    /* Reusable descriptor for a pointer-sized register move. */
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.  If the backend supplies a generator for the return
     * address, use the register it produces (passing the argument
     * register as a hint when the slot is a register); otherwise pass
     * ldst->raddr as an immediate.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
5985 
/*
 * Fill @mov with the move(s) needed to place the value in (@lo, @hi)
 * of @src_type into the argument location(s) @loc, consumed by the
 * helper as @dst_type.  Returns the number of entries written: 1 when
 * the value fits in one host register, 2 when it is split across two
 * argument slots (I64 on 32-bit hosts, I128 on 64-bit hosts).
 */
static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
                                       const TCGCallArgumentLoc *loc,
                                       TCGType dst_type, TCGType src_type,
                                       TCGReg lo, TCGReg hi)
{
    MemOp reg_mo;

    /* Single-register case: @hi is unused. */
    if (dst_type <= TCG_TYPE_REG) {
        MemOp src_ext;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
            break;
        case TCG_CALL_ARG_EXTEND_U:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_UL;
            break;
        case TCG_CALL_ARG_EXTEND_S:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_SL;
            break;
        default:
            g_assert_not_reached();
        }

        mov[0].dst = loc->arg_slot;
        mov[0].dst_type = dst_type;
        mov[0].src = lo;
        mov[0].src_type = src_type;
        mov[0].src_ext = src_ext;
        return 1;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        assert(dst_type == TCG_TYPE_I64);
        reg_mo = MO_32;
    } else {
        assert(dst_type == TCG_TYPE_I128);
        reg_mo = MO_64;
    }

    /* Assign the lo/hi halves to slots in host-endian memory order. */
    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
    mov[0].src = lo;
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = reg_mo;

    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
    mov[1].src = hi;
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = reg_mo;

    return 2;
}
6042 
/*
 * Emit code to load all arguments for a qemu_ld slow-path helper call:
 * select the helper by access size, marshal the guest address, set up
 * the by-reference return slot when the helper returns by reference,
 * then load the common env/oi/ra arguments.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Choose the helper by the size of the access. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Build the pointer in scratch, store to the stack slot. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6123 
/*
 * Emit code to move the return value of a qemu_ld helper into the
 * data register(s) named by @ldst, applying any required extension.
 * @load_sign: true if the helper already performed sign extension
 * to tcg_target_ulong for MO_SIGN accesses.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On 32-bit hosts an I64 result needs the two-register path below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        /* Single-register result: move from the return register. */
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            /* Perform the sign extension ourselves during the move. */
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Two-register return; handled by the common code below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload as
               two I64 halves (shared with the by-reference case). */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* Result is in memory at the top of stack; load both halves
               with host-endian placement. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register result (I64 on 32-bit host, or I128 RET_NORMAL):
       move both return registers, resolving any overlap via a temp. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6209 
/*
 * Emit code to load the arguments for an out-of-line qemu_st helper
 * call described by @ldst, using the backend-specific parameters in
 * @parm.  The helper signature is selected by the access size
 * (st32/st64/st128).  Handles the address argument, the data argument
 * (in registers or by reference), and finally the deferred common
 * arguments (env, oi, retaddr).
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature by access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passes in registers/slots: queue its moves after the
           address moves, then load everything at once. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 data passed by reference: store both halves into the
           reserved stack slots, then pass their address. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            /* The reference itself goes in an argument register. */
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            /* The reference goes on the stack: build it in a temp first. */
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6315 
/*
 * Translate the TCG opcode stream in @s into host machine code for @tb.
 * Returns the generated code size in bytes on success, or a negative
 * value on overflow: -1 if the code buffer high-water mark was crossed,
 * -2 if the TB exceeds what gen_insn_end_off/relocations can represent.
 * On a negative return the caller restarts code generation.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Dump the raw opcode stream if -d op logging is active for this pc. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    /* Optimization and liveness passes over the opcode stream. */
    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* Per-insn start data: start_words words per guest instruction. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    /* Main code generation loop: allocate registers and emit host code
       for each op.  num_insns tracks the current guest instruction. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record the code-size end offset of the previous insn. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6508 
6509 #ifdef ELF_HOST_MACHINE
6510 /* In order to use this feature, the backend needs to do three things:
6511 
6512    (1) Define ELF_HOST_MACHINE to indicate both what value to
6513        put into the ELF image and to indicate support for the feature.
6514 
6515    (2) Define tcg_register_jit.  This should create a buffer containing
6516        the contents of a .debug_frame section that describes the post-
6517        prologue unwind info for the tcg machine.
6518 
6519    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6520 */
6521 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file (here, a fake ELF image),
   a node in GDB's doubly-linked list of JIT code entries.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* The global descriptor through which GDB finds registered entries.  */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB places a breakpoint in this function; the empty asm and
   noinline attribute keep the compiler from eliminating the call.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6552 
6553 /* End GDB interface.  */
6554 
/*
 * Return the byte offset of @str within the string table @strtab.
 * The table begins with an empty string at offset 0; @str is required
 * to be present, so there is no failure return.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    /* Skip the initial empty entry, then walk NUL-delimited entries. */
    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        continue;
    }
    return p - strtab;
}
6566 
6567 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6568                                  const void *debug_frame,
6569                                  size_t debug_frame_size)
6570 {
6571     struct __attribute__((packed)) DebugInfo {
6572         uint32_t  len;
6573         uint16_t  version;
6574         uint32_t  abbrev;
6575         uint8_t   ptr_size;
6576         uint8_t   cu_die;
6577         uint16_t  cu_lang;
6578         uintptr_t cu_low_pc;
6579         uintptr_t cu_high_pc;
6580         uint8_t   fn_die;
6581         char      fn_name[16];
6582         uintptr_t fn_low_pc;
6583         uintptr_t fn_high_pc;
6584         uint8_t   cu_eoc;
6585     };
6586 
6587     struct ElfImage {
6588         ElfW(Ehdr) ehdr;
6589         ElfW(Phdr) phdr;
6590         ElfW(Shdr) shdr[7];
6591         ElfW(Sym)  sym[2];
6592         struct DebugInfo di;
6593         uint8_t    da[24];
6594         char       str[80];
6595     };
6596 
6597     struct ElfImage *img;
6598 
6599     static const struct ElfImage img_template = {
6600         .ehdr = {
6601             .e_ident[EI_MAG0] = ELFMAG0,
6602             .e_ident[EI_MAG1] = ELFMAG1,
6603             .e_ident[EI_MAG2] = ELFMAG2,
6604             .e_ident[EI_MAG3] = ELFMAG3,
6605             .e_ident[EI_CLASS] = ELF_CLASS,
6606             .e_ident[EI_DATA] = ELF_DATA,
6607             .e_ident[EI_VERSION] = EV_CURRENT,
6608             .e_type = ET_EXEC,
6609             .e_machine = ELF_HOST_MACHINE,
6610             .e_version = EV_CURRENT,
6611             .e_phoff = offsetof(struct ElfImage, phdr),
6612             .e_shoff = offsetof(struct ElfImage, shdr),
6613             .e_ehsize = sizeof(ElfW(Shdr)),
6614             .e_phentsize = sizeof(ElfW(Phdr)),
6615             .e_phnum = 1,
6616             .e_shentsize = sizeof(ElfW(Shdr)),
6617             .e_shnum = ARRAY_SIZE(img->shdr),
6618             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6619 #ifdef ELF_HOST_FLAGS
6620             .e_flags = ELF_HOST_FLAGS,
6621 #endif
6622 #ifdef ELF_OSABI
6623             .e_ident[EI_OSABI] = ELF_OSABI,
6624 #endif
6625         },
6626         .phdr = {
6627             .p_type = PT_LOAD,
6628             .p_flags = PF_X,
6629         },
6630         .shdr = {
6631             [0] = { .sh_type = SHT_NULL },
6632             /* Trick: The contents of code_gen_buffer are not present in
6633                this fake ELF file; that got allocated elsewhere.  Therefore
6634                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6635                will not look for contents.  We can record any address.  */
6636             [1] = { /* .text */
6637                 .sh_type = SHT_NOBITS,
6638                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6639             },
6640             [2] = { /* .debug_info */
6641                 .sh_type = SHT_PROGBITS,
6642                 .sh_offset = offsetof(struct ElfImage, di),
6643                 .sh_size = sizeof(struct DebugInfo),
6644             },
6645             [3] = { /* .debug_abbrev */
6646                 .sh_type = SHT_PROGBITS,
6647                 .sh_offset = offsetof(struct ElfImage, da),
6648                 .sh_size = sizeof(img->da),
6649             },
6650             [4] = { /* .debug_frame */
6651                 .sh_type = SHT_PROGBITS,
6652                 .sh_offset = sizeof(struct ElfImage),
6653             },
6654             [5] = { /* .symtab */
6655                 .sh_type = SHT_SYMTAB,
6656                 .sh_offset = offsetof(struct ElfImage, sym),
6657                 .sh_size = sizeof(img->sym),
6658                 .sh_info = 1,
6659                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6660                 .sh_entsize = sizeof(ElfW(Sym)),
6661             },
6662             [6] = { /* .strtab */
6663                 .sh_type = SHT_STRTAB,
6664                 .sh_offset = offsetof(struct ElfImage, str),
6665                 .sh_size = sizeof(img->str),
6666             }
6667         },
6668         .sym = {
6669             [1] = { /* code_gen_buffer */
6670                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6671                 .st_shndx = 1,
6672             }
6673         },
6674         .di = {
6675             .len = sizeof(struct DebugInfo) - 4,
6676             .version = 2,
6677             .ptr_size = sizeof(void *),
6678             .cu_die = 1,
6679             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6680             .fn_die = 2,
6681             .fn_name = "code_gen_buffer"
6682         },
6683         .da = {
6684             1,          /* abbrev number (the cu) */
6685             0x11, 1,    /* DW_TAG_compile_unit, has children */
6686             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6687             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6688             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6689             0, 0,       /* end of abbrev */
6690             2,          /* abbrev number (the fn) */
6691             0x2e, 0,    /* DW_TAG_subprogram, no children */
6692             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6693             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6694             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6695             0, 0,       /* end of abbrev */
6696             0           /* no more abbrev */
6697         },
6698         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6699                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6700     };
6701 
6702     /* We only need a single jit entry; statically allocate it.  */
6703     static struct jit_code_entry one_entry;
6704 
6705     uintptr_t buf = (uintptr_t)buf_ptr;
6706     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6707     DebugFrameHeader *dfh;
6708 
6709     img = g_malloc(img_size);
6710     *img = img_template;
6711 
6712     img->phdr.p_vaddr = buf;
6713     img->phdr.p_paddr = buf;
6714     img->phdr.p_memsz = buf_size;
6715 
6716     img->shdr[1].sh_name = find_string(img->str, ".text");
6717     img->shdr[1].sh_addr = buf;
6718     img->shdr[1].sh_size = buf_size;
6719 
6720     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6721     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6722 
6723     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6724     img->shdr[4].sh_size = debug_frame_size;
6725 
6726     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6727     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6728 
6729     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6730     img->sym[1].st_value = buf;
6731     img->sym[1].st_size = buf_size;
6732 
6733     img->di.cu_low_pc = buf;
6734     img->di.cu_high_pc = buf + buf_size;
6735     img->di.fn_low_pc = buf;
6736     img->di.fn_high_pc = buf + buf_size;
6737 
6738     dfh = (DebugFrameHeader *)(img + 1);
6739     memcpy(dfh, debug_frame, debug_frame_size);
6740     dfh->fde.func_start = buf;
6741     dfh->fde.func_len = buf_size;
6742 
6743 #ifdef DEBUG_JIT
6744     /* Enable this block to be able to debug the ELF image file creation.
6745        One can use readelf, objdump, or other inspection utilities.  */
6746     {
6747         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6748         FILE *f = fopen(jit, "w+b");
6749         if (f) {
6750             if (fwrite(img, img_size, 1, f) != img_size) {
6751                 /* Avoid stupid unused return value warning for fwrite.  */
6752             }
6753             fclose(f);
6754         }
6755     }
6756 #endif
6757 
6758     one_entry.symfile_addr = img;
6759     one_entry.symfile_size = img_size;
6760 
6761     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6762     __jit_debug_descriptor.relevant_entry = &one_entry;
6763     __jit_debug_descriptor.first_entry = &one_entry;
6764     __jit_debug_register_code();
6765 }
6766 #else
6767 /* No support for the feature.  Provide the entry point expected by exec.c,
6768    and implement the internal function we declared earlier.  */
6769 
/* Stub when ELF_HOST_MACHINE is not defined: GDB JIT registration
   is unsupported, so registration is a no-op.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6775 
/* Public no-op entry point expected by callers when the GDB JIT
   feature is unavailable on this host.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6779 #endif /* ELF_HOST_MACHINE */
6780 
6781 #if !TCG_TARGET_MAYBE_vec
/* With !TCG_TARGET_MAYBE_vec the backend emits no vector ops, so this
   expansion hook must never be called.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6786 #endif
6787