xref: /openbmc/qemu/tcg/tcg.c (revision 019a98083a57861475461fd63895240b5c341077)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 #include "qemu/timer.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 #include "accel/tcg/perf.h"
65 
66 /* Forward declarations for functions declared in tcg-target.c.inc and
67    used here. */
68 static void tcg_target_init(TCGContext *s);
69 static void tcg_target_qemu_prologue(TCGContext *s);
70 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
71                         intptr_t value, intptr_t addend);
72 
73 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * DWARF Common Information Entry header for the JIT debug interface.
 * 'len' is aligned to the host pointer size so the packed FDE that
 * follows it in DebugFrameHeader lands at a pointer-aligned offset.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
83 
/*
 * DWARF Frame Description Entry header: describes the code range
 * [func_start, func_start + func_len) covered by the frame info.
 * Packed because uintptr_t fields follow a uint32_t without padding.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
90 
/* A CIE immediately followed by one FDE, emitted as a single blob. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
95 
96 static void tcg_register_jit_int(const void *buf, size_t size,
97                                  const void *debug_frame,
98                                  size_t debug_frame_size)
99     __attribute__((unused));
100 
101 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103                        intptr_t arg2);
104 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106                          TCGReg ret, tcg_target_long arg);
107 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
108 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
109 static void tcg_out_goto_tb(TCGContext *s, int which);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
/*
 * Stubs used when the backend does not support vector operations
 * (TCG_TARGET_MAYBE_vec is 0).  None of these may ever be reached.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
153                          const TCGHelperInfo *info);
154 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
155 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
156 #ifdef TCG_TARGET_NEED_LDST_LABELS
157 static int tcg_out_ldst_finalize(TCGContext *s);
158 #endif
159 
/* The parent context: registers the target globals, generates the prologue. */
TCGContext tcg_init_ctx;
/* The context used for code generation by the current thread. */
__thread TCGContext *tcg_ctx;

/* Contexts claimed via tcg_register_thread(); cur = used, max = capacity. */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
/* NOTE(review): presumably the delta between the RW and RX views of the
   split-wx code buffer -- confirm against tcg_splitwx_to_rx(). */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
/* Entry point into generated code (not used by the TCG interpreter). */
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Registers usable for each TCGType, and the set clobbered by calls. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
176 
177 #if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the code stream; only built when insn units are bytes. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one already-emitted byte at *p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
188 #endif
189 
190 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        /* Insn units are exactly 16 bits: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Units are smaller: copy bytewise and advance by 2/unit-size units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite an already-emitted 16-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
211 #endif
212 
213 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        /* Insn units are exactly 32 bits: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Units are smaller: copy bytewise and advance by 4/unit-size units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite an already-emitted 32-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
234 #endif
235 
236 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        /* Insn units are exactly 64 bits: store directly. */
        *s->code_ptr++ = v;
    } else {
        /* Units are smaller: copy bytewise and advance by 8/unit-size units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite an already-emitted 64-bit value at *p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
257 #endif
258 
259 /* label relocation processing */
260 
261 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
262                           TCGLabel *l, intptr_t addend)
263 {
264     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
265 
266     r->type = type;
267     r->ptr = code_ptr;
268     r->addend = addend;
269     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
270 }
271 
272 static void tcg_out_label(TCGContext *s, TCGLabel *l)
273 {
274     tcg_debug_assert(!l->has_value);
275     l->has_value = 1;
276     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
277 }
278 
279 TCGLabel *gen_new_label(void)
280 {
281     TCGContext *s = tcg_ctx;
282     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
283 
284     memset(l, 0, sizeof(TCGLabel));
285     l->id = s->nb_labels++;
286     QSIMPLEQ_INIT(&l->branches);
287     QSIMPLEQ_INIT(&l->relocs);
288 
289     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
290 
291     return l;
292 }
293 
294 static bool tcg_resolve_relocs(TCGContext *s)
295 {
296     TCGLabel *l;
297 
298     QSIMPLEQ_FOREACH(l, &s->labels, next) {
299         TCGRelocation *r;
300         uintptr_t value = l->u.value;
301 
302         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
303             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
304                 return false;
305             }
306         }
307     }
308     return true;
309 }
310 
/* Record the current code offset as goto_tb reset point @which of this TB. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
319 
/* Record the current code offset as the patchable goto_tb insn @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
328 
/* Address of the TB's jump-target word @which, in the RX mapping. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
337 
338 /* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp point in the translator; the -2 status
       selects the "overflow, retry with fewer guest insns" path. */
    siglongjmp(s->jmp_trans, -2);
}
344 
/*
 * Paste N identifiers onto prefix P with '_' separators,
 * e.g. C_PFX3(c_o1_i2_, r, r, ri) -> c_o1_i2_r_r_ri.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
351 
/* Define an enumeration for the various combinations. */

/*
 * First expansion of "tcg-target-con-set.h": each C_* macro produces one
 * enumerator name (via C_PFXn), so the backend's constraint-set list
 * becomes the TCGConstraintSetIndex enum.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Implemented by the backend: map an opcode to its constraint set. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
376 
/* Undefine so the same names can be redefined for the next expansion. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
390 
/* Put all of the constraint sets into an array, indexed by the enum. */

/*
 * Second expansion of "tcg-target-con-set.h": each C_* macro now yields a
 * TCGTargetOpDef initializer whose args_ct_str entries are the stringified
 * constraint letters.  C_N1_I2 prefixes "&" -- presumably marking the
 * output as needing a register distinct from the inputs.
 */
#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
413 
414 
/* Undefine again ahead of the third and final expansion below. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
428 
/* Expand the enumerator to be returned from tcg_target_op_def().  */

/*
 * Third expansion: identical to the first but without trailing commas,
 * so the backend's tcg_target_op_def() can return enumerator values
 * directly.  These remain defined for tcg-target.c.inc below.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"
449 
/*
 * Allocate per-context plugin state: a qemu_plugin_tb whose insns array
 * frees its elements automatically.  No-op without CONFIG_PLUGIN.
 */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
458 
459 /*
460  * All TCG threads except the parent (i.e. the one that called tcg_context_init
461  * and registered the target's TCG globals) must register with this function
462  * before initiating translation.
463  *
464  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
465  * of tcg_region_init() for the reasoning behind this.
466  *
467  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
468  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
469  * is not used anymore for translation once this function is called.
470  *
471  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
472  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
473  */
474 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
479 #else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a byte copy of the fully-initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* The copied mem_base still points into tcg_init_ctx.temps;
               rebase it onto this context's own temps array. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* NOTE(review): the first claimed context (n == 0) skips this --
           presumably its region/plugin state was set up during init;
           confirm against tcg_region_init(). */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
508 #endif /* !CONFIG_USER_ONLY */
509 
510 /* pool based memory allocation */
511 void *tcg_malloc_internal(TCGContext *s, int size)
512 {
513     TCGPool *p;
514     int pool_size;
515 
516     if (size > TCG_POOL_CHUNK_SIZE) {
517         /* big malloc: insert a new pool (XXX: could optimize) */
518         p = g_malloc(sizeof(TCGPool) + size);
519         p->size = size;
520         p->next = s->pool_first_large;
521         s->pool_first_large = p;
522         return p->data;
523     } else {
524         p = s->pool_current;
525         if (!p) {
526             p = s->pool_first;
527             if (!p)
528                 goto new_pool;
529         } else {
530             if (!p->next) {
531             new_pool:
532                 pool_size = TCG_POOL_CHUNK_SIZE;
533                 p = g_malloc(sizeof(TCGPool) + pool_size);
534                 p->size = pool_size;
535                 p->next = NULL;
536                 if (s->pool_current) {
537                     s->pool_current->next = p;
538                 } else {
539                     s->pool_first = p;
540                 }
541             } else {
542                 p = p->next;
543             }
544         }
545     }
546     s->pool_current = p;
547     s->pool_cur = p->data + size;
548     s->pool_end = p->data + p->size;
549     return p->data;
550 }
551 
552 void tcg_pool_reset(TCGContext *s)
553 {
554     TCGPool *p, *t;
555     for (p = s->pool_first_large; p; p = t) {
556         t = p->next;
557         g_free(p);
558     }
559     s->pool_first_large = NULL;
560     s->pool_cur = s->pool_end = NULL;
561     s->pool_current = NULL;
562 }
563 
564 #include "exec/helper-proto.h"
565 
/* Descriptors for every TCG helper, generated from exec/helper-tcg.h. */
static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps helper function pointer -> &all_helpers[i]; filled at init. */
static GHashTable *helper_table;
570 
571 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map one dh_typecode_* value to the matching libffi type descriptor.
 * Aborts on an unhandled typecode.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
608 
/*
 * Build an ffi_cif call descriptor for every helper in all_helpers[].
 * Descriptors are cached by typemask in a temporary hash table so that
 * helpers sharing a signature share one cif.  Interpreter-only.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        /* cif plus its trailing argument-type array, allocated together. */
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            /* Argument j lives in typemask bits [3*(j+1), 3*(j+1)+2]. */
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    /* The cached cifs stay referenced via info->cif; only drop the table. */
    g_hash_table_destroy(ffi_table);
}
660 #endif /* CONFIG_TCG_INTERPRETER */
661 
/* Running totals while laying out a helper call in init_call_layout(). */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
668 
669 static void layout_arg_even(TCGCumulativeArgs *cum)
670 {
671     cum->arg_slot += cum->arg_slot & 1;
672 }
673 
/*
 * Assign a single-slot argument of kind @kind to the next free slot,
 * recording it in info->in[] and advancing both cursors.
 */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}
687 
/*
 * Assign @n consecutive slots to one multi-word argument: every piece
 * shares arg_idx and is distinguished by its tmp_subindex.
 */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
705 
/*
 * Lay out a 128-bit argument passed by reference: the visible argument
 * is a pointer (first subindex); the value itself is copied into
 * "ref_slot" stack space, relocated later by init_call_layout().
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
737 
/*
 * Compute the complete call layout for helper @info: where the return
 * value lives (info->nr_out/out_kind) and where every argument word goes
 * (info->in[], info->nr_in), driven by the 3-bit typecodes packed in
 * info->typemask and the backend's TCG_TARGET_CALL_* conventions.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     * The low 3 bits of typemask encode the return type.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * Each argument occupies the next 3 bits of typemask.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Place the argument per the backend's convention for its type. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Bit 0 of the typecode distinguishes signed from unsigned. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Arguments spilled to the stack: place the copies after them,
               rounded up to Int128 alignment. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            /* Shift every by-reference location by the computed base. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
917 
918 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
919 static void process_op_defs(TCGContext *s);
920 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
921                                             TCGReg reg, const char *name);
922 
/*
 * One-time initialization of the global TCG state held in tcg_init_ctx:
 * op argument-constraint storage, the helper lookup table, the target
 * backend, the indirect register allocation order, and the "env" global.
 * Called exactly once, from tcg_init().
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* One shared allocation; each opcode gets a contiguous slice below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* After this loop, n is the count of call-saved registers at the
       front of the allocation order (first call-clobbered reg breaks). */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* Reserve TCG_AREG0 and publish it as the "env" fixed global. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1006 
/*
 * Public TCG entry point: initialize the shared context, then set up the
 * translation-buffer regions for (up to) max_cpus translators.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1012 
1013 /*
1014  * Allocate TBs right before their corresponding translated code, making
1015  * sure that TBs and code are on different cache lines.
1016  */
1017 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1018 {
1019     uintptr_t align = qemu_icache_linesize;
1020     TranslationBlock *tb;
1021     void *next;
1022 
1023  retry:
1024     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1025     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1026 
1027     if (unlikely(next > s->code_gen_highwater)) {
1028         if (tcg_region_alloc(s)) {
1029             return NULL;
1030         }
1031         goto retry;
1032     }
1033     qatomic_set(&s->code_gen_ptr, next);
1034     s->data_gen_ptr = NULL;
1035     return tb;
1036 }
1037 
/*
 * Generate the host-code prologue/epilogue at the very start of the
 * code buffer, flush caches, optionally disassemble it to the log, and
 * record the result with the region allocator.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point into generated code is the prologue itself. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to the execution side. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by a constant-pool data section;
                   disassemble the code, then dump the data words. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1119 
1120 void tcg_func_start(TCGContext *s)
1121 {
1122     tcg_pool_reset(s);
1123     s->nb_temps = s->nb_globals;
1124 
1125     /* No temps have been previously allocated for size or locality.  */
1126     memset(s->free_temps, 0, sizeof(s->free_temps));
1127 
1128     /* No constant temps have been previously allocated. */
1129     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1130         if (s->const_table[i]) {
1131             g_hash_table_remove_all(s->const_table[i]);
1132         }
1133     }
1134 
1135     s->nb_ops = 0;
1136     s->nb_labels = 0;
1137     s->current_frame_offset = s->frame_start;
1138 
1139 #ifdef CONFIG_DEBUG_TCG
1140     s->goto_tb_issue_mask = 0;
1141 #endif
1142 
1143     QTAILQ_INIT(&s->ops);
1144     QTAILQ_INIT(&s->free_ops);
1145     QSIMPLEQ_INIT(&s->labels);
1146 }
1147 
1148 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1149 {
1150     int n = s->nb_temps++;
1151 
1152     if (n >= TCG_MAX_TEMPS) {
1153         tcg_raise_tb_overflow(s);
1154     }
1155     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1156 }
1157 
1158 static TCGTemp *tcg_global_alloc(TCGContext *s)
1159 {
1160     TCGTemp *ts;
1161 
1162     tcg_debug_assert(s->nb_globals == s->nb_temps);
1163     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1164     s->nb_globals++;
1165     ts = tcg_temp_alloc(s);
1166     ts->kind = TEMP_GLOBAL;
1167 
1168     return ts;
1169 }
1170 
1171 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1172                                             TCGReg reg, const char *name)
1173 {
1174     TCGTemp *ts;
1175 
1176     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1177         tcg_abort();
1178     }
1179 
1180     ts = tcg_global_alloc(s);
1181     ts->base_type = type;
1182     ts->type = type;
1183     ts->kind = TEMP_FIXED;
1184     ts->reg = reg;
1185     ts->name = name;
1186     tcg_regset_set_reg(s->reserved_regs, reg);
1187 
1188     return ts;
1189 }
1190 
1191 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1192 {
1193     s->frame_start = start;
1194     s->frame_end = start + size;
1195     s->frame_temp
1196         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1197 }
1198 
/*
 * Create a global temp backed by memory at base + offset.
 * If base is itself a (non-fixed) global, the new temp is indirect:
 * the base register must be loaded before the value can be accessed.
 * On 32-bit hosts a TCG_TYPE_I64 global occupies two consecutive
 * I32 temps (low part first at offset, high part at offset + 4).
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit value on a 32-bit host needs two loads via the base. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split into two adjacent I32 halves, named "<name>_0"/"<name>_1". */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        /* NOTE(review): plain strdup, unchecked and never freed — globals
           live for the process lifetime; g_strdup would match the file's
           glib usage.  Confirm before changing. */
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1258 
/*
 * Allocate a translation temp of the given type and kind.
 * TEMP_EBB temps are recycled via the free_temps bitmaps when possible;
 * TEMP_TB temps are always freshly allocated.  Types wider than the
 * host register (I64 on 32-bit hosts, I128) are represented by several
 * consecutive TCG_TYPE_REG temps sharing the same base_type.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            goto done;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* n = number of host-register-sized pieces this type needs. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining pieces immediately after the first. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }

 done:
#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
1327 
1328 TCGv_vec tcg_temp_new_vec(TCGType type)
1329 {
1330     TCGTemp *t;
1331 
1332 #ifdef CONFIG_DEBUG_TCG
1333     switch (type) {
1334     case TCG_TYPE_V64:
1335         assert(TCG_TARGET_HAS_v64);
1336         break;
1337     case TCG_TYPE_V128:
1338         assert(TCG_TARGET_HAS_v128);
1339         break;
1340     case TCG_TYPE_V256:
1341         assert(TCG_TARGET_HAS_v256);
1342         break;
1343     default:
1344         g_assert_not_reached();
1345     }
1346 #endif
1347 
1348     t = tcg_temp_new_internal(type, TEMP_EBB);
1349     return temp_tcgv_vec(t);
1350 }
1351 
1352 /* Create a new temp of the same type as an existing temp.  */
1353 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1354 {
1355     TCGTemp *t = tcgv_vec_temp(match);
1356 
1357     tcg_debug_assert(t->temp_allocated != 0);
1358 
1359     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1360     return temp_tcgv_vec(t);
1361 }
1362 
1363 void tcg_temp_free_internal(TCGTemp *ts)
1364 {
1365     TCGContext *s = tcg_ctx;
1366 
1367     switch (ts->kind) {
1368     case TEMP_CONST:
1369         /*
1370          * In order to simplify users of tcg_constant_*,
1371          * silently ignore free.
1372          */
1373         return;
1374     case TEMP_EBB:
1375     case TEMP_TB:
1376         break;
1377     default:
1378         g_assert_not_reached();
1379     }
1380 
1381     tcg_debug_assert(ts->temp_allocated != 0);
1382     ts->temp_allocated = 0;
1383 
1384 #if defined(CONFIG_DEBUG_TCG)
1385     assert(s->temps_in_use > 0);
1386     s->temps_in_use--;
1387 #endif
1388 
1389     if (ts->kind == TEMP_EBB) {
1390         int idx = temp_idx(ts);
1391         set_bit(idx, s->free_temps[ts->base_type].l);
1392     }
1393 }
1394 
/*
 * Return the interned constant temp for (type, val), creating it on
 * first use.  One hash table per type maps the 64-bit value to its
 * TCGTemp; the key pointer aims at the temp's own val field, so the
 * table needs no separate key storage.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first constant. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constant on a 32-bit host: two adjacent I32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* Key points into the temp itself; temps are never freed. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1449 
1450 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1451 {
1452     val = dup_const(vece, val);
1453     return temp_tcgv_vec(tcg_constant_internal(type, val));
1454 }
1455 
1456 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1457 {
1458     TCGTemp *t = tcgv_vec_temp(match);
1459 
1460     tcg_debug_assert(t->temp_allocated != 0);
1461     return tcg_constant_vec(t->base_type, vece, val);
1462 }
1463 
1464 TCGv_i32 tcg_const_i32(int32_t val)
1465 {
1466     TCGv_i32 t0;
1467     t0 = tcg_temp_new_i32();
1468     tcg_gen_movi_i32(t0, val);
1469     return t0;
1470 }
1471 
1472 TCGv_i64 tcg_const_i64(int64_t val)
1473 {
1474     TCGv_i64 t0;
1475     t0 = tcg_temp_new_i64();
1476     tcg_gen_movi_i64(t0, val);
1477     return t0;
1478 }
1479 
#if defined(CONFIG_DEBUG_TCG)
/* Reset the outstanding-temp counter used for leak detection. */
void tcg_clear_temp_count(void)
{
    tcg_ctx->temps_in_use = 0;
}

/* Return 1 if any temps were left allocated, resetting the counter. */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;

    if (s->temps_in_use == 0) {
        return 0;
    }
    /* Clear the count so that we don't give another
     * warning immediately next time around.
     */
    s->temps_in_use = 0;
    return 1;
}
#endif
1500 
1501 /* Return true if OP may appear in the opcode stream.
1502    Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Core ops that every backend must provide. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated by per-backend capability macros. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer ops: mandatory on 64-bit hosts, absent otherwise. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: require some vector support, plus per-op capability. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes past the generic set are always valid. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1795 
1796 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1797 
/*
 * Emit an INDEX_op_call to helper func, returning into ret (NULL when
 * the helper returns void) and taking nargs guest temps in args.
 * Argument placement follows the TCGCallArgumentLoc layout computed by
 * init_call_layout(); 32-bit args needing widening get a fresh i64
 * temp, extended here and freed after the op is queued.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* helper_table maps func pointer -> its TCGHelperInfo. */
    info = g_hash_table_lookup(helper_table, (gpointer)func);
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Output arguments: 0 (void), 1 piece, or 2/4 adjacent pieces. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Input arguments, in the order chosen by init_call_layout(). */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* ABI wants a full register: widen the i32 into a
                   scratch i64, freed once the op is emitted. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* Trailing slots: the raw function pointer and its info. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1884 
1885 static void tcg_reg_alloc_start(TCGContext *s)
1886 {
1887     int i, n;
1888 
1889     for (i = 0, n = s->nb_temps; i < n; i++) {
1890         TCGTemp *ts = &s->temps[i];
1891         TCGTempVal val = TEMP_VAL_MEM;
1892 
1893         switch (ts->kind) {
1894         case TEMP_CONST:
1895             val = TEMP_VAL_CONST;
1896             break;
1897         case TEMP_FIXED:
1898             val = TEMP_VAL_REG;
1899             break;
1900         case TEMP_GLOBAL:
1901             break;
1902         case TEMP_EBB:
1903             val = TEMP_VAL_DEAD;
1904             /* fall through */
1905         case TEMP_TB:
1906             ts->mem_allocated = 0;
1907             break;
1908         default:
1909             g_assert_not_reached();
1910         }
1911         ts->val_type = val;
1912     }
1913 
1914     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1915 }
1916 
/*
 * Format a human-readable name for temporary @ts into @buf (of size
 * @buf_size) and return @buf.  Named temps print their name; TB/EBB
 * temps print an index relative to the globals; constants print their
 * value.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        /* Fixed and global temps carry an explicit name. */
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants are prefixed with the vector width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
1956 
1957 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1958                              int buf_size, TCGArg arg)
1959 {
1960     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1961 }
1962 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1978 
/*
 * Printable names for the MemOp size/sign/endianness combinations,
 * indexed by (memop & (MO_BSWAP | MO_SSIZE)).
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
1994 
/*
 * Printable prefixes for MemOp alignment bits, indexed by
 * (memop & MO_AMASK) >> MO_ASHIFT.  Whichever alignment mode is the
 * target's default prints as the empty string.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2010 
/*
 * Printable names for the valid TCG_BSWAP_* flag combinations;
 * unlisted combinations print as a raw hex value in tcg_dump_ops.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2018 
2019 static inline bool tcg_regset_single(TCGRegSet d)
2020 {
2021     return (d & (d - 1)) == 0;
2022 }
2023 
2024 static inline TCGReg tcg_regset_first(TCGRegSet d)
2025 {
2026     if (TCG_TARGET_NB_REGS <= 32) {
2027         return ctz32(d);
2028     } else {
2029         return ctz64(d);
2030     }
2031 }
2032 
/*
 * fprintf wrapper that returns only the number of characters output:
 * a negative (error) return from fprintf is clamped to 0, so callers
 * can accumulate column positions without per-call error checks.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2036 
/*
 * Dump the current op stream of @s to @f in human-readable form.
 * Each op prints its name, output/input/constant arguments, and --
 * when liveness data is present -- the per-arg sync/dead bits.  When
 * @have_prefs is set, the register-allocation preferences of each
 * output argument are appended as well.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;    /* running output column, for aligning life info */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest-insn boundary: print the unwind words after "----". */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each 64-bit word is split across two 32-bit args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                /* Vector ops print vector width and element width first. */
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /* k walks op->args; i counts within each argument class. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Decode the first constant arg symbolically where possible. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    /* NB: this inner "op" (a MemOp) shadows the TCGOp. */
                    MemOpIdx oi = op->args[k++];
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unrecognized bits: fall back to raw hex. */
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch-like ops print their target label next. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Any remaining constant args print as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before liveness/preference annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Output args (0 and 1) that must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Remaining bits mark arguments dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2261 
2262 /* we give more priority to constraints with less registers */
2263 static int get_constraint_priority(const TCGOpDef *def, int k)
2264 {
2265     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2266     int n = ctpop64(arg_ct->regs);
2267 
2268     /*
2269      * Sort constraints of a single register first, which includes output
2270      * aliases (which must exactly match the input already allocated).
2271      */
2272     if (n == 1 || arg_ct->oalias) {
2273         return INT_MAX;
2274     }
2275 
2276     /*
2277      * Sort register pairs next, first then second immediately after.
2278      * Arbitrarily sort multiple pairs by the index of the first reg;
2279      * there shouldn't be many pairs.
2280      */
2281     switch (arg_ct->pair) {
2282     case 1:
2283     case 3:
2284         return (k + 1) * 2;
2285     case 2:
2286         return (arg_ct->pair_index + 1) * 2 - 1;
2287     }
2288 
2289     /* Finally, sort by decreasing register count. */
2290     assert(n > 1);
2291     return -n;
2292 }
2293 
2294 /* sort from highest priority to lowest */
2295 static void sort_constraints(TCGOpDef *def, int start, int n)
2296 {
2297     int i, j;
2298     TCGArgConstraint *a = def->args_ct;
2299 
2300     for (i = 0; i < n; i++) {
2301         a[start + i].sort_index = start + i;
2302     }
2303     if (n <= 1) {
2304         return;
2305     }
2306     for (i = 0; i < n - 1; i++) {
2307         for (j = i + 1; j < n; j++) {
2308             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2309             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2310             if (p1 < p2) {
2311                 int tmp = a[start + i].sort_index;
2312                 a[start + i].sort_index = a[start + j].sort_index;
2313                 a[start + j].sort_index = tmp;
2314             }
2315         }
2316     }
2317 }
2318 
/*
 * Populate tcg_op_defs[].args_ct for every opcode from the backend's
 * constraint strings: parse register sets and constant constraints,
 * resolve input/output aliases and register pairs, then sort the
 * constraints for the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* Single-character prefixes that consume the whole string. */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliases output 'o': copy its constraint. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate constraint letters; multiple may be combined. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2523 
2524 static void remove_label_use(TCGOp *op, int idx)
2525 {
2526     TCGLabel *label = arg_label(op->args[idx]);
2527     TCGLabelUse *use;
2528 
2529     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2530         if (use->op == op) {
2531             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2532             return;
2533         }
2534     }
2535     g_assert_not_reached();
2536 }
2537 
2538 void tcg_op_remove(TCGContext *s, TCGOp *op)
2539 {
2540     switch (op->opc) {
2541     case INDEX_op_br:
2542         remove_label_use(op, 0);
2543         break;
2544     case INDEX_op_brcond_i32:
2545     case INDEX_op_brcond_i64:
2546         remove_label_use(op, 3);
2547         break;
2548     case INDEX_op_brcond2_i32:
2549         remove_label_use(op, 5);
2550         break;
2551     default:
2552         break;
2553     }
2554 
2555     QTAILQ_REMOVE(&s->ops, op, link);
2556     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2557     s->nb_ops--;
2558 
2559 #ifdef CONFIG_PROFILER
2560     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2561 #endif
2562 }
2563 
2564 void tcg_remove_ops_after(TCGOp *op)
2565 {
2566     TCGContext *s = tcg_ctx;
2567 
2568     while (true) {
2569         TCGOp *last = tcg_last_op();
2570         if (last == op) {
2571             return;
2572         }
2573         tcg_op_remove(s, last);
2574     }
2575 }
2576 
2577 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2578 {
2579     TCGContext *s = tcg_ctx;
2580     TCGOp *op = NULL;
2581 
2582     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2583         QTAILQ_FOREACH(op, &s->free_ops, link) {
2584             if (nargs <= op->nargs) {
2585                 QTAILQ_REMOVE(&s->free_ops, op, link);
2586                 nargs = op->nargs;
2587                 goto found;
2588             }
2589         }
2590     }
2591 
2592     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2593     nargs = MAX(4, nargs);
2594     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2595 
2596  found:
2597     memset(op, 0, offsetof(TCGOp, link));
2598     op->opc = opc;
2599     op->nargs = nargs;
2600 
2601     /* Check for bitfield overflow. */
2602     tcg_debug_assert(op->nargs == nargs);
2603 
2604     s->nb_ops++;
2605     return op;
2606 }
2607 
2608 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2609 {
2610     TCGOp *op = tcg_op_alloc(opc, nargs);
2611     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2612     return op;
2613 }
2614 
2615 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2616                             TCGOpcode opc, unsigned nargs)
2617 {
2618     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2619     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2620     return new_op;
2621 }
2622 
2623 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2624                            TCGOpcode opc, unsigned nargs)
2625 {
2626     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2627     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2628     return new_op;
2629 }
2630 
2631 static void move_label_uses(TCGLabel *to, TCGLabel *from)
2632 {
2633     TCGLabelUse *u;
2634 
2635     QSIMPLEQ_FOREACH(u, &from->branches, next) {
2636         TCGOp *op = u->op;
2637         switch (op->opc) {
2638         case INDEX_op_br:
2639             op->args[0] = label_arg(to);
2640             break;
2641         case INDEX_op_brcond_i32:
2642         case INDEX_op_brcond_i64:
2643             op->args[3] = label_arg(to);
2644             break;
2645         case INDEX_op_brcond2_i32:
2646             op->args[5] = label_arg(to);
2647             break;
2648         default:
2649             g_assert_not_reached();
2650         }
2651     }
2652 
2653     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
2654 }
2655 
/*
 * Reachable analysis: remove unreachable code.
 *
 * Walk the op stream tracking a "dead" flag: it is set after any
 * unconditional control transfer (br, exit_tb, goto_ptr, noreturn
 * helper call) and cleared again at any label that still has uses.
 * Ops seen while dead are deleted, except insn_start markers, which
 * are required for unwind info.  Adjacent labels are merged and
 * branch-to-next branches removed along the way.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2747 
/*
 * Per-temp liveness state bits, stored in TCGTemp.state during the
 * liveness passes below.
 */
#define TS_DEAD  1   /* value is dead: no further reads before redefinition */
#define TS_MEM   2   /* value is synced to its backing memory slot */

/* Query the per-argument life data (op->life) computed by liveness_pass_1. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2753 
2754 /* For liveness_pass_1, the register preferences for a given temp.  */
2755 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2756 {
2757     return ts->state_ptr;
2758 }
2759 
2760 /* For liveness_pass_1, reset the preferences for a given temp to the
2761  * maximal regset for its type.
2762  */
2763 static inline void la_reset_pref(TCGTemp *ts)
2764 {
2765     *la_temp_pref(ts)
2766         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2767 }
2768 
2769 /* liveness analysis: end of function: all temps are dead, and globals
2770    should be in memory. */
2771 static void la_func_end(TCGContext *s, int ng, int nt)
2772 {
2773     int i;
2774 
2775     for (i = 0; i < ng; ++i) {
2776         s->temps[i].state = TS_DEAD | TS_MEM;
2777         la_reset_pref(&s->temps[i]);
2778     }
2779     for (i = ng; i < nt; ++i) {
2780         s->temps[i].state = TS_DEAD;
2781         la_reset_pref(&s->temps[i]);
2782     }
2783 }
2784 
2785 /* liveness analysis: end of basic block: all temps are dead, globals
2786    and local temps should be in memory. */
2787 static void la_bb_end(TCGContext *s, int ng, int nt)
2788 {
2789     int i;
2790 
2791     for (i = 0; i < nt; ++i) {
2792         TCGTemp *ts = &s->temps[i];
2793         int state;
2794 
2795         switch (ts->kind) {
2796         case TEMP_FIXED:
2797         case TEMP_GLOBAL:
2798         case TEMP_TB:
2799             state = TS_DEAD | TS_MEM;
2800             break;
2801         case TEMP_EBB:
2802         case TEMP_CONST:
2803             state = TS_DEAD;
2804             break;
2805         default:
2806             g_assert_not_reached();
2807         }
2808         ts->state = state;
2809         la_reset_pref(ts);
2810     }
2811 }
2812 
2813 /* liveness analysis: sync globals back to memory.  */
2814 static void la_global_sync(TCGContext *s, int ng)
2815 {
2816     int i;
2817 
2818     for (i = 0; i < ng; ++i) {
2819         int state = s->temps[i].state;
2820         s->temps[i].state = state | TS_MEM;
2821         if (state == TS_DEAD) {
2822             /* If the global was previously dead, reset prefs.  */
2823             la_reset_pref(&s->temps[i]);
2824         }
2825     }
2826 }
2827 
2828 /*
2829  * liveness analysis: conditional branch: all temps are dead unless
2830  * explicitly live-across-conditional-branch, globals and local temps
2831  * should be synced.
2832  */
2833 static void la_bb_sync(TCGContext *s, int ng, int nt)
2834 {
2835     la_global_sync(s, ng);
2836 
2837     for (int i = ng; i < nt; ++i) {
2838         TCGTemp *ts = &s->temps[i];
2839         int state;
2840 
2841         switch (ts->kind) {
2842         case TEMP_TB:
2843             state = ts->state;
2844             ts->state = state | TS_MEM;
2845             if (state != TS_DEAD) {
2846                 continue;
2847             }
2848             break;
2849         case TEMP_EBB:
2850         case TEMP_CONST:
2851             continue;
2852         default:
2853             g_assert_not_reached();
2854         }
2855         la_reset_pref(&s->temps[i]);
2856     }
2857 }
2858 
2859 /* liveness analysis: sync globals back to memory and kill.  */
2860 static void la_global_kill(TCGContext *s, int ng)
2861 {
2862     int i;
2863 
2864     for (i = 0; i < ng; i++) {
2865         s->temps[i].state = TS_DEAD | TS_MEM;
2866         la_reset_pref(&s->temps[i]);
2867     }
2868 }
2869 
2870 /* liveness analysis: note live globals crossing calls.  */
2871 static void la_cross_call(TCGContext *s, int nt)
2872 {
2873     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2874     int i;
2875 
2876     for (i = 0; i < nt; i++) {
2877         TCGTemp *ts = &s->temps[i];
2878         if (!(ts->state & TS_DEAD)) {
2879             TCGRegSet *pset = la_temp_pref(ts);
2880             TCGRegSet set = *pset;
2881 
2882             set &= mask;
2883             /* If the combination is not possible, restart.  */
2884             if (set == 0) {
2885                 set = tcg_target_available_regs[ts->type] & mask;
2886             }
2887             *pset = set;
2888         }
2889     }
2890 }
2891 
2892 /*
2893  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
2894  * to TEMP_EBB, if possible.
2895  */
2896 static void __attribute__((noinline))
2897 liveness_pass_0(TCGContext *s)
2898 {
2899     void * const multiple_ebb = (void *)(uintptr_t)-1;
2900     int nb_temps = s->nb_temps;
2901     TCGOp *op, *ebb;
2902 
2903     for (int i = s->nb_globals; i < nb_temps; ++i) {
2904         s->temps[i].state_ptr = NULL;
2905     }
2906 
2907     /*
2908      * Represent each EBB by the op at which it begins.  In the case of
2909      * the first EBB, this is the first op, otherwise it is a label.
2910      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
2911      * within a single EBB, else MULTIPLE_EBB.
2912      */
2913     ebb = QTAILQ_FIRST(&s->ops);
2914     QTAILQ_FOREACH(op, &s->ops, link) {
2915         const TCGOpDef *def;
2916         int nb_oargs, nb_iargs;
2917 
2918         switch (op->opc) {
2919         case INDEX_op_set_label:
2920             ebb = op;
2921             continue;
2922         case INDEX_op_discard:
2923             continue;
2924         case INDEX_op_call:
2925             nb_oargs = TCGOP_CALLO(op);
2926             nb_iargs = TCGOP_CALLI(op);
2927             break;
2928         default:
2929             def = &tcg_op_defs[op->opc];
2930             nb_oargs = def->nb_oargs;
2931             nb_iargs = def->nb_iargs;
2932             break;
2933         }
2934 
2935         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
2936             TCGTemp *ts = arg_temp(op->args[i]);
2937 
2938             if (ts->kind != TEMP_TB) {
2939                 continue;
2940             }
2941             if (ts->state_ptr == NULL) {
2942                 ts->state_ptr = ebb;
2943             } else if (ts->state_ptr != ebb) {
2944                 ts->state_ptr = multiple_ebb;
2945             }
2946         }
2947     }
2948 
2949     /*
2950      * For TEMP_TB that turned out not to be used beyond one EBB,
2951      * reduce the liveness to TEMP_EBB.
2952      */
2953     for (int i = s->nb_globals; i < nb_temps; ++i) {
2954         TCGTemp *ts = &s->temps[i];
2955         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
2956             ts->kind = TEMP_EBB;
2957         }
2958     }
2959 }
2960 
/*
 * Liveness analysis: update each op's arg_life data to tell whether a
 * given input argument is dead.  Instructions updating only dead
 * temporaries are removed.
 *
 * The pass walks the op list BACKWARD, so a use is seen before its
 * definition; TS_DEAD/TS_MEM in TCGTemp.state describe liveness
 * *after* the current op.  It also computes per-temp register
 * preference sets (via state_ptr) for the register allocator.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, reachable through state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    /* Before the call (in backward order) the output
                       has no defined value: dead.  */
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    /* Helper may read and write globals: kill them all. */
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    /* Helper only reads globals: sync but keep live. */
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        /* The input becomes live before the call. */
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* Never removed; needed for unwind info. */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        /* Prefer the register chosen for the aliased output. */
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        /* Publish the computed life data for later passes. */
        op->life = arg_life;
    }
}
3303 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * Each indirect global gets a shadow TEMP_EBB temp; uses of the global
 * are rewritten to the shadow temp, with explicit loads inserted before
 * first use and stores inserted after last write, as dictated by the
 * arg_life data from liveness_pass_1.  Returns true if any op was
 * changed (so liveness must be recomputed by the caller).
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            /* state_ptr links the global to its shadow temp. */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Note: i continues from the loop above (first non-global temp). */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow temp holds no value: insert a load before use. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead output: store the mov source directly and
                           drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3492 
/*
 * Allocate a stack-frame slot for @ts, assigning mem_base/mem_offset.
 * If the temp was subdivided from a larger base type, memory is
 * assigned to all of its parts at once.  Raises a TB-overflow restart
 * if the frame is exhausted.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC stack accesses are biased; fold the bias into the offset. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
3561 
3562 /* Assign @reg to @ts, and update reg_to_temp[]. */
3563 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3564 {
3565     if (ts->val_type == TEMP_VAL_REG) {
3566         TCGReg old = ts->reg;
3567         tcg_debug_assert(s->reg_to_temp[old] == ts);
3568         if (old == reg) {
3569             return;
3570         }
3571         s->reg_to_temp[old] = NULL;
3572     }
3573     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3574     s->reg_to_temp[reg] = ts;
3575     ts->val_type = TEMP_VAL_REG;
3576     ts->reg = reg;
3577 }
3578 
3579 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3580 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3581 {
3582     tcg_debug_assert(type != TEMP_VAL_REG);
3583     if (ts->val_type == TEMP_VAL_REG) {
3584         TCGReg reg = ts->reg;
3585         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3586         s->reg_to_temp[reg] = NULL;
3587     }
3588     ts->val_type = type;
3589 }
3590 
3591 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3592 
3593 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3594    mark it free; otherwise mark it dead.  */
3595 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3596 {
3597     TCGTempVal new_type;
3598 
3599     switch (ts->kind) {
3600     case TEMP_FIXED:
3601         return;
3602     case TEMP_GLOBAL:
3603     case TEMP_TB:
3604         new_type = TEMP_VAL_MEM;
3605         break;
3606     case TEMP_EBB:
3607         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3608         break;
3609     case TEMP_CONST:
3610         new_type = TEMP_VAL_CONST;
3611         break;
3612     default:
3613         g_assert_not_reached();
3614     }
3615     set_temp_val_nonreg(s, ts, new_type);
3616 }
3617 
3618 /* Mark a temporary as dead.  */
3619 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3620 {
3621     temp_free_or_dead(s, ts, 1);
3622 }
3623 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; a coherent temp already
       matches its backing slot, so only dirty writable temps are stored. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register, and let
               the TEMP_VAL_REG case below store that register. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Value already lives (only) in memory; nothing to store. */
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        /* The memory copy now matches the live value. */
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3667 
3668 /* free register 'reg' by spilling the corresponding temporary if necessary */
3669 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3670 {
3671     TCGTemp *ts = s->reg_to_temp[reg];
3672     if (ts != NULL) {
3673         temp_sync(s, ts, allocated_regs, 0, -1);
3674     }
3675 }
3676 
3677 /**
3678  * tcg_reg_alloc:
3679  * @required_regs: Set of registers in which we must allocate.
3680  * @allocated_regs: Set of registers which must be avoided.
3681  * @preferred_regs: Set of registers we should prefer.
3682  * @rev: True if we search the registers in "indirect" order.
3683  *
3684  * The allocated register must be in @required_regs & ~@allocated_regs,
3685  * but if we can put it in @preferred_regs we may save a move later.
3686  */
3687 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3688                             TCGRegSet allocated_regs,
3689                             TCGRegSet preferred_regs, bool rev)
3690 {
3691     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3692     TCGRegSet reg_ct[2];
3693     const int *order;
3694 
3695     reg_ct[1] = required_regs & ~allocated_regs;
3696     tcg_debug_assert(reg_ct[1] != 0);
3697     reg_ct[0] = reg_ct[1] & preferred_regs;
3698 
3699     /* Skip the preferred_regs option if it cannot be satisfied,
3700        or if the preference made no difference.  */
3701     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3702 
3703     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3704 
3705     /* Try free registers, preferences first.  */
3706     for (j = f; j < 2; j++) {
3707         TCGRegSet set = reg_ct[j];
3708 
3709         if (tcg_regset_single(set)) {
3710             /* One register in the set.  */
3711             TCGReg reg = tcg_regset_first(set);
3712             if (s->reg_to_temp[reg] == NULL) {
3713                 return reg;
3714             }
3715         } else {
3716             for (i = 0; i < n; i++) {
3717                 TCGReg reg = order[i];
3718                 if (s->reg_to_temp[reg] == NULL &&
3719                     tcg_regset_test_reg(set, reg)) {
3720                     return reg;
3721                 }
3722             }
3723         }
3724     }
3725 
3726     /* We must spill something.  */
3727     for (j = f; j < 2; j++) {
3728         TCGRegSet set = reg_ct[j];
3729 
3730         if (tcg_regset_single(set)) {
3731             /* One register in the set.  */
3732             TCGReg reg = tcg_regset_first(set);
3733             tcg_reg_free(s, reg, allocated_regs);
3734             return reg;
3735         } else {
3736             for (i = 0; i < n; i++) {
3737                 TCGReg reg = order[i];
3738                 if (tcg_regset_test_reg(set, reg)) {
3739                     tcg_reg_free(s, reg, allocated_regs);
3740                     return reg;
3741                 }
3742             }
3743         }
3744     }
3745 
3746     tcg_abort();
3747 }
3748 
3749 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
3750                                  TCGRegSet allocated_regs,
3751                                  TCGRegSet preferred_regs, bool rev)
3752 {
3753     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3754     TCGRegSet reg_ct[2];
3755     const int *order;
3756 
3757     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
3758     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
3759     tcg_debug_assert(reg_ct[1] != 0);
3760     reg_ct[0] = reg_ct[1] & preferred_regs;
3761 
3762     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3763 
3764     /*
3765      * Skip the preferred_regs option if it cannot be satisfied,
3766      * or if the preference made no difference.
3767      */
3768     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3769 
3770     /*
3771      * Minimize the number of flushes by looking for 2 free registers first,
3772      * then a single flush, then two flushes.
3773      */
3774     for (fmin = 2; fmin >= 0; fmin--) {
3775         for (j = k; j < 2; j++) {
3776             TCGRegSet set = reg_ct[j];
3777 
3778             for (i = 0; i < n; i++) {
3779                 TCGReg reg = order[i];
3780 
3781                 if (tcg_regset_test_reg(set, reg)) {
3782                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
3783                     if (f >= fmin) {
3784                         tcg_reg_free(s, reg, allocated_regs);
3785                         tcg_reg_free(s, reg + 1, allocated_regs);
3786                         return reg;
3787                     }
3788                 }
3789             }
3790         }
3791     }
3792     tcg_abort();
3793 }
3794 
3795 /* Make sure the temporary is in a register.  If needed, allocate the register
3796    from DESIRED while avoiding ALLOCATED.  */
3797 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3798                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3799 {
3800     TCGReg reg;
3801 
3802     switch (ts->val_type) {
3803     case TEMP_VAL_REG:
3804         return;
3805     case TEMP_VAL_CONST:
3806         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3807                             preferred_regs, ts->indirect_base);
3808         if (ts->type <= TCG_TYPE_I64) {
3809             tcg_out_movi(s, ts->type, reg, ts->val);
3810         } else {
3811             uint64_t val = ts->val;
3812             MemOp vece = MO_64;
3813 
3814             /*
3815              * Find the minimal vector element that matches the constant.
3816              * The targets will, in general, have to do this search anyway,
3817              * do this generically.
3818              */
3819             if (val == dup_const(MO_8, val)) {
3820                 vece = MO_8;
3821             } else if (val == dup_const(MO_16, val)) {
3822                 vece = MO_16;
3823             } else if (val == dup_const(MO_32, val)) {
3824                 vece = MO_32;
3825             }
3826 
3827             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3828         }
3829         ts->mem_coherent = 0;
3830         break;
3831     case TEMP_VAL_MEM:
3832         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3833                             preferred_regs, ts->indirect_base);
3834         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3835         ts->mem_coherent = 1;
3836         break;
3837     case TEMP_VAL_DEAD:
3838     default:
3839         tcg_abort();
3840     }
3841     set_temp_val_reg(s, ts, reg);
3842 }
3843 
3844 /* Save a temporary to memory. 'allocated_regs' is used in case a
3845    temporary registers needs to be allocated to store a constant.  */
3846 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3847 {
3848     /* The liveness analysis already ensures that globals are back
3849        in memory. Keep an tcg_debug_assert for safety. */
3850     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3851 }
3852 
3853 /* save globals to their canonical location and assume they can be
3854    modified be the following code. 'allocated_regs' is used in case a
3855    temporary registers needs to be allocated to store a constant. */
3856 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3857 {
3858     int i, n;
3859 
3860     for (i = 0, n = s->nb_globals; i < n; i++) {
3861         temp_save(s, &s->temps[i], allocated_regs);
3862     }
3863 }
3864 
3865 /* sync globals to their canonical location and assume they can be
3866    read by the following code. 'allocated_regs' is used in case a
3867    temporary registers needs to be allocated to store a constant. */
3868 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3869 {
3870     int i, n;
3871 
3872     for (i = 0, n = s->nb_globals; i < n; i++) {
3873         TCGTemp *ts = &s->temps[i];
3874         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3875                          || ts->kind == TEMP_FIXED
3876                          || ts->mem_coherent);
3877     }
3878 }
3879 
3880 /* at the end of a basic block, we assume all temporaries are dead and
3881    all globals are stored at their canonical location. */
3882 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3883 {
3884     int i;
3885 
3886     for (i = s->nb_globals; i < s->nb_temps; i++) {
3887         TCGTemp *ts = &s->temps[i];
3888 
3889         switch (ts->kind) {
3890         case TEMP_TB:
3891             temp_save(s, ts, allocated_regs);
3892             break;
3893         case TEMP_EBB:
3894             /* The liveness analysis already ensures that temps are dead.
3895                Keep an tcg_debug_assert for safety. */
3896             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3897             break;
3898         case TEMP_CONST:
3899             /* Similarly, we should have freed any allocated register. */
3900             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3901             break;
3902         default:
3903             g_assert_not_reached();
3904         }
3905     }
3906 
3907     save_globals(s, allocated_regs);
3908 }
3909 
3910 /*
3911  * At a conditional branch, we assume all temporaries are dead unless
3912  * explicitly live-across-conditional-branch; all globals and local
3913  * temps are synced to their location.
3914  */
3915 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3916 {
3917     sync_globals(s, allocated_regs);
3918 
3919     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3920         TCGTemp *ts = &s->temps[i];
3921         /*
3922          * The liveness analysis already ensures that temps are dead.
3923          * Keep tcg_debug_asserts for safety.
3924          */
3925         switch (ts->kind) {
3926         case TEMP_TB:
3927             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3928             break;
3929         case TEMP_EBB:
3930         case TEMP_CONST:
3931             break;
3932         default:
3933             g_assert_not_reached();
3934         }
3935     }
3936 }
3937 
3938 /*
3939  * Specialized code generation for INDEX_op_mov_* with a constant.
3940  */
3941 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3942                                   tcg_target_ulong val, TCGLifeData arg_life,
3943                                   TCGRegSet preferred_regs)
3944 {
3945     /* ENV should not be modified.  */
3946     tcg_debug_assert(!temp_readonly(ots));
3947 
3948     /* The movi is not explicitly generated here.  */
3949     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3950     ots->val = val;
3951     ots->mem_coherent = 0;
3952     if (NEED_SYNC_ARG(0)) {
3953         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3954     } else if (IS_DEAD_ARG(0)) {
3955         temp_dead(s, ots);
3956     }
3957 }
3958 
3959 /*
3960  * Specialized code generation for INDEX_op_mov_*.
3961  */
3962 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3963 {
3964     const TCGLifeData arg_life = op->life;
3965     TCGRegSet allocated_regs, preferred_regs;
3966     TCGTemp *ts, *ots;
3967     TCGType otype, itype;
3968     TCGReg oreg, ireg;
3969 
3970     allocated_regs = s->reserved_regs;
3971     preferred_regs = output_pref(op, 0);
3972     ots = arg_temp(op->args[0]);
3973     ts = arg_temp(op->args[1]);
3974 
3975     /* ENV should not be modified.  */
3976     tcg_debug_assert(!temp_readonly(ots));
3977 
3978     /* Note that otype != itype for no-op truncation.  */
3979     otype = ots->type;
3980     itype = ts->type;
3981 
3982     if (ts->val_type == TEMP_VAL_CONST) {
3983         /* propagate constant or generate sti */
3984         tcg_target_ulong val = ts->val;
3985         if (IS_DEAD_ARG(1)) {
3986             temp_dead(s, ts);
3987         }
3988         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3989         return;
3990     }
3991 
3992     /* If the source value is in memory we're going to be forced
3993        to have it in a register in order to perform the copy.  Copy
3994        the SOURCE value into its own register first, that way we
3995        don't have to reload SOURCE the next time it is used. */
3996     if (ts->val_type == TEMP_VAL_MEM) {
3997         temp_load(s, ts, tcg_target_available_regs[itype],
3998                   allocated_regs, preferred_regs);
3999     }
4000     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4001     ireg = ts->reg;
4002 
4003     if (IS_DEAD_ARG(0)) {
4004         /* mov to a non-saved dead register makes no sense (even with
4005            liveness analysis disabled). */
4006         tcg_debug_assert(NEED_SYNC_ARG(0));
4007         if (!ots->mem_allocated) {
4008             temp_allocate_frame(s, ots);
4009         }
4010         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4011         if (IS_DEAD_ARG(1)) {
4012             temp_dead(s, ts);
4013         }
4014         temp_dead(s, ots);
4015         return;
4016     }
4017 
4018     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4019         /*
4020          * The mov can be suppressed.  Kill input first, so that it
4021          * is unlinked from reg_to_temp, then set the output to the
4022          * reg that we saved from the input.
4023          */
4024         temp_dead(s, ts);
4025         oreg = ireg;
4026     } else {
4027         if (ots->val_type == TEMP_VAL_REG) {
4028             oreg = ots->reg;
4029         } else {
4030             /* Make sure to not spill the input register during allocation. */
4031             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4032                                  allocated_regs | ((TCGRegSet)1 << ireg),
4033                                  preferred_regs, ots->indirect_base);
4034         }
4035         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4036             /*
4037              * Cross register class move not supported.
4038              * Store the source register into the destination slot
4039              * and leave the destination temp as TEMP_VAL_MEM.
4040              */
4041             assert(!temp_readonly(ots));
4042             if (!ts->mem_allocated) {
4043                 temp_allocate_frame(s, ots);
4044             }
4045             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4046             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4047             ots->mem_coherent = 1;
4048             return;
4049         }
4050     }
4051     set_temp_val_reg(s, ots, oreg);
4052     ots->mem_coherent = 0;
4053 
4054     if (NEED_SYNC_ARG(0)) {
4055         temp_sync(s, ots, allocated_regs, 0, 0);
4056     }
4057 }
4058 
4059 /*
4060  * Specialized code generation for INDEX_op_dup_vec.
4061  */
4062 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4063 {
4064     const TCGLifeData arg_life = op->life;
4065     TCGRegSet dup_out_regs, dup_in_regs;
4066     TCGTemp *its, *ots;
4067     TCGType itype, vtype;
4068     unsigned vece;
4069     int lowpart_ofs;
4070     bool ok;
4071 
4072     ots = arg_temp(op->args[0]);
4073     its = arg_temp(op->args[1]);
4074 
4075     /* ENV should not be modified.  */
4076     tcg_debug_assert(!temp_readonly(ots));
4077 
4078     itype = its->type;
4079     vece = TCGOP_VECE(op);
4080     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4081 
4082     if (its->val_type == TEMP_VAL_CONST) {
4083         /* Propagate constant via movi -> dupi.  */
4084         tcg_target_ulong val = its->val;
4085         if (IS_DEAD_ARG(1)) {
4086             temp_dead(s, its);
4087         }
4088         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4089         return;
4090     }
4091 
4092     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4093     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4094 
4095     /* Allocate the output register now.  */
4096     if (ots->val_type != TEMP_VAL_REG) {
4097         TCGRegSet allocated_regs = s->reserved_regs;
4098         TCGReg oreg;
4099 
4100         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4101             /* Make sure to not spill the input register. */
4102             tcg_regset_set_reg(allocated_regs, its->reg);
4103         }
4104         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4105                              output_pref(op, 0), ots->indirect_base);
4106         set_temp_val_reg(s, ots, oreg);
4107     }
4108 
4109     switch (its->val_type) {
4110     case TEMP_VAL_REG:
4111         /*
4112          * The dup constriaints must be broad, covering all possible VECE.
4113          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4114          * to fail, indicating that extra moves are required for that case.
4115          */
4116         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4117             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4118                 goto done;
4119             }
4120             /* Try again from memory or a vector input register.  */
4121         }
4122         if (!its->mem_coherent) {
4123             /*
4124              * The input register is not synced, and so an extra store
4125              * would be required to use memory.  Attempt an integer-vector
4126              * register move first.  We do not have a TCGRegSet for this.
4127              */
4128             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4129                 break;
4130             }
4131             /* Sync the temp back to its slot and load from there.  */
4132             temp_sync(s, its, s->reserved_regs, 0, 0);
4133         }
4134         /* fall through */
4135 
4136     case TEMP_VAL_MEM:
4137         lowpart_ofs = 0;
4138         if (HOST_BIG_ENDIAN) {
4139             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4140         }
4141         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4142                              its->mem_offset + lowpart_ofs)) {
4143             goto done;
4144         }
4145         /* Load the input into the destination vector register. */
4146         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4147         break;
4148 
4149     default:
4150         g_assert_not_reached();
4151     }
4152 
4153     /* We now have a vector input register, so dup must succeed. */
4154     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4155     tcg_debug_assert(ok);
4156 
4157  done:
4158     ots->mem_coherent = 0;
4159     if (IS_DEAD_ARG(1)) {
4160         temp_dead(s, its);
4161     }
4162     if (NEED_SYNC_ARG(0)) {
4163         temp_sync(s, ots, s->reserved_regs, 0, 0);
4164     }
4165     if (IS_DEAD_ARG(0)) {
4166         temp_dead(s, ots);
4167     }
4168 }
4169 
4170 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4171 {
4172     const TCGLifeData arg_life = op->life;
4173     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4174     TCGRegSet i_allocated_regs;
4175     TCGRegSet o_allocated_regs;
4176     int i, k, nb_iargs, nb_oargs;
4177     TCGReg reg;
4178     TCGArg arg;
4179     const TCGArgConstraint *arg_ct;
4180     TCGTemp *ts;
4181     TCGArg new_args[TCG_MAX_OP_ARGS];
4182     int const_args[TCG_MAX_OP_ARGS];
4183 
4184     nb_oargs = def->nb_oargs;
4185     nb_iargs = def->nb_iargs;
4186 
4187     /* copy constants */
4188     memcpy(new_args + nb_oargs + nb_iargs,
4189            op->args + nb_oargs + nb_iargs,
4190            sizeof(TCGArg) * def->nb_cargs);
4191 
4192     i_allocated_regs = s->reserved_regs;
4193     o_allocated_regs = s->reserved_regs;
4194 
4195     /* satisfy input constraints */
4196     for (k = 0; k < nb_iargs; k++) {
4197         TCGRegSet i_preferred_regs, i_required_regs;
4198         bool allocate_new_reg, copyto_new_reg;
4199         TCGTemp *ts2;
4200         int i1, i2;
4201 
4202         i = def->args_ct[nb_oargs + k].sort_index;
4203         arg = op->args[i];
4204         arg_ct = &def->args_ct[i];
4205         ts = arg_temp(arg);
4206 
4207         if (ts->val_type == TEMP_VAL_CONST
4208             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4209             /* constant is OK for instruction */
4210             const_args[i] = 1;
4211             new_args[i] = ts->val;
4212             continue;
4213         }
4214 
4215         reg = ts->reg;
4216         i_preferred_regs = 0;
4217         i_required_regs = arg_ct->regs;
4218         allocate_new_reg = false;
4219         copyto_new_reg = false;
4220 
4221         switch (arg_ct->pair) {
4222         case 0: /* not paired */
4223             if (arg_ct->ialias) {
4224                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4225 
4226                 /*
4227                  * If the input is readonly, then it cannot also be an
4228                  * output and aliased to itself.  If the input is not
4229                  * dead after the instruction, we must allocate a new
4230                  * register and move it.
4231                  */
4232                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4233                     allocate_new_reg = true;
4234                 } else if (ts->val_type == TEMP_VAL_REG) {
4235                     /*
4236                      * Check if the current register has already been
4237                      * allocated for another input.
4238                      */
4239                     allocate_new_reg =
4240                         tcg_regset_test_reg(i_allocated_regs, reg);
4241                 }
4242             }
4243             if (!allocate_new_reg) {
4244                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4245                           i_preferred_regs);
4246                 reg = ts->reg;
4247                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4248             }
4249             if (allocate_new_reg) {
4250                 /*
4251                  * Allocate a new register matching the constraint
4252                  * and move the temporary register into it.
4253                  */
4254                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4255                           i_allocated_regs, 0);
4256                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4257                                     i_preferred_regs, ts->indirect_base);
4258                 copyto_new_reg = true;
4259             }
4260             break;
4261 
4262         case 1:
4263             /* First of an input pair; if i1 == i2, the second is an output. */
4264             i1 = i;
4265             i2 = arg_ct->pair_index;
4266             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4267 
4268             /*
4269              * It is easier to default to allocating a new pair
4270              * and to identify a few cases where it's not required.
4271              */
4272             if (arg_ct->ialias) {
4273                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4274                 if (IS_DEAD_ARG(i1) &&
4275                     IS_DEAD_ARG(i2) &&
4276                     !temp_readonly(ts) &&
4277                     ts->val_type == TEMP_VAL_REG &&
4278                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4279                     tcg_regset_test_reg(i_required_regs, reg) &&
4280                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4281                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4282                     (ts2
4283                      ? ts2->val_type == TEMP_VAL_REG &&
4284                        ts2->reg == reg + 1 &&
4285                        !temp_readonly(ts2)
4286                      : s->reg_to_temp[reg + 1] == NULL)) {
4287                     break;
4288                 }
4289             } else {
4290                 /* Without aliasing, the pair must also be an input. */
4291                 tcg_debug_assert(ts2);
4292                 if (ts->val_type == TEMP_VAL_REG &&
4293                     ts2->val_type == TEMP_VAL_REG &&
4294                     ts2->reg == reg + 1 &&
4295                     tcg_regset_test_reg(i_required_regs, reg)) {
4296                     break;
4297                 }
4298             }
4299             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4300                                      0, ts->indirect_base);
4301             goto do_pair;
4302 
4303         case 2: /* pair second */
4304             reg = new_args[arg_ct->pair_index] + 1;
4305             goto do_pair;
4306 
4307         case 3: /* ialias with second output, no first input */
4308             tcg_debug_assert(arg_ct->ialias);
4309             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4310 
4311             if (IS_DEAD_ARG(i) &&
4312                 !temp_readonly(ts) &&
4313                 ts->val_type == TEMP_VAL_REG &&
4314                 reg > 0 &&
4315                 s->reg_to_temp[reg - 1] == NULL &&
4316                 tcg_regset_test_reg(i_required_regs, reg) &&
4317                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4318                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4319                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4320                 break;
4321             }
4322             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4323                                      i_allocated_regs, 0,
4324                                      ts->indirect_base);
4325             tcg_regset_set_reg(i_allocated_regs, reg);
4326             reg += 1;
4327             goto do_pair;
4328 
4329         do_pair:
4330             /*
4331              * If an aliased input is not dead after the instruction,
4332              * we must allocate a new register and move it.
4333              */
4334             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4335                 TCGRegSet t_allocated_regs = i_allocated_regs;
4336 
4337                 /*
4338                  * Because of the alias, and the continued life, make sure
4339                  * that the temp is somewhere *other* than the reg pair,
4340                  * and we get a copy in reg.
4341                  */
4342                 tcg_regset_set_reg(t_allocated_regs, reg);
4343                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4344                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4345                     /* If ts was already in reg, copy it somewhere else. */
4346                     TCGReg nr;
4347                     bool ok;
4348 
4349                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4350                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4351                                        t_allocated_regs, 0, ts->indirect_base);
4352                     ok = tcg_out_mov(s, ts->type, nr, reg);
4353                     tcg_debug_assert(ok);
4354 
4355                     set_temp_val_reg(s, ts, nr);
4356                 } else {
4357                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4358                               t_allocated_regs, 0);
4359                     copyto_new_reg = true;
4360                 }
4361             } else {
4362                 /* Preferably allocate to reg, otherwise copy. */
4363                 i_required_regs = (TCGRegSet)1 << reg;
4364                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4365                           i_preferred_regs);
4366                 copyto_new_reg = ts->reg != reg;
4367             }
4368             break;
4369 
4370         default:
4371             g_assert_not_reached();
4372         }
4373 
4374         if (copyto_new_reg) {
4375             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4376                 /*
4377                  * Cross register class move not supported.  Sync the
4378                  * temp back to its slot and load from there.
4379                  */
4380                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4381                 tcg_out_ld(s, ts->type, reg,
4382                            ts->mem_base->reg, ts->mem_offset);
4383             }
4384         }
4385         new_args[i] = reg;
4386         const_args[i] = 0;
4387         tcg_regset_set_reg(i_allocated_regs, reg);
4388     }
4389 
4390     /* mark dead temporaries and free the associated registers */
4391     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4392         if (IS_DEAD_ARG(i)) {
4393             temp_dead(s, arg_temp(op->args[i]));
4394         }
4395     }
4396 
4397     if (def->flags & TCG_OPF_COND_BRANCH) {
4398         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4399     } else if (def->flags & TCG_OPF_BB_END) {
4400         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4401     } else {
4402         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4403             /* XXX: permit generic clobber register list ? */
4404             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4405                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4406                     tcg_reg_free(s, i, i_allocated_regs);
4407                 }
4408             }
4409         }
4410         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4411             /* sync globals if the op has side effects and might trigger
4412                an exception. */
4413             sync_globals(s, i_allocated_regs);
4414         }
4415 
4416         /* satisfy the output constraints */
4417         for(k = 0; k < nb_oargs; k++) {
4418             i = def->args_ct[k].sort_index;
4419             arg = op->args[i];
4420             arg_ct = &def->args_ct[i];
4421             ts = arg_temp(arg);
4422 
4423             /* ENV should not be modified.  */
4424             tcg_debug_assert(!temp_readonly(ts));
4425 
4426             switch (arg_ct->pair) {
4427             case 0: /* not paired */
4428                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4429                     reg = new_args[arg_ct->alias_index];
4430                 } else if (arg_ct->newreg) {
4431                     reg = tcg_reg_alloc(s, arg_ct->regs,
4432                                         i_allocated_regs | o_allocated_regs,
4433                                         output_pref(op, k), ts->indirect_base);
4434                 } else {
4435                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4436                                         output_pref(op, k), ts->indirect_base);
4437                 }
4438                 break;
4439 
4440             case 1: /* first of pair */
4441                 tcg_debug_assert(!arg_ct->newreg);
4442                 if (arg_ct->oalias) {
4443                     reg = new_args[arg_ct->alias_index];
4444                     break;
4445                 }
4446                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4447                                          output_pref(op, k), ts->indirect_base);
4448                 break;
4449 
4450             case 2: /* second of pair */
4451                 tcg_debug_assert(!arg_ct->newreg);
4452                 if (arg_ct->oalias) {
4453                     reg = new_args[arg_ct->alias_index];
4454                 } else {
4455                     reg = new_args[arg_ct->pair_index] + 1;
4456                 }
4457                 break;
4458 
4459             case 3: /* first of pair, aliasing with a second input */
4460                 tcg_debug_assert(!arg_ct->newreg);
4461                 reg = new_args[arg_ct->pair_index] - 1;
4462                 break;
4463 
4464             default:
4465                 g_assert_not_reached();
4466             }
4467             tcg_regset_set_reg(o_allocated_regs, reg);
4468             set_temp_val_reg(s, ts, reg);
4469             ts->mem_coherent = 0;
4470             new_args[i] = reg;
4471         }
4472     }
4473 
4474     /* emit instruction */
4475     if (def->flags & TCG_OPF_VECTOR) {
4476         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4477                        new_args, const_args);
4478     } else {
4479         tcg_out_op(s, op->opc, new_args, const_args);
4480     }
4481 
4482     /* move the outputs in the correct register if needed */
4483     for(i = 0; i < nb_oargs; i++) {
4484         ts = arg_temp(op->args[i]);
4485 
4486         /* ENV should not be modified.  */
4487         tcg_debug_assert(!temp_readonly(ts));
4488 
4489         if (NEED_SYNC_ARG(i)) {
4490             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4491         } else if (IS_DEAD_ARG(i)) {
4492             temp_dead(s, ts);
4493         }
4494     }
4495 }
4496 
/*
 * Specialized register allocation for INDEX_op_dup2_vec: broadcast one
 * 64-bit value, supplied as two 32-bit halves (args[1] = low half,
 * args[2] = high half), into the vector output args[0].
 *
 * Returns true if code was emitted here; false tells the caller to
 * fall back to the generic expansion via tcg_reg_alloc_op.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the constant. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Sync both halves so the full 64-bit value is in memory. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* Output now lives only in its register; mark memory stale. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4584 
4585 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4586                          TCGRegSet allocated_regs)
4587 {
4588     if (ts->val_type == TEMP_VAL_REG) {
4589         if (ts->reg != reg) {
4590             tcg_reg_free(s, reg, allocated_regs);
4591             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4592                 /*
4593                  * Cross register class move not supported.  Sync the
4594                  * temp back to its slot and load from there.
4595                  */
4596                 temp_sync(s, ts, allocated_regs, 0, 0);
4597                 tcg_out_ld(s, ts->type, reg,
4598                            ts->mem_base->reg, ts->mem_offset);
4599             }
4600         }
4601     } else {
4602         TCGRegSet arg_set = 0;
4603 
4604         tcg_reg_free(s, reg, allocated_regs);
4605         tcg_regset_set_reg(arg_set, reg);
4606         temp_load(s, ts, arg_set, allocated_regs, 0);
4607     }
4608 }
4609 
4610 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4611                          TCGRegSet allocated_regs)
4612 {
4613     /*
4614      * When the destination is on the stack, load up the temp and store.
4615      * If there are many call-saved registers, the temp might live to
4616      * see another use; otherwise it'll be discarded.
4617      */
4618     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4619     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4620                TCG_TARGET_CALL_STACK_OFFSET +
4621                stk_slot * sizeof(tcg_target_long));
4622 }
4623 
4624 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4625                             TCGTemp *ts, TCGRegSet *allocated_regs)
4626 {
4627     if (REG_P(l)) {
4628         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4629         load_arg_reg(s, reg, ts, *allocated_regs);
4630         tcg_regset_set_reg(*allocated_regs, reg);
4631     } else {
4632         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4633                      ts, *allocated_regs);
4634     }
4635 }
4636 
4637 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4638                          intptr_t ref_off, TCGRegSet *allocated_regs)
4639 {
4640     TCGReg reg;
4641     int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4642 
4643     if (stk_slot < 0) {
4644         reg = tcg_target_call_iarg_regs[arg_slot];
4645         tcg_reg_free(s, reg, *allocated_regs);
4646         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4647         tcg_regset_set_reg(*allocated_regs, reg);
4648     } else {
4649         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4650                             *allocated_regs, 0, false);
4651         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4652         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4653                    TCG_TARGET_CALL_STACK_OFFSET
4654                    + stk_slot * sizeof(tcg_target_long));
4655     }
4656 }
4657 
/*
 * Register allocation and code emission for a helper call (INDEX_op_call).
 * Loads inputs into their ABI locations, frees call-clobbered registers,
 * saves/syncs globals according to the call flags, emits the call, and
 * assigns or flushes the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its reference slot, then pass a
               pointer to that slot as the actual argument. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Continuation word of a by-reference argument: store only;
               the pointer itself is passed by the BY_REF part. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part arrives in the corresponding ABI return reg. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* The 128-bit result arrives in a vector register: spill it to
           the temp's memory slot. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4789 
4790 #ifdef CONFIG_PROFILER
4791 
/*
 * Accumulate (PROF_ADD) or take the maximum of (PROF_MAX) one TCGProfile
 * field across contexts.  Macros avoid copy/paste errors in the long
 * field list in tcg_profile_snapshot() below.
 */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4805 
4806 /* Pass in a zero'ed @prof */
4807 static inline
4808 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4809 {
4810     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4811     unsigned int i;
4812 
4813     for (i = 0; i < n_ctxs; i++) {
4814         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4815         const TCGProfile *orig = &s->prof;
4816 
4817         if (counters) {
4818             PROF_ADD(prof, orig, cpu_exec_time);
4819             PROF_ADD(prof, orig, tb_count1);
4820             PROF_ADD(prof, orig, tb_count);
4821             PROF_ADD(prof, orig, op_count);
4822             PROF_MAX(prof, orig, op_count_max);
4823             PROF_ADD(prof, orig, temp_count);
4824             PROF_MAX(prof, orig, temp_count_max);
4825             PROF_ADD(prof, orig, del_op_count);
4826             PROF_ADD(prof, orig, code_in_len);
4827             PROF_ADD(prof, orig, code_out_len);
4828             PROF_ADD(prof, orig, search_out_len);
4829             PROF_ADD(prof, orig, interm_time);
4830             PROF_ADD(prof, orig, code_time);
4831             PROF_ADD(prof, orig, la_time);
4832             PROF_ADD(prof, orig, opt_time);
4833             PROF_ADD(prof, orig, restore_count);
4834             PROF_ADD(prof, orig, restore_time);
4835         }
4836         if (table) {
4837             int i;
4838 
4839             for (i = 0; i < NB_OPS; i++) {
4840                 PROF_ADD(prof, orig, table_op_count[i]);
4841             }
4842         }
4843     }
4844 }
4845 
4846 #undef PROF_ADD
4847 #undef PROF_MAX
4848 
/* Snapshot only the scalar counters into @prof (see tcg_profile_snapshot). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4853 
/* Snapshot only the per-opcode table into @prof (see tcg_profile_snapshot). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4858 
/* Append one "<opcode-name> <count>" line per TCG opcode to @buf. */
void tcg_dump_op_count(GString *buf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                               prof.table_op_count[i]);
    }
}
4870 
4871 int64_t tcg_cpu_exec_time(void)
4872 {
4873     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4874     unsigned int i;
4875     int64_t ret = 0;
4876 
4877     for (i = 0; i < n_ctxs; i++) {
4878         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4879         const TCGProfile *prof = &s->prof;
4880 
4881         ret += qatomic_read(&prof->cpu_exec_time);
4882     }
4883     return ret;
4884 }
4885 #else
4886 void tcg_dump_op_count(GString *buf)
4887 {
4888     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4889 }
4890 
/* Stub when CONFIG_PROFILER is disabled: this query cannot be answered,
   so report the misconfiguration and abort the process. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4896 #endif
4897 
4898 
/*
 * Translate the op list in @s->ops into host code for @tb.
 *
 * Returns the number of bytes of host code generated, or a negative
 * value if translation must be restarted: -1 when the code buffer
 * high-water mark was exceeded, -2 when the TB outgrew the 16-bit
 * gen_insn_end_off offsets or relocation resolution failed, or the
 * backend finalization's own negative error code.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Count ops and temps for this TB before any passes mutate the list. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    /* Liveness analysis: compute dead/sync bits for each op's arguments. */
    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Main emission loop: allocate registers and emit host code per op. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record per-guest-insn boundaries and start-state words. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
5140 
5141 #ifdef CONFIG_PROFILER
/* Append a human-readable summary of the accumulated TCG profiling
   counters (aggregated across all contexts) to @buf. */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero when no TBs have been translated. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* The remaining lines divide by tot; clamp to avoid div-by-zero. */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
5205 #else
/* Profiling support was not compiled in (CONFIG_PROFILER unset):
   emit a placeholder line instead of the statistics above.  */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5210 #endif
5211 
5212 #ifdef ELF_HOST_MACHINE
5213 /* In order to use this feature, the backend needs to do three things:
5214 
5215    (1) Define ELF_HOST_MACHINE to indicate both what value to
5216        put into the ELF image and to indicate support for the feature.
5217 
5218    (2) Define tcg_register_jit.  This should create a buffer containing
5219        the contents of a .debug_frame section that describes the post-
5220        prologue unwind info for the tcg machine.
5221 
5222    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5223 */
5224 
5225 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action requested of the debugger, stored in jit_descriptor.action_flag
   before calling __jit_debug_register_code (per the GDB JIT interface).  */
typedef enum {
    JIT_NOACTION = 0,   /* no pending request */
    JIT_REGISTER_FN,    /* relevant_entry has just been added */
    JIT_UNREGISTER_FN   /* relevant_entry is about to be removed */
} jit_actions_t;
5231 
/* One registered symbol file: a node in a doubly-linked list whose
   payload is an in-memory ELF image describing some JITed code.
   Layout is fixed by the GDB JIT interface; do not modify.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
5238 
/* Root descriptor that GDB locates by name (__jit_debug_descriptor).
   Layout is fixed by the GDB JIT interface; do not modify.  */
struct jit_descriptor {
    uint32_t version;                       /* interface version; must be 1 */
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
5245 
/* GDB places a breakpoint on this well-known symbol; calling it after
   updating __jit_debug_descriptor notifies an attached debugger.  The
   noinline attribute and the empty asm keep the compiler from inlining
   or eliding the (observably empty) function.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
5251 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  Version 1 is the only defined
   version of the GDB JIT interface; the remaining fields start zero
   (no action, no entries) until tcg_register_jit_int fills them in.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5255 
5256 /* End GDB interface.  */
5257 
/*
 * Return the byte offset of STR within the NUL-separated string table
 * STRTAB (suitable for sh_name/st_name fields).  The first table byte
 * is the conventional leading NUL, so the scan starts at offset 1.
 * There is no bounds check: callers only look up strings that are
 * known to be present in the table.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
5269 
5270 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5271                                  const void *debug_frame,
5272                                  size_t debug_frame_size)
5273 {
5274     struct __attribute__((packed)) DebugInfo {
5275         uint32_t  len;
5276         uint16_t  version;
5277         uint32_t  abbrev;
5278         uint8_t   ptr_size;
5279         uint8_t   cu_die;
5280         uint16_t  cu_lang;
5281         uintptr_t cu_low_pc;
5282         uintptr_t cu_high_pc;
5283         uint8_t   fn_die;
5284         char      fn_name[16];
5285         uintptr_t fn_low_pc;
5286         uintptr_t fn_high_pc;
5287         uint8_t   cu_eoc;
5288     };
5289 
5290     struct ElfImage {
5291         ElfW(Ehdr) ehdr;
5292         ElfW(Phdr) phdr;
5293         ElfW(Shdr) shdr[7];
5294         ElfW(Sym)  sym[2];
5295         struct DebugInfo di;
5296         uint8_t    da[24];
5297         char       str[80];
5298     };
5299 
5300     struct ElfImage *img;
5301 
5302     static const struct ElfImage img_template = {
5303         .ehdr = {
5304             .e_ident[EI_MAG0] = ELFMAG0,
5305             .e_ident[EI_MAG1] = ELFMAG1,
5306             .e_ident[EI_MAG2] = ELFMAG2,
5307             .e_ident[EI_MAG3] = ELFMAG3,
5308             .e_ident[EI_CLASS] = ELF_CLASS,
5309             .e_ident[EI_DATA] = ELF_DATA,
5310             .e_ident[EI_VERSION] = EV_CURRENT,
5311             .e_type = ET_EXEC,
5312             .e_machine = ELF_HOST_MACHINE,
5313             .e_version = EV_CURRENT,
5314             .e_phoff = offsetof(struct ElfImage, phdr),
5315             .e_shoff = offsetof(struct ElfImage, shdr),
5316             .e_ehsize = sizeof(ElfW(Shdr)),
5317             .e_phentsize = sizeof(ElfW(Phdr)),
5318             .e_phnum = 1,
5319             .e_shentsize = sizeof(ElfW(Shdr)),
5320             .e_shnum = ARRAY_SIZE(img->shdr),
5321             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
5322 #ifdef ELF_HOST_FLAGS
5323             .e_flags = ELF_HOST_FLAGS,
5324 #endif
5325 #ifdef ELF_OSABI
5326             .e_ident[EI_OSABI] = ELF_OSABI,
5327 #endif
5328         },
5329         .phdr = {
5330             .p_type = PT_LOAD,
5331             .p_flags = PF_X,
5332         },
5333         .shdr = {
5334             [0] = { .sh_type = SHT_NULL },
5335             /* Trick: The contents of code_gen_buffer are not present in
5336                this fake ELF file; that got allocated elsewhere.  Therefore
5337                we mark .text as SHT_NOBITS (similar to .bss) so that readers
5338                will not look for contents.  We can record any address.  */
5339             [1] = { /* .text */
5340                 .sh_type = SHT_NOBITS,
5341                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5342             },
5343             [2] = { /* .debug_info */
5344                 .sh_type = SHT_PROGBITS,
5345                 .sh_offset = offsetof(struct ElfImage, di),
5346                 .sh_size = sizeof(struct DebugInfo),
5347             },
5348             [3] = { /* .debug_abbrev */
5349                 .sh_type = SHT_PROGBITS,
5350                 .sh_offset = offsetof(struct ElfImage, da),
5351                 .sh_size = sizeof(img->da),
5352             },
5353             [4] = { /* .debug_frame */
5354                 .sh_type = SHT_PROGBITS,
5355                 .sh_offset = sizeof(struct ElfImage),
5356             },
5357             [5] = { /* .symtab */
5358                 .sh_type = SHT_SYMTAB,
5359                 .sh_offset = offsetof(struct ElfImage, sym),
5360                 .sh_size = sizeof(img->sym),
5361                 .sh_info = 1,
5362                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5363                 .sh_entsize = sizeof(ElfW(Sym)),
5364             },
5365             [6] = { /* .strtab */
5366                 .sh_type = SHT_STRTAB,
5367                 .sh_offset = offsetof(struct ElfImage, str),
5368                 .sh_size = sizeof(img->str),
5369             }
5370         },
5371         .sym = {
5372             [1] = { /* code_gen_buffer */
5373                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5374                 .st_shndx = 1,
5375             }
5376         },
5377         .di = {
5378             .len = sizeof(struct DebugInfo) - 4,
5379             .version = 2,
5380             .ptr_size = sizeof(void *),
5381             .cu_die = 1,
5382             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5383             .fn_die = 2,
5384             .fn_name = "code_gen_buffer"
5385         },
5386         .da = {
5387             1,          /* abbrev number (the cu) */
5388             0x11, 1,    /* DW_TAG_compile_unit, has children */
5389             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5390             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5391             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5392             0, 0,       /* end of abbrev */
5393             2,          /* abbrev number (the fn) */
5394             0x2e, 0,    /* DW_TAG_subprogram, no children */
5395             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5396             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5397             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5398             0, 0,       /* end of abbrev */
5399             0           /* no more abbrev */
5400         },
5401         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5402                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5403     };
5404 
5405     /* We only need a single jit entry; statically allocate it.  */
5406     static struct jit_code_entry one_entry;
5407 
5408     uintptr_t buf = (uintptr_t)buf_ptr;
5409     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5410     DebugFrameHeader *dfh;
5411 
5412     img = g_malloc(img_size);
5413     *img = img_template;
5414 
5415     img->phdr.p_vaddr = buf;
5416     img->phdr.p_paddr = buf;
5417     img->phdr.p_memsz = buf_size;
5418 
5419     img->shdr[1].sh_name = find_string(img->str, ".text");
5420     img->shdr[1].sh_addr = buf;
5421     img->shdr[1].sh_size = buf_size;
5422 
5423     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5424     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5425 
5426     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5427     img->shdr[4].sh_size = debug_frame_size;
5428 
5429     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5430     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5431 
5432     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5433     img->sym[1].st_value = buf;
5434     img->sym[1].st_size = buf_size;
5435 
5436     img->di.cu_low_pc = buf;
5437     img->di.cu_high_pc = buf + buf_size;
5438     img->di.fn_low_pc = buf;
5439     img->di.fn_high_pc = buf + buf_size;
5440 
5441     dfh = (DebugFrameHeader *)(img + 1);
5442     memcpy(dfh, debug_frame, debug_frame_size);
5443     dfh->fde.func_start = buf;
5444     dfh->fde.func_len = buf_size;
5445 
5446 #ifdef DEBUG_JIT
5447     /* Enable this block to be able to debug the ELF image file creation.
5448        One can use readelf, objdump, or other inspection utilities.  */
5449     {
5450         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5451         FILE *f = fopen(jit, "w+b");
5452         if (f) {
5453             if (fwrite(img, img_size, 1, f) != img_size) {
5454                 /* Avoid stupid unused return value warning for fwrite.  */
5455             }
5456             fclose(f);
5457         }
5458     }
5459 #endif
5460 
5461     one_entry.symfile_addr = img;
5462     one_entry.symfile_size = img_size;
5463 
5464     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5465     __jit_debug_descriptor.relevant_entry = &one_entry;
5466     __jit_debug_descriptor.first_entry = &one_entry;
5467     __jit_debug_register_code();
5468 }
5469 #else
5470 /* No support for the feature.  Provide the entry point expected by exec.c,
5471    and implement the internal function we declared earlier.  */
5472 
/* ELF_HOST_MACHINE is not defined for this backend: there is nothing
   to hand to GDB, so registration is a no-op.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
5478 
/* Public no-op stub so callers elsewhere link regardless of whether
   the GDB JIT feature is available on this host.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5482 #endif /* ELF_HOST_MACHINE */
5483 
5484 #if !TCG_TARGET_MAYBE_vec
/* Backends without vector support never generate vector opcodes, so
   this fallback expansion hook must be unreachable; reaching it
   indicates a bug in the caller.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5489 #endif
5490