xref: /openbmc/qemu/tcg/tcg.c (revision 2cfb3b6c)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 #include "accel/tcg/perf.h"
65 
66 /* Forward declarations for functions declared in tcg-target.c.inc and
67    used here. */
68 static void tcg_target_init(TCGContext *s);
69 static void tcg_target_qemu_prologue(TCGContext *s);
70 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
71                         intptr_t value, intptr_t addend);
72 
73 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Fixed leading fields of the CIE header shared by all hosts.
 * The first member is aligned to the size of a pointer, which raises the
 * alignment of the whole structure accordingly.  Unlike the FDE header
 * and combined header below, this struct is not marked QEMU_PACKED.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
83 
/*
 * Fixed leading fields of the FDE header shared by all hosts: a length,
 * the offset of the associated CIE, and the start/length of the function
 * being described (pointer-sized values).
 * NOTE(review): QEMU_PACKED is defined elsewhere; presumably it removes
 * inter-member padding -- confirm.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
90 
/* A CIE header immediately followed by one FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
95 
96 static void tcg_register_jit_int(const void *buf, size_t size,
97                                  const void *debug_frame,
98                                  size_t debug_frame_size)
99     __attribute__((unused));
100 
101 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103                        intptr_t arg2);
104 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106                          TCGReg ret, tcg_target_long arg);
107 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
108 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
109 static void tcg_out_goto_tb(TCGContext *s, int which);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
/*
 * Stub compiled only when TCG_TARGET_MAYBE_vec is false: the backend
 * provides no implementation, so reaching this is a programming error.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/*
 * Stub compiled only when TCG_TARGET_MAYBE_vec is false: the backend
 * provides no implementation, so reaching this is a programming error.
 */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/*
 * Stub compiled only when TCG_TARGET_MAYBE_vec is false: the backend
 * provides no implementation, so reaching this is a programming error.
 */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
/*
 * Stub compiled only when TCG_TARGET_MAYBE_vec is false: the backend
 * provides no implementation, so reaching this is a programming error.
 */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
153                          const TCGHelperInfo *info);
154 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
155 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
156 #ifdef TCG_TARGET_NEED_LDST_LABELS
157 static int tcg_out_ldst_finalize(TCGContext *s);
158 #endif
159 
160 TCGContext tcg_init_ctx;
161 __thread TCGContext *tcg_ctx;
162 
163 TCGContext **tcg_ctxs;
164 unsigned int tcg_cur_ctxs;
165 unsigned int tcg_max_ctxs;
166 TCGv_env cpu_env = 0;
167 const void *tcg_code_gen_epilogue;
168 uintptr_t tcg_splitwx_diff;
169 
170 #ifndef CONFIG_TCG_INTERPRETER
171 tcg_prologue_fn *tcg_qemu_tb_exec;
172 #endif
173 
174 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
175 static TCGRegSet tcg_target_call_clobber_regs;
176 
177 #if TCG_TARGET_INSN_UNIT_SIZE == 1
178 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
179 {
180     *s->code_ptr++ = v;
181 }
182 
/*
 * Overwrite the insn unit at @p with the 8-bit value @v.
 * Only built when the insn unit size is 1.
 */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
188 #endif
189 
190 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
191 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
192 {
193     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
194         *s->code_ptr++ = v;
195     } else {
196         tcg_insn_unit *p = s->code_ptr;
197         memcpy(p, &v, sizeof(v));
198         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
199     }
200 }
201 
202 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
203                                                        uint16_t v)
204 {
205     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
206         *p = v;
207     } else {
208         memcpy(p, &v, sizeof(v));
209     }
210 }
211 #endif
212 
213 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
214 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
215 {
216     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
217         *s->code_ptr++ = v;
218     } else {
219         tcg_insn_unit *p = s->code_ptr;
220         memcpy(p, &v, sizeof(v));
221         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
222     }
223 }
224 
225 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
226                                                        uint32_t v)
227 {
228     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
229         *p = v;
230     } else {
231         memcpy(p, &v, sizeof(v));
232     }
233 }
234 #endif
235 
236 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
237 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
238 {
239     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
240         *s->code_ptr++ = v;
241     } else {
242         tcg_insn_unit *p = s->code_ptr;
243         memcpy(p, &v, sizeof(v));
244         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
245     }
246 }
247 
248 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
249                                                        uint64_t v)
250 {
251     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
252         *p = v;
253     } else {
254         memcpy(p, &v, sizeof(v));
255     }
256 }
257 #endif
258 
259 /* label relocation processing */
260 
261 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
262                           TCGLabel *l, intptr_t addend)
263 {
264     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
265 
266     r->type = type;
267     r->ptr = code_ptr;
268     r->addend = addend;
269     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
270 }
271 
/*
 * Bind label @l to the current output position of @s.
 * A label may only be bound once (checked by the debug assertion).
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    /* Store the address as converted by tcg_splitwx_to_rx(). */
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
278 
279 TCGLabel *gen_new_label(void)
280 {
281     TCGContext *s = tcg_ctx;
282     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
283 
284     memset(l, 0, sizeof(TCGLabel));
285     l->id = s->nb_labels++;
286     QSIMPLEQ_INIT(&l->relocs);
287 
288     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
289 
290     return l;
291 }
292 
293 static bool tcg_resolve_relocs(TCGContext *s)
294 {
295     TCGLabel *l;
296 
297     QSIMPLEQ_FOREACH(l, &s->labels, next) {
298         TCGRelocation *r;
299         uintptr_t value = l->u.value;
300 
301         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
302             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
303                 return false;
304             }
305         }
306     }
307     return true;
308 }
309 
/*
 * Record the current generated-code size as the jump reset offset for
 * slot @which of the translation block being generated (s->gen_tb).
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
318 
/*
 * Record the current generated-code size as the jump instruction offset
 * for slot @which of the translation block being generated (s->gen_tb).
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
327 
/*
 * Return, as an integer, the address of the jmp_target_addr[@which]
 * field of the translation block being generated (s->gen_tb).
 */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
336 
337 /* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /*
     * Non-local exit to the jump buffer s->jmp_trans, delivering the
     * value -2 to whoever established it; this function never returns.
     */
    siglongjmp(s->jmp_trans, -2);
}
343 
/*
 * Token-pasting helpers: glue prefix P onto one to six arguments, with
 * the arguments separated from each other by underscores.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
350 
351 /* Define an enumeration for the various combinations. */
352 
353 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
354 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
355 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
356 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
357 
358 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
359 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
360 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
361 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
362 
363 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
364 
365 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
366 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
367 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
368 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
369 
/*
 * One enumerator per entry of tcg-target-con-set.h; at this point the
 * C_O*_I* / C_N*_I* macros expand each entry to an enumerator name
 * followed by a comma.
 */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
373 
374 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
375 
376 #undef C_O0_I1
377 #undef C_O0_I2
378 #undef C_O0_I3
379 #undef C_O0_I4
380 #undef C_O1_I1
381 #undef C_O1_I2
382 #undef C_O1_I3
383 #undef C_O1_I4
384 #undef C_N1_I2
385 #undef C_O2_I1
386 #undef C_O2_I2
387 #undef C_O2_I3
388 #undef C_O2_I4
389 
390 /* Put all of the constraint sets into an array, indexed by the enum. */
391 
392 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
393 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
394 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
395 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
396 
397 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
398 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
399 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
400 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
401 
402 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
403 
404 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
405 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
406 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
407 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
408 
/*
 * One TCGTargetOpDef per entry of tcg-target-con-set.h, in the same
 * order as the TCGConstraintSetIndex enumerators; at this point the
 * C_O*_I* / C_N*_I* macros expand each entry to an initializer holding
 * the stringified constraints.
 */
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
412 
413 
414 #undef C_O0_I1
415 #undef C_O0_I2
416 #undef C_O0_I3
417 #undef C_O0_I4
418 #undef C_O1_I1
419 #undef C_O1_I2
420 #undef C_O1_I3
421 #undef C_O1_I4
422 #undef C_N1_I2
423 #undef C_O2_I1
424 #undef C_O2_I2
425 #undef C_O2_I3
426 #undef C_O2_I4
427 
428 /* Expand the enumerator to be returned from tcg_target_op_def(). */
429 
430 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
431 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
432 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
433 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
434 
435 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
436 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
437 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
438 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
439 
440 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
441 
442 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
443 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
444 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
445 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
446 
447 #include "tcg-target.c.inc"
448 
449 static void alloc_tcg_plugin_context(TCGContext *s)
450 {
451 #ifdef CONFIG_PLUGIN
452     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
453     s->plugin_tb->insns =
454         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
455 #endif
456 }
457 
458 /*
459  * All TCG threads except the parent (i.e. the one that called tcg_context_init
460  * and registered the target's TCG globals) must register with this function
461  * before initiating translation.
462  *
463  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
464  * of tcg_region_init() for the reasoning behind this.
465  *
466  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
468  * is not used anymore for translation once this function is called.
469  *
470  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
472  */
473 #ifdef CONFIG_USER_ONLY
/* User-mode variant: every thread shares the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
478 #else
479 void tcg_register_thread(void)
480 {
481     TCGContext *s = g_malloc(sizeof(*s));
482     unsigned int i, n;
483 
484     *s = tcg_init_ctx;
485 
486     /* Relink mem_base.  */
487     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
488         if (tcg_init_ctx.temps[i].mem_base) {
489             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
490             tcg_debug_assert(b >= 0 && b < n);
491             s->temps[i].mem_base = &s->temps[b];
492         }
493     }
494 
495     /* Claim an entry in tcg_ctxs */
496     n = qatomic_fetch_inc(&tcg_cur_ctxs);
497     g_assert(n < tcg_max_ctxs);
498     qatomic_set(&tcg_ctxs[n], s);
499 
500     if (n > 0) {
501         alloc_tcg_plugin_context(s);
502         tcg_region_initial_alloc(s);
503     }
504 
505     tcg_ctx = s;
506 }
507 #endif /* !CONFIG_USER_ONLY */
508 
509 /* pool based memory allocation */
510 void *tcg_malloc_internal(TCGContext *s, int size)
511 {
512     TCGPool *p;
513     int pool_size;
514 
515     if (size > TCG_POOL_CHUNK_SIZE) {
516         /* big malloc: insert a new pool (XXX: could optimize) */
517         p = g_malloc(sizeof(TCGPool) + size);
518         p->size = size;
519         p->next = s->pool_first_large;
520         s->pool_first_large = p;
521         return p->data;
522     } else {
523         p = s->pool_current;
524         if (!p) {
525             p = s->pool_first;
526             if (!p)
527                 goto new_pool;
528         } else {
529             if (!p->next) {
530             new_pool:
531                 pool_size = TCG_POOL_CHUNK_SIZE;
532                 p = g_malloc(sizeof(TCGPool) + pool_size);
533                 p->size = pool_size;
534                 p->next = NULL;
535                 if (s->pool_current) {
536                     s->pool_current->next = p;
537                 } else {
538                     s->pool_first = p;
539                 }
540             } else {
541                 p = p->next;
542             }
543         }
544     }
545     s->pool_current = p;
546     s->pool_cur = p->data + size;
547     s->pool_end = p->data + p->size;
548     return p->data;
549 }
550 
551 void tcg_pool_reset(TCGContext *s)
552 {
553     TCGPool *p, *t;
554     for (p = s->pool_first_large; p; p = t) {
555         t = p->next;
556         g_free(p);
557     }
558     s->pool_first_large = NULL;
559     s->pool_cur = s->pool_end = NULL;
560     s->pool_current = NULL;
561 }
562 
563 #include "exec/helper-proto.h"
564 
/*
 * Table of every helper, with entries generated by exec/helper-tcg.h.
 * Not const: the call layout (and, for the interpreter, the ffi cif) of
 * each entry is filled in during initialisation.
 */
static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
568 static GHashTable *helper_table;
569 
570 #ifdef CONFIG_TCG_INTERPRETER
571 static ffi_type *typecode_to_ffi(int argmask)
572 {
573     /*
574      * libffi does not support __int128_t, so we have forced Int128
575      * to use the structure definition instead of the builtin type.
576      */
577     static ffi_type *ffi_type_i128_elements[3] = {
578         &ffi_type_uint64,
579         &ffi_type_uint64,
580         NULL
581     };
582     static ffi_type ffi_type_i128 = {
583         .size = 16,
584         .alignment = __alignof__(Int128),
585         .type = FFI_TYPE_STRUCT,
586         .elements = ffi_type_i128_elements,
587     };
588 
589     switch (argmask) {
590     case dh_typecode_void:
591         return &ffi_type_void;
592     case dh_typecode_i32:
593         return &ffi_type_uint32;
594     case dh_typecode_s32:
595         return &ffi_type_sint32;
596     case dh_typecode_i64:
597         return &ffi_type_uint64;
598     case dh_typecode_s64:
599         return &ffi_type_sint64;
600     case dh_typecode_ptr:
601         return &ffi_type_pointer;
602     case dh_typecode_i128:
603         return &ffi_type_i128;
604     }
605     g_assert_not_reached();
606 }
607 
608 static void init_ffi_layouts(void)
609 {
610     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
611     GHashTable *ffi_table = g_hash_table_new(NULL, NULL);
612 
613     for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
614         TCGHelperInfo *info = &all_helpers[i];
615         unsigned typemask = info->typemask;
616         gpointer hash = (gpointer)(uintptr_t)typemask;
617         struct {
618             ffi_cif cif;
619             ffi_type *args[];
620         } *ca;
621         ffi_status status;
622         int nargs;
623         ffi_cif *cif;
624 
625         cif = g_hash_table_lookup(ffi_table, hash);
626         if (cif) {
627             info->cif = cif;
628             continue;
629         }
630 
631         /* Ignoring the return type, find the last non-zero field. */
632         nargs = 32 - clz32(typemask >> 3);
633         nargs = DIV_ROUND_UP(nargs, 3);
634         assert(nargs <= MAX_CALL_IARGS);
635 
636         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
637         ca->cif.rtype = typecode_to_ffi(typemask & 7);
638         ca->cif.nargs = nargs;
639 
640         if (nargs != 0) {
641             ca->cif.arg_types = ca->args;
642             for (int j = 0; j < nargs; ++j) {
643                 int typecode = extract32(typemask, (j + 1) * 3, 3);
644                 ca->args[j] = typecode_to_ffi(typecode);
645             }
646         }
647 
648         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
649                               ca->cif.rtype, ca->cif.arg_types);
650         assert(status == FFI_OK);
651 
652         cif = &ca->cif;
653         info->cif = cif;
654         g_hash_table_insert(ffi_table, hash, (gpointer)cif);
655     }
656 
657     g_hash_table_destroy(ffi_table);
658 }
659 #endif /* CONFIG_TCG_INTERPRETER */
660 
/*
 * Running cursors used while laying out the arguments of one helper
 * call; each layout_arg_* function advances the ones it consumes.
 */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
667 
668 static void layout_arg_even(TCGCumulativeArgs *cum)
669 {
670     cum->arg_slot += cum->arg_slot & 1;
671 }
672 
673 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
674                          TCGCallArgumentKind kind)
675 {
676     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
677 
678     *loc = (TCGCallArgumentLoc){
679         .kind = kind,
680         .arg_idx = cum->arg_idx,
681         .arg_slot = cum->arg_slot,
682     };
683     cum->info_in_idx++;
684     cum->arg_slot++;
685 }
686 
687 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
688                                 TCGHelperInfo *info, int n)
689 {
690     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
691 
692     for (int i = 0; i < n; ++i) {
693         /* Layout all using the same arg_idx, adjusting the subindex. */
694         loc[i] = (TCGCallArgumentLoc){
695             .kind = TCG_CALL_ARG_NORMAL,
696             .arg_idx = cum->arg_idx,
697             .tmp_subindex = i,
698             .arg_slot = cum->arg_slot + i,
699         };
700     }
701     cum->info_in_idx += n;
702     cum->arg_slot += n;
703 }
704 
705 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
706 {
707     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
708     int n = 128 / TCG_TARGET_REG_BITS;
709 
710     /* The first subindex carries the pointer. */
711     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
712 
713     /*
714      * The callee is allowed to clobber memory associated with
715      * structure pass by-reference.  Therefore we must make copies.
716      * Allocate space from "ref_slot", which will be adjusted to
717      * follow the parameters on the stack.
718      */
719     loc[0].ref_slot = cum->ref_slot;
720 
721     /*
722      * Subsequent words also go into the reference slot, but
723      * do not accumulate into the regular arguments.
724      */
725     for (int i = 1; i < n; ++i) {
726         loc[i] = (TCGCallArgumentLoc){
727             .kind = TCG_CALL_ARG_BY_REF_N,
728             .arg_idx = cum->arg_idx,
729             .tmp_subindex = i,
730             .ref_slot = cum->ref_slot + i,
731         };
732     }
733     cum->info_in_idx += n;
734     cum->ref_slot += n;
735 }
736 
/*
 * Compute the call layout of one helper from info->typemask.
 *
 * The typemask is consumed 3 bits at a time: the low 3 bits encode the
 * return type, each following 3-bit field encodes one argument, and a
 * remaining value of zero ends the argument list.
 *
 * Fills in info->nr_out and info->out_kind for the return value,
 * info->in[] and info->nr_in for the arguments, and finally rebases the
 * "ref_slot" of by-reference arguments so that the reference area follows
 * any stack-passed parameters.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* Translate the helper typecode into the TCG type to lay out. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Apply the backend's calling convention for that type. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /*
                 * The low bit of the typecode selects between
                 * TCG_CALL_ARG_EXTEND_U and the enumerator following it.
                 * NOTE(review): presumably the signed variant; relies on
                 * enum and typecode encodings defined elsewhere -- confirm.
                 */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                /* A 64-bit value needs two slots on a 32-bit host. */
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        /* Only arguments that spilled past the registers use the stack. */
        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        /* Rebase every by-reference entry onto the relocated area. */
        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
916 
917 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
918 static void process_op_defs(TCGContext *s);
919 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
920                                             TCGReg reg, const char *name);
921 
922 static void tcg_context_init(unsigned max_cpus)
923 {
924     TCGContext *s = &tcg_init_ctx;
925     int op, total_args, n, i;
926     TCGOpDef *def;
927     TCGArgConstraint *args_ct;
928     TCGTemp *ts;
929 
930     memset(s, 0, sizeof(*s));
931     s->nb_globals = 0;
932 
933     /* Count total number of arguments and allocate the corresponding
934        space */
935     total_args = 0;
936     for(op = 0; op < NB_OPS; op++) {
937         def = &tcg_op_defs[op];
938         n = def->nb_iargs + def->nb_oargs;
939         total_args += n;
940     }
941 
942     args_ct = g_new0(TCGArgConstraint, total_args);
943 
944     for(op = 0; op < NB_OPS; op++) {
945         def = &tcg_op_defs[op];
946         def->args_ct = args_ct;
947         n = def->nb_iargs + def->nb_oargs;
948         args_ct += n;
949     }
950 
951     /* Register helpers.  */
952     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
953     helper_table = g_hash_table_new(NULL, NULL);
954 
955     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
956         init_call_layout(&all_helpers[i]);
957         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
958                             (gpointer)&all_helpers[i]);
959     }
960 
961 #ifdef CONFIG_TCG_INTERPRETER
962     init_ffi_layouts();
963 #endif
964 
965     tcg_target_init(s);
966     process_op_defs(s);
967 
968     /* Reverse the order of the saved registers, assuming they're all at
969        the start of tcg_target_reg_alloc_order.  */
970     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
971         int r = tcg_target_reg_alloc_order[n];
972         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
973             break;
974         }
975     }
976     for (i = 0; i < n; ++i) {
977         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
978     }
979     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
980         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
981     }
982 
983     alloc_tcg_plugin_context(s);
984 
985     tcg_ctx = s;
986     /*
987      * In user-mode we simply share the init context among threads, since we
988      * use a single region. See the documentation tcg_region_init() for the
989      * reasoning behind this.
990      * In softmmu we will have at most max_cpus TCG threads.
991      */
992 #ifdef CONFIG_USER_ONLY
993     tcg_ctxs = &tcg_ctx;
994     tcg_cur_ctxs = 1;
995     tcg_max_ctxs = 1;
996 #else
997     tcg_max_ctxs = max_cpus;
998     tcg_ctxs = g_new0(TCGContext *, max_cpus);
999 #endif
1000 
1001     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1002     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1003     cpu_env = temp_tcgv_ptr(ts);
1004 }
1005 
/*
 * Public TCG start-up entry point: initialize the TCG context first, then
 * the code-generation regions.
 *
 * @tb_size, @splitwx: forwarded unchanged to tcg_region_init().
 * @max_cpus: forwarded to both initialization steps.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    /* The context must exist before the regions are set up. */
    tcg_context_init(max_cpus);

    tcg_region_init(tb_size, splitwx, max_cpus);
}
1011 
1012 /*
1013  * Allocate TBs right before their corresponding translated code, making
1014  * sure that TBs and code are on different cache lines.
1015  */
1016 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1017 {
1018     uintptr_t align = qemu_icache_linesize;
1019     TranslationBlock *tb;
1020     void *next;
1021 
1022  retry:
1023     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1024     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1025 
1026     if (unlikely(next > s->code_gen_highwater)) {
1027         if (tcg_region_alloc(s)) {
1028             return NULL;
1029         }
1030         goto retry;
1031     }
1032     qatomic_set(&s->code_gen_ptr, next);
1033     s->data_gen_ptr = NULL;
1034     return tb;
1035 }
1036 
1037 void tcg_prologue_init(TCGContext *s)
1038 {
1039     size_t prologue_size;
1040 
1041     s->code_ptr = s->code_gen_ptr;
1042     s->code_buf = s->code_gen_ptr;
1043     s->data_gen_ptr = NULL;
1044 
1045 #ifndef CONFIG_TCG_INTERPRETER
1046     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1047 #endif
1048 
1049 #ifdef TCG_TARGET_NEED_POOL_LABELS
1050     s->pool_labels = NULL;
1051 #endif
1052 
1053     qemu_thread_jit_write();
1054     /* Generate the prologue.  */
1055     tcg_target_qemu_prologue(s);
1056 
1057 #ifdef TCG_TARGET_NEED_POOL_LABELS
1058     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1059     {
1060         int result = tcg_out_pool_finalize(s);
1061         tcg_debug_assert(result == 0);
1062     }
1063 #endif
1064 
1065     prologue_size = tcg_current_code_size(s);
1066     perf_report_prologue(s->code_gen_ptr, prologue_size);
1067 
1068 #ifndef CONFIG_TCG_INTERPRETER
1069     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1070                         (uintptr_t)s->code_buf, prologue_size);
1071 #endif
1072 
1073 #ifdef DEBUG_DISAS
1074     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1075         FILE *logfile = qemu_log_trylock();
1076         if (logfile) {
1077             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1078             if (s->data_gen_ptr) {
1079                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1080                 size_t data_size = prologue_size - code_size;
1081                 size_t i;
1082 
1083                 disas(logfile, s->code_gen_ptr, code_size);
1084 
1085                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1086                     if (sizeof(tcg_target_ulong) == 8) {
1087                         fprintf(logfile,
1088                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1089                                 (uintptr_t)s->data_gen_ptr + i,
1090                                 *(uint64_t *)(s->data_gen_ptr + i));
1091                     } else {
1092                         fprintf(logfile,
1093                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1094                                 (uintptr_t)s->data_gen_ptr + i,
1095                                 *(uint32_t *)(s->data_gen_ptr + i));
1096                     }
1097                 }
1098             } else {
1099                 disas(logfile, s->code_gen_ptr, prologue_size);
1100             }
1101             fprintf(logfile, "\n");
1102             qemu_log_unlock(logfile);
1103         }
1104     }
1105 #endif
1106 
1107 #ifndef CONFIG_TCG_INTERPRETER
1108     /*
1109      * Assert that goto_ptr is implemented completely, setting an epilogue.
1110      * For tci, we use NULL as the signal to return from the interpreter,
1111      * so skip this check.
1112      */
1113     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1114 #endif
1115 
1116     tcg_region_prologue_set(s);
1117 }
1118 
1119 void tcg_func_start(TCGContext *s)
1120 {
1121     tcg_pool_reset(s);
1122     s->nb_temps = s->nb_globals;
1123 
1124     /* No temps have been previously allocated for size or locality.  */
1125     memset(s->free_temps, 0, sizeof(s->free_temps));
1126 
1127     /* No constant temps have been previously allocated. */
1128     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1129         if (s->const_table[i]) {
1130             g_hash_table_remove_all(s->const_table[i]);
1131         }
1132     }
1133 
1134     s->nb_ops = 0;
1135     s->nb_labels = 0;
1136     s->current_frame_offset = s->frame_start;
1137 
1138 #ifdef CONFIG_DEBUG_TCG
1139     s->goto_tb_issue_mask = 0;
1140 #endif
1141 
1142     QTAILQ_INIT(&s->ops);
1143     QTAILQ_INIT(&s->free_ops);
1144     QSIMPLEQ_INIT(&s->labels);
1145 }
1146 
1147 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1148 {
1149     int n = s->nb_temps++;
1150 
1151     if (n >= TCG_MAX_TEMPS) {
1152         tcg_raise_tb_overflow(s);
1153     }
1154     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1155 }
1156 
1157 static TCGTemp *tcg_global_alloc(TCGContext *s)
1158 {
1159     TCGTemp *ts;
1160 
1161     tcg_debug_assert(s->nb_globals == s->nb_temps);
1162     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1163     s->nb_globals++;
1164     ts = tcg_temp_alloc(s);
1165     ts->kind = TEMP_GLOBAL;
1166 
1167     return ts;
1168 }
1169 
1170 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1171                                             TCGReg reg, const char *name)
1172 {
1173     TCGTemp *ts;
1174 
1175     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1176         tcg_abort();
1177     }
1178 
1179     ts = tcg_global_alloc(s);
1180     ts->base_type = type;
1181     ts->type = type;
1182     ts->kind = TEMP_FIXED;
1183     ts->reg = reg;
1184     ts->name = name;
1185     tcg_regset_set_reg(s->reserved_regs, reg);
1186 
1187     return ts;
1188 }
1189 
1190 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1191 {
1192     s->frame_start = start;
1193     s->frame_end = start + size;
1194     s->frame_temp
1195         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1196 }
1197 
1198 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1199                                      intptr_t offset, const char *name)
1200 {
1201     TCGContext *s = tcg_ctx;
1202     TCGTemp *base_ts = tcgv_ptr_temp(base);
1203     TCGTemp *ts = tcg_global_alloc(s);
1204     int indirect_reg = 0;
1205 
1206     switch (base_ts->kind) {
1207     case TEMP_FIXED:
1208         break;
1209     case TEMP_GLOBAL:
1210         /* We do not support double-indirect registers.  */
1211         tcg_debug_assert(!base_ts->indirect_reg);
1212         base_ts->indirect_base = 1;
1213         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1214                             ? 2 : 1);
1215         indirect_reg = 1;
1216         break;
1217     default:
1218         g_assert_not_reached();
1219     }
1220 
1221     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1222         TCGTemp *ts2 = tcg_global_alloc(s);
1223         char buf[64];
1224 
1225         ts->base_type = TCG_TYPE_I64;
1226         ts->type = TCG_TYPE_I32;
1227         ts->indirect_reg = indirect_reg;
1228         ts->mem_allocated = 1;
1229         ts->mem_base = base_ts;
1230         ts->mem_offset = offset;
1231         pstrcpy(buf, sizeof(buf), name);
1232         pstrcat(buf, sizeof(buf), "_0");
1233         ts->name = strdup(buf);
1234 
1235         tcg_debug_assert(ts2 == ts + 1);
1236         ts2->base_type = TCG_TYPE_I64;
1237         ts2->type = TCG_TYPE_I32;
1238         ts2->indirect_reg = indirect_reg;
1239         ts2->mem_allocated = 1;
1240         ts2->mem_base = base_ts;
1241         ts2->mem_offset = offset + 4;
1242         ts2->temp_subindex = 1;
1243         pstrcpy(buf, sizeof(buf), name);
1244         pstrcat(buf, sizeof(buf), "_1");
1245         ts2->name = strdup(buf);
1246     } else {
1247         ts->base_type = type;
1248         ts->type = type;
1249         ts->indirect_reg = indirect_reg;
1250         ts->mem_allocated = 1;
1251         ts->mem_base = base_ts;
1252         ts->mem_offset = offset;
1253         ts->name = name;
1254     }
1255     return ts;
1256 }
1257 
1258 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1259 {
1260     TCGContext *s = tcg_ctx;
1261     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1262     TCGTemp *ts;
1263     int idx, k;
1264 
1265     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1266     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1267     if (idx < TCG_MAX_TEMPS) {
1268         /* There is already an available temp with the right type.  */
1269         clear_bit(idx, s->free_temps[k].l);
1270 
1271         ts = &s->temps[idx];
1272         ts->temp_allocated = 1;
1273         tcg_debug_assert(ts->base_type == type);
1274         tcg_debug_assert(ts->kind == kind);
1275     } else {
1276         int i, n;
1277 
1278         switch (type) {
1279         case TCG_TYPE_I32:
1280         case TCG_TYPE_V64:
1281         case TCG_TYPE_V128:
1282         case TCG_TYPE_V256:
1283             n = 1;
1284             break;
1285         case TCG_TYPE_I64:
1286             n = 64 / TCG_TARGET_REG_BITS;
1287             break;
1288         case TCG_TYPE_I128:
1289             n = 128 / TCG_TARGET_REG_BITS;
1290             break;
1291         default:
1292             g_assert_not_reached();
1293         }
1294 
1295         ts = tcg_temp_alloc(s);
1296         ts->base_type = type;
1297         ts->temp_allocated = 1;
1298         ts->kind = kind;
1299 
1300         if (n == 1) {
1301             ts->type = type;
1302         } else {
1303             ts->type = TCG_TYPE_REG;
1304 
1305             for (i = 1; i < n; ++i) {
1306                 TCGTemp *ts2 = tcg_temp_alloc(s);
1307 
1308                 tcg_debug_assert(ts2 == ts + i);
1309                 ts2->base_type = type;
1310                 ts2->type = TCG_TYPE_REG;
1311                 ts2->temp_allocated = 1;
1312                 ts2->temp_subindex = i;
1313                 ts2->kind = kind;
1314             }
1315         }
1316     }
1317 
1318 #if defined(CONFIG_DEBUG_TCG)
1319     s->temps_in_use++;
1320 #endif
1321     return ts;
1322 }
1323 
1324 TCGv_vec tcg_temp_new_vec(TCGType type)
1325 {
1326     TCGTemp *t;
1327 
1328 #ifdef CONFIG_DEBUG_TCG
1329     switch (type) {
1330     case TCG_TYPE_V64:
1331         assert(TCG_TARGET_HAS_v64);
1332         break;
1333     case TCG_TYPE_V128:
1334         assert(TCG_TARGET_HAS_v128);
1335         break;
1336     case TCG_TYPE_V256:
1337         assert(TCG_TARGET_HAS_v256);
1338         break;
1339     default:
1340         g_assert_not_reached();
1341     }
1342 #endif
1343 
1344     t = tcg_temp_new_internal(type, 0);
1345     return temp_tcgv_vec(t);
1346 }
1347 
1348 /* Create a new temp of the same type as an existing temp.  */
1349 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1350 {
1351     TCGTemp *t = tcgv_vec_temp(match);
1352 
1353     tcg_debug_assert(t->temp_allocated != 0);
1354 
1355     t = tcg_temp_new_internal(t->base_type, 0);
1356     return temp_tcgv_vec(t);
1357 }
1358 
1359 void tcg_temp_free_internal(TCGTemp *ts)
1360 {
1361     TCGContext *s = tcg_ctx;
1362     int k, idx;
1363 
1364     switch (ts->kind) {
1365     case TEMP_CONST:
1366         /*
1367          * In order to simplify users of tcg_constant_*,
1368          * silently ignore free.
1369          */
1370         return;
1371     case TEMP_NORMAL:
1372     case TEMP_LOCAL:
1373         break;
1374     default:
1375         g_assert_not_reached();
1376     }
1377 
1378 #if defined(CONFIG_DEBUG_TCG)
1379     s->temps_in_use--;
1380     if (s->temps_in_use < 0) {
1381         fprintf(stderr, "More temporaries freed than allocated!\n");
1382     }
1383 #endif
1384 
1385     tcg_debug_assert(ts->temp_allocated != 0);
1386     ts->temp_allocated = 0;
1387 
1388     idx = temp_idx(ts);
1389     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1390     set_bit(idx, s->free_temps[k].l);
1391 }
1392 
1393 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1394 {
1395     TCGContext *s = tcg_ctx;
1396     GHashTable *h = s->const_table[type];
1397     TCGTemp *ts;
1398 
1399     if (h == NULL) {
1400         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1401         s->const_table[type] = h;
1402     }
1403 
1404     ts = g_hash_table_lookup(h, &val);
1405     if (ts == NULL) {
1406         int64_t *val_ptr;
1407 
1408         ts = tcg_temp_alloc(s);
1409 
1410         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1411             TCGTemp *ts2 = tcg_temp_alloc(s);
1412 
1413             tcg_debug_assert(ts2 == ts + 1);
1414 
1415             ts->base_type = TCG_TYPE_I64;
1416             ts->type = TCG_TYPE_I32;
1417             ts->kind = TEMP_CONST;
1418             ts->temp_allocated = 1;
1419 
1420             ts2->base_type = TCG_TYPE_I64;
1421             ts2->type = TCG_TYPE_I32;
1422             ts2->kind = TEMP_CONST;
1423             ts2->temp_allocated = 1;
1424             ts2->temp_subindex = 1;
1425 
1426             /*
1427              * Retain the full value of the 64-bit constant in the low
1428              * part, so that the hash table works.  Actual uses will
1429              * truncate the value to the low part.
1430              */
1431             ts[HOST_BIG_ENDIAN].val = val;
1432             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1433             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1434         } else {
1435             ts->base_type = type;
1436             ts->type = type;
1437             ts->kind = TEMP_CONST;
1438             ts->temp_allocated = 1;
1439             ts->val = val;
1440             val_ptr = &ts->val;
1441         }
1442         g_hash_table_insert(h, val_ptr, ts);
1443     }
1444 
1445     return ts;
1446 }
1447 
1448 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1449 {
1450     val = dup_const(vece, val);
1451     return temp_tcgv_vec(tcg_constant_internal(type, val));
1452 }
1453 
1454 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1455 {
1456     TCGTemp *t = tcgv_vec_temp(match);
1457 
1458     tcg_debug_assert(t->temp_allocated != 0);
1459     return tcg_constant_vec(t->base_type, vece, val);
1460 }
1461 
1462 TCGv_i32 tcg_const_i32(int32_t val)
1463 {
1464     TCGv_i32 t0;
1465     t0 = tcg_temp_new_i32();
1466     tcg_gen_movi_i32(t0, val);
1467     return t0;
1468 }
1469 
1470 TCGv_i64 tcg_const_i64(int64_t val)
1471 {
1472     TCGv_i64 t0;
1473     t0 = tcg_temp_new_i64();
1474     tcg_gen_movi_i64(t0, val);
1475     return t0;
1476 }
1477 
1478 TCGv_i32 tcg_const_local_i32(int32_t val)
1479 {
1480     TCGv_i32 t0;
1481     t0 = tcg_temp_local_new_i32();
1482     tcg_gen_movi_i32(t0, val);
1483     return t0;
1484 }
1485 
1486 TCGv_i64 tcg_const_local_i64(int64_t val)
1487 {
1488     TCGv_i64 t0;
1489     t0 = tcg_temp_local_new_i64();
1490     tcg_gen_movi_i64(t0, val);
1491     return t0;
1492 }
1493 
1494 #if defined(CONFIG_DEBUG_TCG)
1495 void tcg_clear_temp_count(void)
1496 {
1497     TCGContext *s = tcg_ctx;
1498     s->temps_in_use = 0;
1499 }
1500 
1501 int tcg_check_temp_count(void)
1502 {
1503     TCGContext *s = tcg_ctx;
1504     if (s->temps_in_use) {
1505         /* Clear the count so that we don't give another
1506          * warning immediately next time around.
1507          */
1508         s->temps_in_use = 0;
1509         return 1;
1510     }
1511     return 0;
1512 }
1513 #endif
1514 
1515 /* Return true if OP may appear in the opcode stream.
1516    Test the runtime variable that controls each opcode.  */
1517 bool tcg_op_supported(TCGOpcode op)
1518 {
1519     const bool have_vec
1520         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1521 
1522     switch (op) {
1523     case INDEX_op_discard:
1524     case INDEX_op_set_label:
1525     case INDEX_op_call:
1526     case INDEX_op_br:
1527     case INDEX_op_mb:
1528     case INDEX_op_insn_start:
1529     case INDEX_op_exit_tb:
1530     case INDEX_op_goto_tb:
1531     case INDEX_op_goto_ptr:
1532     case INDEX_op_qemu_ld_i32:
1533     case INDEX_op_qemu_st_i32:
1534     case INDEX_op_qemu_ld_i64:
1535     case INDEX_op_qemu_st_i64:
1536         return true;
1537 
1538     case INDEX_op_qemu_st8_i32:
1539         return TCG_TARGET_HAS_qemu_st8_i32;
1540 
1541     case INDEX_op_mov_i32:
1542     case INDEX_op_setcond_i32:
1543     case INDEX_op_brcond_i32:
1544     case INDEX_op_ld8u_i32:
1545     case INDEX_op_ld8s_i32:
1546     case INDEX_op_ld16u_i32:
1547     case INDEX_op_ld16s_i32:
1548     case INDEX_op_ld_i32:
1549     case INDEX_op_st8_i32:
1550     case INDEX_op_st16_i32:
1551     case INDEX_op_st_i32:
1552     case INDEX_op_add_i32:
1553     case INDEX_op_sub_i32:
1554     case INDEX_op_mul_i32:
1555     case INDEX_op_and_i32:
1556     case INDEX_op_or_i32:
1557     case INDEX_op_xor_i32:
1558     case INDEX_op_shl_i32:
1559     case INDEX_op_shr_i32:
1560     case INDEX_op_sar_i32:
1561         return true;
1562 
1563     case INDEX_op_movcond_i32:
1564         return TCG_TARGET_HAS_movcond_i32;
1565     case INDEX_op_div_i32:
1566     case INDEX_op_divu_i32:
1567         return TCG_TARGET_HAS_div_i32;
1568     case INDEX_op_rem_i32:
1569     case INDEX_op_remu_i32:
1570         return TCG_TARGET_HAS_rem_i32;
1571     case INDEX_op_div2_i32:
1572     case INDEX_op_divu2_i32:
1573         return TCG_TARGET_HAS_div2_i32;
1574     case INDEX_op_rotl_i32:
1575     case INDEX_op_rotr_i32:
1576         return TCG_TARGET_HAS_rot_i32;
1577     case INDEX_op_deposit_i32:
1578         return TCG_TARGET_HAS_deposit_i32;
1579     case INDEX_op_extract_i32:
1580         return TCG_TARGET_HAS_extract_i32;
1581     case INDEX_op_sextract_i32:
1582         return TCG_TARGET_HAS_sextract_i32;
1583     case INDEX_op_extract2_i32:
1584         return TCG_TARGET_HAS_extract2_i32;
1585     case INDEX_op_add2_i32:
1586         return TCG_TARGET_HAS_add2_i32;
1587     case INDEX_op_sub2_i32:
1588         return TCG_TARGET_HAS_sub2_i32;
1589     case INDEX_op_mulu2_i32:
1590         return TCG_TARGET_HAS_mulu2_i32;
1591     case INDEX_op_muls2_i32:
1592         return TCG_TARGET_HAS_muls2_i32;
1593     case INDEX_op_muluh_i32:
1594         return TCG_TARGET_HAS_muluh_i32;
1595     case INDEX_op_mulsh_i32:
1596         return TCG_TARGET_HAS_mulsh_i32;
1597     case INDEX_op_ext8s_i32:
1598         return TCG_TARGET_HAS_ext8s_i32;
1599     case INDEX_op_ext16s_i32:
1600         return TCG_TARGET_HAS_ext16s_i32;
1601     case INDEX_op_ext8u_i32:
1602         return TCG_TARGET_HAS_ext8u_i32;
1603     case INDEX_op_ext16u_i32:
1604         return TCG_TARGET_HAS_ext16u_i32;
1605     case INDEX_op_bswap16_i32:
1606         return TCG_TARGET_HAS_bswap16_i32;
1607     case INDEX_op_bswap32_i32:
1608         return TCG_TARGET_HAS_bswap32_i32;
1609     case INDEX_op_not_i32:
1610         return TCG_TARGET_HAS_not_i32;
1611     case INDEX_op_neg_i32:
1612         return TCG_TARGET_HAS_neg_i32;
1613     case INDEX_op_andc_i32:
1614         return TCG_TARGET_HAS_andc_i32;
1615     case INDEX_op_orc_i32:
1616         return TCG_TARGET_HAS_orc_i32;
1617     case INDEX_op_eqv_i32:
1618         return TCG_TARGET_HAS_eqv_i32;
1619     case INDEX_op_nand_i32:
1620         return TCG_TARGET_HAS_nand_i32;
1621     case INDEX_op_nor_i32:
1622         return TCG_TARGET_HAS_nor_i32;
1623     case INDEX_op_clz_i32:
1624         return TCG_TARGET_HAS_clz_i32;
1625     case INDEX_op_ctz_i32:
1626         return TCG_TARGET_HAS_ctz_i32;
1627     case INDEX_op_ctpop_i32:
1628         return TCG_TARGET_HAS_ctpop_i32;
1629 
1630     case INDEX_op_brcond2_i32:
1631     case INDEX_op_setcond2_i32:
1632         return TCG_TARGET_REG_BITS == 32;
1633 
1634     case INDEX_op_mov_i64:
1635     case INDEX_op_setcond_i64:
1636     case INDEX_op_brcond_i64:
1637     case INDEX_op_ld8u_i64:
1638     case INDEX_op_ld8s_i64:
1639     case INDEX_op_ld16u_i64:
1640     case INDEX_op_ld16s_i64:
1641     case INDEX_op_ld32u_i64:
1642     case INDEX_op_ld32s_i64:
1643     case INDEX_op_ld_i64:
1644     case INDEX_op_st8_i64:
1645     case INDEX_op_st16_i64:
1646     case INDEX_op_st32_i64:
1647     case INDEX_op_st_i64:
1648     case INDEX_op_add_i64:
1649     case INDEX_op_sub_i64:
1650     case INDEX_op_mul_i64:
1651     case INDEX_op_and_i64:
1652     case INDEX_op_or_i64:
1653     case INDEX_op_xor_i64:
1654     case INDEX_op_shl_i64:
1655     case INDEX_op_shr_i64:
1656     case INDEX_op_sar_i64:
1657     case INDEX_op_ext_i32_i64:
1658     case INDEX_op_extu_i32_i64:
1659         return TCG_TARGET_REG_BITS == 64;
1660 
1661     case INDEX_op_movcond_i64:
1662         return TCG_TARGET_HAS_movcond_i64;
1663     case INDEX_op_div_i64:
1664     case INDEX_op_divu_i64:
1665         return TCG_TARGET_HAS_div_i64;
1666     case INDEX_op_rem_i64:
1667     case INDEX_op_remu_i64:
1668         return TCG_TARGET_HAS_rem_i64;
1669     case INDEX_op_div2_i64:
1670     case INDEX_op_divu2_i64:
1671         return TCG_TARGET_HAS_div2_i64;
1672     case INDEX_op_rotl_i64:
1673     case INDEX_op_rotr_i64:
1674         return TCG_TARGET_HAS_rot_i64;
1675     case INDEX_op_deposit_i64:
1676         return TCG_TARGET_HAS_deposit_i64;
1677     case INDEX_op_extract_i64:
1678         return TCG_TARGET_HAS_extract_i64;
1679     case INDEX_op_sextract_i64:
1680         return TCG_TARGET_HAS_sextract_i64;
1681     case INDEX_op_extract2_i64:
1682         return TCG_TARGET_HAS_extract2_i64;
1683     case INDEX_op_extrl_i64_i32:
1684         return TCG_TARGET_HAS_extrl_i64_i32;
1685     case INDEX_op_extrh_i64_i32:
1686         return TCG_TARGET_HAS_extrh_i64_i32;
1687     case INDEX_op_ext8s_i64:
1688         return TCG_TARGET_HAS_ext8s_i64;
1689     case INDEX_op_ext16s_i64:
1690         return TCG_TARGET_HAS_ext16s_i64;
1691     case INDEX_op_ext32s_i64:
1692         return TCG_TARGET_HAS_ext32s_i64;
1693     case INDEX_op_ext8u_i64:
1694         return TCG_TARGET_HAS_ext8u_i64;
1695     case INDEX_op_ext16u_i64:
1696         return TCG_TARGET_HAS_ext16u_i64;
1697     case INDEX_op_ext32u_i64:
1698         return TCG_TARGET_HAS_ext32u_i64;
1699     case INDEX_op_bswap16_i64:
1700         return TCG_TARGET_HAS_bswap16_i64;
1701     case INDEX_op_bswap32_i64:
1702         return TCG_TARGET_HAS_bswap32_i64;
1703     case INDEX_op_bswap64_i64:
1704         return TCG_TARGET_HAS_bswap64_i64;
1705     case INDEX_op_not_i64:
1706         return TCG_TARGET_HAS_not_i64;
1707     case INDEX_op_neg_i64:
1708         return TCG_TARGET_HAS_neg_i64;
1709     case INDEX_op_andc_i64:
1710         return TCG_TARGET_HAS_andc_i64;
1711     case INDEX_op_orc_i64:
1712         return TCG_TARGET_HAS_orc_i64;
1713     case INDEX_op_eqv_i64:
1714         return TCG_TARGET_HAS_eqv_i64;
1715     case INDEX_op_nand_i64:
1716         return TCG_TARGET_HAS_nand_i64;
1717     case INDEX_op_nor_i64:
1718         return TCG_TARGET_HAS_nor_i64;
1719     case INDEX_op_clz_i64:
1720         return TCG_TARGET_HAS_clz_i64;
1721     case INDEX_op_ctz_i64:
1722         return TCG_TARGET_HAS_ctz_i64;
1723     case INDEX_op_ctpop_i64:
1724         return TCG_TARGET_HAS_ctpop_i64;
1725     case INDEX_op_add2_i64:
1726         return TCG_TARGET_HAS_add2_i64;
1727     case INDEX_op_sub2_i64:
1728         return TCG_TARGET_HAS_sub2_i64;
1729     case INDEX_op_mulu2_i64:
1730         return TCG_TARGET_HAS_mulu2_i64;
1731     case INDEX_op_muls2_i64:
1732         return TCG_TARGET_HAS_muls2_i64;
1733     case INDEX_op_muluh_i64:
1734         return TCG_TARGET_HAS_muluh_i64;
1735     case INDEX_op_mulsh_i64:
1736         return TCG_TARGET_HAS_mulsh_i64;
1737 
1738     case INDEX_op_mov_vec:
1739     case INDEX_op_dup_vec:
1740     case INDEX_op_dupm_vec:
1741     case INDEX_op_ld_vec:
1742     case INDEX_op_st_vec:
1743     case INDEX_op_add_vec:
1744     case INDEX_op_sub_vec:
1745     case INDEX_op_and_vec:
1746     case INDEX_op_or_vec:
1747     case INDEX_op_xor_vec:
1748     case INDEX_op_cmp_vec:
1749         return have_vec;
1750     case INDEX_op_dup2_vec:
1751         return have_vec && TCG_TARGET_REG_BITS == 32;
1752     case INDEX_op_not_vec:
1753         return have_vec && TCG_TARGET_HAS_not_vec;
1754     case INDEX_op_neg_vec:
1755         return have_vec && TCG_TARGET_HAS_neg_vec;
1756     case INDEX_op_abs_vec:
1757         return have_vec && TCG_TARGET_HAS_abs_vec;
1758     case INDEX_op_andc_vec:
1759         return have_vec && TCG_TARGET_HAS_andc_vec;
1760     case INDEX_op_orc_vec:
1761         return have_vec && TCG_TARGET_HAS_orc_vec;
1762     case INDEX_op_nand_vec:
1763         return have_vec && TCG_TARGET_HAS_nand_vec;
1764     case INDEX_op_nor_vec:
1765         return have_vec && TCG_TARGET_HAS_nor_vec;
1766     case INDEX_op_eqv_vec:
1767         return have_vec && TCG_TARGET_HAS_eqv_vec;
1768     case INDEX_op_mul_vec:
1769         return have_vec && TCG_TARGET_HAS_mul_vec;
1770     case INDEX_op_shli_vec:
1771     case INDEX_op_shri_vec:
1772     case INDEX_op_sari_vec:
1773         return have_vec && TCG_TARGET_HAS_shi_vec;
1774     case INDEX_op_shls_vec:
1775     case INDEX_op_shrs_vec:
1776     case INDEX_op_sars_vec:
1777         return have_vec && TCG_TARGET_HAS_shs_vec;
1778     case INDEX_op_shlv_vec:
1779     case INDEX_op_shrv_vec:
1780     case INDEX_op_sarv_vec:
1781         return have_vec && TCG_TARGET_HAS_shv_vec;
1782     case INDEX_op_rotli_vec:
1783         return have_vec && TCG_TARGET_HAS_roti_vec;
1784     case INDEX_op_rotls_vec:
1785         return have_vec && TCG_TARGET_HAS_rots_vec;
1786     case INDEX_op_rotlv_vec:
1787     case INDEX_op_rotrv_vec:
1788         return have_vec && TCG_TARGET_HAS_rotv_vec;
1789     case INDEX_op_ssadd_vec:
1790     case INDEX_op_usadd_vec:
1791     case INDEX_op_sssub_vec:
1792     case INDEX_op_ussub_vec:
1793         return have_vec && TCG_TARGET_HAS_sat_vec;
1794     case INDEX_op_smin_vec:
1795     case INDEX_op_umin_vec:
1796     case INDEX_op_smax_vec:
1797     case INDEX_op_umax_vec:
1798         return have_vec && TCG_TARGET_HAS_minmax_vec;
1799     case INDEX_op_bitsel_vec:
1800         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1801     case INDEX_op_cmpsel_vec:
1802         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1803 
1804     default:
1805         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1806         return true;
1807     }
1808 }
1809 
1810 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1811 
1812 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1813 {
1814     const TCGHelperInfo *info;
1815     TCGv_i64 extend_free[MAX_CALL_IARGS];
1816     int n_extend = 0;
1817     TCGOp *op;
1818     int i, n, pi = 0, total_args;
1819 
1820     info = g_hash_table_lookup(helper_table, (gpointer)func);
1821     total_args = info->nr_out + info->nr_in + 2;
1822     op = tcg_op_alloc(INDEX_op_call, total_args);
1823 
1824 #ifdef CONFIG_PLUGIN
1825     /* Flag helpers that may affect guest state */
1826     if (tcg_ctx->plugin_insn &&
1827         !(info->flags & TCG_CALL_PLUGIN) &&
1828         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1829         tcg_ctx->plugin_insn->calls_helpers = true;
1830     }
1831 #endif
1832 
1833     TCGOP_CALLO(op) = n = info->nr_out;
1834     switch (n) {
1835     case 0:
1836         tcg_debug_assert(ret == NULL);
1837         break;
1838     case 1:
1839         tcg_debug_assert(ret != NULL);
1840         op->args[pi++] = temp_arg(ret);
1841         break;
1842     case 2:
1843     case 4:
1844         tcg_debug_assert(ret != NULL);
1845         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
1846         tcg_debug_assert(ret->temp_subindex == 0);
1847         for (i = 0; i < n; ++i) {
1848             op->args[pi++] = temp_arg(ret + i);
1849         }
1850         break;
1851     default:
1852         g_assert_not_reached();
1853     }
1854 
1855     TCGOP_CALLI(op) = n = info->nr_in;
1856     for (i = 0; i < n; i++) {
1857         const TCGCallArgumentLoc *loc = &info->in[i];
1858         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
1859 
1860         switch (loc->kind) {
1861         case TCG_CALL_ARG_NORMAL:
1862         case TCG_CALL_ARG_BY_REF:
1863         case TCG_CALL_ARG_BY_REF_N:
1864             op->args[pi++] = temp_arg(ts);
1865             break;
1866 
1867         case TCG_CALL_ARG_EXTEND_U:
1868         case TCG_CALL_ARG_EXTEND_S:
1869             {
1870                 TCGv_i64 temp = tcg_temp_new_i64();
1871                 TCGv_i32 orig = temp_tcgv_i32(ts);
1872 
1873                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
1874                     tcg_gen_ext_i32_i64(temp, orig);
1875                 } else {
1876                     tcg_gen_extu_i32_i64(temp, orig);
1877                 }
1878                 op->args[pi++] = tcgv_i64_arg(temp);
1879                 extend_free[n_extend++] = temp;
1880             }
1881             break;
1882 
1883         default:
1884             g_assert_not_reached();
1885         }
1886     }
1887     op->args[pi++] = (uintptr_t)func;
1888     op->args[pi++] = (uintptr_t)info;
1889     tcg_debug_assert(pi == total_args);
1890 
1891     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
1892 
1893     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
1894     for (i = 0; i < n_extend; ++i) {
1895         tcg_temp_free_i64(extend_free[i]);
1896     }
1897 }
1898 
1899 static void tcg_reg_alloc_start(TCGContext *s)
1900 {
1901     int i, n;
1902 
1903     for (i = 0, n = s->nb_temps; i < n; i++) {
1904         TCGTemp *ts = &s->temps[i];
1905         TCGTempVal val = TEMP_VAL_MEM;
1906 
1907         switch (ts->kind) {
1908         case TEMP_CONST:
1909             val = TEMP_VAL_CONST;
1910             break;
1911         case TEMP_FIXED:
1912             val = TEMP_VAL_REG;
1913             break;
1914         case TEMP_GLOBAL:
1915             break;
1916         case TEMP_NORMAL:
1917         case TEMP_EBB:
1918             val = TEMP_VAL_DEAD;
1919             /* fall through */
1920         case TEMP_LOCAL:
1921             ts->mem_allocated = 0;
1922             break;
1923         default:
1924             g_assert_not_reached();
1925         }
1926         ts->val_type = val;
1927     }
1928 
1929     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1930 }
1931 
1932 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1933                                  TCGTemp *ts)
1934 {
1935     int idx = temp_idx(ts);
1936 
1937     switch (ts->kind) {
1938     case TEMP_FIXED:
1939     case TEMP_GLOBAL:
1940         pstrcpy(buf, buf_size, ts->name);
1941         break;
1942     case TEMP_LOCAL:
1943         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1944         break;
1945     case TEMP_EBB:
1946         snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1947         break;
1948     case TEMP_NORMAL:
1949         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1950         break;
1951     case TEMP_CONST:
1952         switch (ts->type) {
1953         case TCG_TYPE_I32:
1954             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1955             break;
1956 #if TCG_TARGET_REG_BITS > 32
1957         case TCG_TYPE_I64:
1958             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1959             break;
1960 #endif
1961         case TCG_TYPE_V64:
1962         case TCG_TYPE_V128:
1963         case TCG_TYPE_V256:
1964             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1965                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1966             break;
1967         default:
1968             g_assert_not_reached();
1969         }
1970         break;
1971     }
1972     return buf;
1973 }
1974 
1975 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1976                              int buf_size, TCGArg arg)
1977 {
1978     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1979 }
1980 
1981 static const char * const cond_name[] =
1982 {
1983     [TCG_COND_NEVER] = "never",
1984     [TCG_COND_ALWAYS] = "always",
1985     [TCG_COND_EQ] = "eq",
1986     [TCG_COND_NE] = "ne",
1987     [TCG_COND_LT] = "lt",
1988     [TCG_COND_GE] = "ge",
1989     [TCG_COND_LE] = "le",
1990     [TCG_COND_GT] = "gt",
1991     [TCG_COND_LTU] = "ltu",
1992     [TCG_COND_GEU] = "geu",
1993     [TCG_COND_LEU] = "leu",
1994     [TCG_COND_GTU] = "gtu"
1995 };
1996 
1997 static const char * const ldst_name[] =
1998 {
1999     [MO_UB]   = "ub",
2000     [MO_SB]   = "sb",
2001     [MO_LEUW] = "leuw",
2002     [MO_LESW] = "lesw",
2003     [MO_LEUL] = "leul",
2004     [MO_LESL] = "lesl",
2005     [MO_LEUQ] = "leq",
2006     [MO_BEUW] = "beuw",
2007     [MO_BESW] = "besw",
2008     [MO_BEUL] = "beul",
2009     [MO_BESL] = "besl",
2010     [MO_BEUQ] = "beq",
2011 };
2012 
2013 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2014 #ifdef TARGET_ALIGNED_ONLY
2015     [MO_UNALN >> MO_ASHIFT]    = "un+",
2016     [MO_ALIGN >> MO_ASHIFT]    = "",
2017 #else
2018     [MO_UNALN >> MO_ASHIFT]    = "",
2019     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2020 #endif
2021     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2022     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2023     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2024     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2025     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2026     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2027 };
2028 
2029 static const char bswap_flag_name[][6] = {
2030     [TCG_BSWAP_IZ] = "iz",
2031     [TCG_BSWAP_OZ] = "oz",
2032     [TCG_BSWAP_OS] = "os",
2033     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2034     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2035 };
2036 
2037 static inline bool tcg_regset_single(TCGRegSet d)
2038 {
2039     return (d & (d - 1)) == 0;
2040 }
2041 
2042 static inline TCGReg tcg_regset_first(TCGRegSet d)
2043 {
2044     if (TCG_TARGET_NB_REGS <= 32) {
2045         return ctz32(d);
2046     } else {
2047         return ctz64(d);
2048     }
2049 }
2050 
/*
 * "No-error" fprintf: evaluates to the number of characters written, with
 * a negative fprintf() result mapped to 0 so callers can add it blindly.
 */
#define ne_fprintf(...) \
    ({ int nout_ = fprintf(__VA_ARGS__); nout_ < 0 ? 0 : nout_; })
2054 
2055 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2056 {
2057     char buf[128];
2058     TCGOp *op;
2059 
2060     QTAILQ_FOREACH(op, &s->ops, link) {
2061         int i, k, nb_oargs, nb_iargs, nb_cargs;
2062         const TCGOpDef *def;
2063         TCGOpcode c;
2064         int col = 0;
2065 
2066         c = op->opc;
2067         def = &tcg_op_defs[c];
2068 
2069         if (c == INDEX_op_insn_start) {
2070             nb_oargs = 0;
2071             col += ne_fprintf(f, "\n ----");
2072 
2073             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2074                 target_ulong a;
2075 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2076                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2077 #else
2078                 a = op->args[i];
2079 #endif
2080                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
2081             }
2082         } else if (c == INDEX_op_call) {
2083             const TCGHelperInfo *info = tcg_call_info(op);
2084             void *func = tcg_call_func(op);
2085 
2086             /* variable number of arguments */
2087             nb_oargs = TCGOP_CALLO(op);
2088             nb_iargs = TCGOP_CALLI(op);
2089             nb_cargs = def->nb_cargs;
2090 
2091             col += ne_fprintf(f, " %s ", def->name);
2092 
2093             /*
2094              * Print the function name from TCGHelperInfo, if available.
2095              * Note that plugins have a template function for the info,
2096              * but the actual function pointer comes from the plugin.
2097              */
2098             if (func == info->func) {
2099                 col += ne_fprintf(f, "%s", info->name);
2100             } else {
2101                 col += ne_fprintf(f, "plugin(%p)", func);
2102             }
2103 
2104             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2105             for (i = 0; i < nb_oargs; i++) {
2106                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2107                                                             op->args[i]));
2108             }
2109             for (i = 0; i < nb_iargs; i++) {
2110                 TCGArg arg = op->args[nb_oargs + i];
2111                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2112                 col += ne_fprintf(f, ",%s", t);
2113             }
2114         } else {
2115             col += ne_fprintf(f, " %s ", def->name);
2116 
2117             nb_oargs = def->nb_oargs;
2118             nb_iargs = def->nb_iargs;
2119             nb_cargs = def->nb_cargs;
2120 
2121             if (def->flags & TCG_OPF_VECTOR) {
2122                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2123                                   8 << TCGOP_VECE(op));
2124             }
2125 
2126             k = 0;
2127             for (i = 0; i < nb_oargs; i++) {
2128                 const char *sep =  k ? "," : "";
2129                 col += ne_fprintf(f, "%s%s", sep,
2130                                   tcg_get_arg_str(s, buf, sizeof(buf),
2131                                                   op->args[k++]));
2132             }
2133             for (i = 0; i < nb_iargs; i++) {
2134                 const char *sep =  k ? "," : "";
2135                 col += ne_fprintf(f, "%s%s", sep,
2136                                   tcg_get_arg_str(s, buf, sizeof(buf),
2137                                                   op->args[k++]));
2138             }
2139             switch (c) {
2140             case INDEX_op_brcond_i32:
2141             case INDEX_op_setcond_i32:
2142             case INDEX_op_movcond_i32:
2143             case INDEX_op_brcond2_i32:
2144             case INDEX_op_setcond2_i32:
2145             case INDEX_op_brcond_i64:
2146             case INDEX_op_setcond_i64:
2147             case INDEX_op_movcond_i64:
2148             case INDEX_op_cmp_vec:
2149             case INDEX_op_cmpsel_vec:
2150                 if (op->args[k] < ARRAY_SIZE(cond_name)
2151                     && cond_name[op->args[k]]) {
2152                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2153                 } else {
2154                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2155                 }
2156                 i = 1;
2157                 break;
2158             case INDEX_op_qemu_ld_i32:
2159             case INDEX_op_qemu_st_i32:
2160             case INDEX_op_qemu_st8_i32:
2161             case INDEX_op_qemu_ld_i64:
2162             case INDEX_op_qemu_st_i64:
2163                 {
2164                     MemOpIdx oi = op->args[k++];
2165                     MemOp op = get_memop(oi);
2166                     unsigned ix = get_mmuidx(oi);
2167 
2168                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2169                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2170                     } else {
2171                         const char *s_al, *s_op;
2172                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2173                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2174                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
2175                     }
2176                     i = 1;
2177                 }
2178                 break;
2179             case INDEX_op_bswap16_i32:
2180             case INDEX_op_bswap16_i64:
2181             case INDEX_op_bswap32_i32:
2182             case INDEX_op_bswap32_i64:
2183             case INDEX_op_bswap64_i64:
2184                 {
2185                     TCGArg flags = op->args[k];
2186                     const char *name = NULL;
2187 
2188                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2189                         name = bswap_flag_name[flags];
2190                     }
2191                     if (name) {
2192                         col += ne_fprintf(f, ",%s", name);
2193                     } else {
2194                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2195                     }
2196                     i = k = 1;
2197                 }
2198                 break;
2199             default:
2200                 i = 0;
2201                 break;
2202             }
2203             switch (c) {
2204             case INDEX_op_set_label:
2205             case INDEX_op_br:
2206             case INDEX_op_brcond_i32:
2207             case INDEX_op_brcond_i64:
2208             case INDEX_op_brcond2_i32:
2209                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2210                                   arg_label(op->args[k])->id);
2211                 i++, k++;
2212                 break;
2213             default:
2214                 break;
2215             }
2216             for (; i < nb_cargs; i++, k++) {
2217                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2218                                   op->args[k]);
2219             }
2220         }
2221 
2222         if (have_prefs || op->life) {
2223             for (; col < 40; ++col) {
2224                 putc(' ', f);
2225             }
2226         }
2227 
2228         if (op->life) {
2229             unsigned life = op->life;
2230 
2231             if (life & (SYNC_ARG * 3)) {
2232                 ne_fprintf(f, "  sync:");
2233                 for (i = 0; i < 2; ++i) {
2234                     if (life & (SYNC_ARG << i)) {
2235                         ne_fprintf(f, " %d", i);
2236                     }
2237                 }
2238             }
2239             life /= DEAD_ARG;
2240             if (life) {
2241                 ne_fprintf(f, "  dead:");
2242                 for (i = 0; life; ++i, life >>= 1) {
2243                     if (life & 1) {
2244                         ne_fprintf(f, " %d", i);
2245                     }
2246                 }
2247             }
2248         }
2249 
2250         if (have_prefs) {
2251             for (i = 0; i < nb_oargs; ++i) {
2252                 TCGRegSet set = output_pref(op, i);
2253 
2254                 if (i == 0) {
2255                     ne_fprintf(f, "  pref=");
2256                 } else {
2257                     ne_fprintf(f, ",");
2258                 }
2259                 if (set == 0) {
2260                     ne_fprintf(f, "none");
2261                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2262                     ne_fprintf(f, "all");
2263 #ifdef CONFIG_DEBUG_TCG
2264                 } else if (tcg_regset_single(set)) {
2265                     TCGReg reg = tcg_regset_first(set);
2266                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2267 #endif
2268                 } else if (TCG_TARGET_NB_REGS <= 32) {
2269                     ne_fprintf(f, "0x%x", (uint32_t)set);
2270                 } else {
2271                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2272                 }
2273             }
2274         }
2275 
2276         putc('\n', f);
2277     }
2278 }
2279 
2280 /* we give more priority to constraints with less registers */
2281 static int get_constraint_priority(const TCGOpDef *def, int k)
2282 {
2283     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2284     int n = ctpop64(arg_ct->regs);
2285 
2286     /*
2287      * Sort constraints of a single register first, which includes output
2288      * aliases (which must exactly match the input already allocated).
2289      */
2290     if (n == 1 || arg_ct->oalias) {
2291         return INT_MAX;
2292     }
2293 
2294     /*
2295      * Sort register pairs next, first then second immediately after.
2296      * Arbitrarily sort multiple pairs by the index of the first reg;
2297      * there shouldn't be many pairs.
2298      */
2299     switch (arg_ct->pair) {
2300     case 1:
2301     case 3:
2302         return (k + 1) * 2;
2303     case 2:
2304         return (arg_ct->pair_index + 1) * 2 - 1;
2305     }
2306 
2307     /* Finally, sort by decreasing register count. */
2308     assert(n > 1);
2309     return -n;
2310 }
2311 
2312 /* sort from highest priority to lowest */
2313 static void sort_constraints(TCGOpDef *def, int start, int n)
2314 {
2315     int i, j;
2316     TCGArgConstraint *a = def->args_ct;
2317 
2318     for (i = 0; i < n; i++) {
2319         a[start + i].sort_index = start + i;
2320     }
2321     if (n <= 1) {
2322         return;
2323     }
2324     for (i = 0; i < n - 1; i++) {
2325         for (j = i + 1; j < n; j++) {
2326             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2327             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2328             if (p1 < p2) {
2329                 int tmp = a[start + i].sort_index;
2330                 a[start + i].sort_index = a[start + j].sort_index;
2331                 a[start + j].sort_index = tmp;
2332             }
2333         }
2334     }
2335 }
2336 
2337 static void process_op_defs(TCGContext *s)
2338 {
2339     TCGOpcode op;
2340 
2341     for (op = 0; op < NB_OPS; op++) {
2342         TCGOpDef *def = &tcg_op_defs[op];
2343         const TCGTargetOpDef *tdefs;
2344         bool saw_alias_pair = false;
2345         int i, o, i2, o2, nb_args;
2346 
2347         if (def->flags & TCG_OPF_NOT_PRESENT) {
2348             continue;
2349         }
2350 
2351         nb_args = def->nb_iargs + def->nb_oargs;
2352         if (nb_args == 0) {
2353             continue;
2354         }
2355 
2356         /*
2357          * Macro magic should make it impossible, but double-check that
2358          * the array index is in range.  Since the signness of an enum
2359          * is implementation defined, force the result to unsigned.
2360          */
2361         unsigned con_set = tcg_target_op_def(op);
2362         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2363         tdefs = &constraint_sets[con_set];
2364 
2365         for (i = 0; i < nb_args; i++) {
2366             const char *ct_str = tdefs->args_ct_str[i];
2367             bool input_p = i >= def->nb_oargs;
2368 
2369             /* Incomplete TCGTargetOpDef entry. */
2370             tcg_debug_assert(ct_str != NULL);
2371 
2372             switch (*ct_str) {
2373             case '0' ... '9':
2374                 o = *ct_str - '0';
2375                 tcg_debug_assert(input_p);
2376                 tcg_debug_assert(o < def->nb_oargs);
2377                 tcg_debug_assert(def->args_ct[o].regs != 0);
2378                 tcg_debug_assert(!def->args_ct[o].oalias);
2379                 def->args_ct[i] = def->args_ct[o];
2380                 /* The output sets oalias.  */
2381                 def->args_ct[o].oalias = 1;
2382                 def->args_ct[o].alias_index = i;
2383                 /* The input sets ialias. */
2384                 def->args_ct[i].ialias = 1;
2385                 def->args_ct[i].alias_index = o;
2386                 if (def->args_ct[i].pair) {
2387                     saw_alias_pair = true;
2388                 }
2389                 tcg_debug_assert(ct_str[1] == '\0');
2390                 continue;
2391 
2392             case '&':
2393                 tcg_debug_assert(!input_p);
2394                 def->args_ct[i].newreg = true;
2395                 ct_str++;
2396                 break;
2397 
2398             case 'p': /* plus */
2399                 /* Allocate to the register after the previous. */
2400                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2401                 o = i - 1;
2402                 tcg_debug_assert(!def->args_ct[o].pair);
2403                 tcg_debug_assert(!def->args_ct[o].ct);
2404                 def->args_ct[i] = (TCGArgConstraint){
2405                     .pair = 2,
2406                     .pair_index = o,
2407                     .regs = def->args_ct[o].regs << 1,
2408                 };
2409                 def->args_ct[o].pair = 1;
2410                 def->args_ct[o].pair_index = i;
2411                 tcg_debug_assert(ct_str[1] == '\0');
2412                 continue;
2413 
2414             case 'm': /* minus */
2415                 /* Allocate to the register before the previous. */
2416                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2417                 o = i - 1;
2418                 tcg_debug_assert(!def->args_ct[o].pair);
2419                 tcg_debug_assert(!def->args_ct[o].ct);
2420                 def->args_ct[i] = (TCGArgConstraint){
2421                     .pair = 1,
2422                     .pair_index = o,
2423                     .regs = def->args_ct[o].regs >> 1,
2424                 };
2425                 def->args_ct[o].pair = 2;
2426                 def->args_ct[o].pair_index = i;
2427                 tcg_debug_assert(ct_str[1] == '\0');
2428                 continue;
2429             }
2430 
2431             do {
2432                 switch (*ct_str) {
2433                 case 'i':
2434                     def->args_ct[i].ct |= TCG_CT_CONST;
2435                     break;
2436 
2437                 /* Include all of the target-specific constraints. */
2438 
2439 #undef CONST
2440 #define CONST(CASE, MASK) \
2441     case CASE: def->args_ct[i].ct |= MASK; break;
2442 #define REGS(CASE, MASK) \
2443     case CASE: def->args_ct[i].regs |= MASK; break;
2444 
2445 #include "tcg-target-con-str.h"
2446 
2447 #undef REGS
2448 #undef CONST
2449                 default:
2450                 case '0' ... '9':
2451                 case '&':
2452                 case 'p':
2453                 case 'm':
2454                     /* Typo in TCGTargetOpDef constraint. */
2455                     g_assert_not_reached();
2456                 }
2457             } while (*++ct_str != '\0');
2458         }
2459 
2460         /* TCGTargetOpDef entry with too much information? */
2461         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2462 
2463         /*
2464          * Fix up output pairs that are aliased with inputs.
2465          * When we created the alias, we copied pair from the output.
2466          * There are three cases:
2467          *    (1a) Pairs of inputs alias pairs of outputs.
2468          *    (1b) One input aliases the first of a pair of outputs.
2469          *    (2)  One input aliases the second of a pair of outputs.
2470          *
2471          * Case 1a is handled by making sure that the pair_index'es are
2472          * properly updated so that they appear the same as a pair of inputs.
2473          *
2474          * Case 1b is handled by setting the pair_index of the input to
2475          * itself, simply so it doesn't point to an unrelated argument.
2476          * Since we don't encounter the "second" during the input allocation
2477          * phase, nothing happens with the second half of the input pair.
2478          *
2479          * Case 2 is handled by setting the second input to pair=3, the
2480          * first output to pair=3, and the pair_index'es to match.
2481          */
2482         if (saw_alias_pair) {
2483             for (i = def->nb_oargs; i < nb_args; i++) {
2484                 /*
2485                  * Since [0-9pm] must be alone in the constraint string,
2486                  * the only way they can both be set is if the pair comes
2487                  * from the output alias.
2488                  */
2489                 if (!def->args_ct[i].ialias) {
2490                     continue;
2491                 }
2492                 switch (def->args_ct[i].pair) {
2493                 case 0:
2494                     break;
2495                 case 1:
2496                     o = def->args_ct[i].alias_index;
2497                     o2 = def->args_ct[o].pair_index;
2498                     tcg_debug_assert(def->args_ct[o].pair == 1);
2499                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2500                     if (def->args_ct[o2].oalias) {
2501                         /* Case 1a */
2502                         i2 = def->args_ct[o2].alias_index;
2503                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2504                         def->args_ct[i2].pair_index = i;
2505                         def->args_ct[i].pair_index = i2;
2506                     } else {
2507                         /* Case 1b */
2508                         def->args_ct[i].pair_index = i;
2509                     }
2510                     break;
2511                 case 2:
2512                     o = def->args_ct[i].alias_index;
2513                     o2 = def->args_ct[o].pair_index;
2514                     tcg_debug_assert(def->args_ct[o].pair == 2);
2515                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2516                     if (def->args_ct[o2].oalias) {
2517                         /* Case 1a */
2518                         i2 = def->args_ct[o2].alias_index;
2519                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2520                         def->args_ct[i2].pair_index = i;
2521                         def->args_ct[i].pair_index = i2;
2522                     } else {
2523                         /* Case 2 */
2524                         def->args_ct[i].pair = 3;
2525                         def->args_ct[o2].pair = 3;
2526                         def->args_ct[i].pair_index = o2;
2527                         def->args_ct[o2].pair_index = i;
2528                     }
2529                     break;
2530                 default:
2531                     g_assert_not_reached();
2532                 }
2533             }
2534         }
2535 
2536         /* sort the constraints (XXX: this is just an heuristic) */
2537         sort_constraints(def, 0, def->nb_oargs);
2538         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2539     }
2540 }
2541 
2542 void tcg_op_remove(TCGContext *s, TCGOp *op)
2543 {
2544     TCGLabel *label;
2545 
2546     switch (op->opc) {
2547     case INDEX_op_br:
2548         label = arg_label(op->args[0]);
2549         label->refs--;
2550         break;
2551     case INDEX_op_brcond_i32:
2552     case INDEX_op_brcond_i64:
2553         label = arg_label(op->args[3]);
2554         label->refs--;
2555         break;
2556     case INDEX_op_brcond2_i32:
2557         label = arg_label(op->args[5]);
2558         label->refs--;
2559         break;
2560     default:
2561         break;
2562     }
2563 
2564     QTAILQ_REMOVE(&s->ops, op, link);
2565     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2566     s->nb_ops--;
2567 
2568 #ifdef CONFIG_PROFILER
2569     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2570 #endif
2571 }
2572 
2573 void tcg_remove_ops_after(TCGOp *op)
2574 {
2575     TCGContext *s = tcg_ctx;
2576 
2577     while (true) {
2578         TCGOp *last = tcg_last_op();
2579         if (last == op) {
2580             return;
2581         }
2582         tcg_op_remove(s, last);
2583     }
2584 }
2585 
2586 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2587 {
2588     TCGContext *s = tcg_ctx;
2589     TCGOp *op = NULL;
2590 
2591     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2592         QTAILQ_FOREACH(op, &s->free_ops, link) {
2593             if (nargs <= op->nargs) {
2594                 QTAILQ_REMOVE(&s->free_ops, op, link);
2595                 nargs = op->nargs;
2596                 goto found;
2597             }
2598         }
2599     }
2600 
2601     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2602     nargs = MAX(4, nargs);
2603     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2604 
2605  found:
2606     memset(op, 0, offsetof(TCGOp, link));
2607     op->opc = opc;
2608     op->nargs = nargs;
2609 
2610     /* Check for bitfield overflow. */
2611     tcg_debug_assert(op->nargs == nargs);
2612 
2613     s->nb_ops++;
2614     return op;
2615 }
2616 
2617 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2618 {
2619     TCGOp *op = tcg_op_alloc(opc, nargs);
2620     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2621     return op;
2622 }
2623 
2624 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2625                             TCGOpcode opc, unsigned nargs)
2626 {
2627     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2628     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2629     return new_op;
2630 }
2631 
2632 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2633                            TCGOpcode opc, unsigned nargs)
2634 {
2635     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2636     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2637     return new_op;
2638 }
2639 
2640 /* Reachable analysis : remove unreachable code.  */
2641 static void reachable_code_pass(TCGContext *s)
2642 {
2643     TCGOp *op, *op_next;
2644     bool dead = false;
2645 
2646     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2647         bool remove = dead;
2648         TCGLabel *label;
2649 
2650         switch (op->opc) {
2651         case INDEX_op_set_label:
2652             label = arg_label(op->args[0]);
2653             if (label->refs == 0) {
2654                 /*
2655                  * While there is an occasional backward branch, virtually
2656                  * all branches generated by the translators are forward.
2657                  * Which means that generally we will have already removed
2658                  * all references to the label that will be, and there is
2659                  * little to be gained by iterating.
2660                  */
2661                 remove = true;
2662             } else {
2663                 /* Once we see a label, insns become live again.  */
2664                 dead = false;
2665                 remove = false;
2666 
2667                 /*
2668                  * Optimization can fold conditional branches to unconditional.
2669                  * If we find a label with one reference which is preceded by
2670                  * an unconditional branch to it, remove both.  This needed to
2671                  * wait until the dead code in between them was removed.
2672                  */
2673                 if (label->refs == 1) {
2674                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2675                     if (op_prev->opc == INDEX_op_br &&
2676                         label == arg_label(op_prev->args[0])) {
2677                         tcg_op_remove(s, op_prev);
2678                         remove = true;
2679                     }
2680                 }
2681             }
2682             break;
2683 
2684         case INDEX_op_br:
2685         case INDEX_op_exit_tb:
2686         case INDEX_op_goto_ptr:
2687             /* Unconditional branches; everything following is dead.  */
2688             dead = true;
2689             break;
2690 
2691         case INDEX_op_call:
2692             /* Notice noreturn helper calls, raising exceptions.  */
2693             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2694                 dead = true;
2695             }
2696             break;
2697 
2698         case INDEX_op_insn_start:
2699             /* Never remove -- we need to keep these for unwind.  */
2700             remove = false;
2701             break;
2702 
2703         default:
2704             break;
2705         }
2706 
2707         if (remove) {
2708             tcg_op_remove(s, op);
2709         }
2710     }
2711 }
2712 
2713 #define TS_DEAD  1
2714 #define TS_MEM   2
2715 
2716 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2717 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2718 
2719 /* For liveness_pass_1, the register preferences for a given temp.  */
2720 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2721 {
2722     return ts->state_ptr;
2723 }
2724 
2725 /* For liveness_pass_1, reset the preferences for a given temp to the
2726  * maximal regset for its type.
2727  */
2728 static inline void la_reset_pref(TCGTemp *ts)
2729 {
2730     *la_temp_pref(ts)
2731         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2732 }
2733 
2734 /* liveness analysis: end of function: all temps are dead, and globals
2735    should be in memory. */
2736 static void la_func_end(TCGContext *s, int ng, int nt)
2737 {
2738     int i;
2739 
2740     for (i = 0; i < ng; ++i) {
2741         s->temps[i].state = TS_DEAD | TS_MEM;
2742         la_reset_pref(&s->temps[i]);
2743     }
2744     for (i = ng; i < nt; ++i) {
2745         s->temps[i].state = TS_DEAD;
2746         la_reset_pref(&s->temps[i]);
2747     }
2748 }
2749 
2750 /* liveness analysis: end of basic block: all temps are dead, globals
2751    and local temps should be in memory. */
2752 static void la_bb_end(TCGContext *s, int ng, int nt)
2753 {
2754     int i;
2755 
2756     for (i = 0; i < nt; ++i) {
2757         TCGTemp *ts = &s->temps[i];
2758         int state;
2759 
2760         switch (ts->kind) {
2761         case TEMP_FIXED:
2762         case TEMP_GLOBAL:
2763         case TEMP_LOCAL:
2764             state = TS_DEAD | TS_MEM;
2765             break;
2766         case TEMP_NORMAL:
2767         case TEMP_EBB:
2768         case TEMP_CONST:
2769             state = TS_DEAD;
2770             break;
2771         default:
2772             g_assert_not_reached();
2773         }
2774         ts->state = state;
2775         la_reset_pref(ts);
2776     }
2777 }
2778 
2779 /* liveness analysis: sync globals back to memory.  */
2780 static void la_global_sync(TCGContext *s, int ng)
2781 {
2782     int i;
2783 
2784     for (i = 0; i < ng; ++i) {
2785         int state = s->temps[i].state;
2786         s->temps[i].state = state | TS_MEM;
2787         if (state == TS_DEAD) {
2788             /* If the global was previously dead, reset prefs.  */
2789             la_reset_pref(&s->temps[i]);
2790         }
2791     }
2792 }
2793 
2794 /*
2795  * liveness analysis: conditional branch: all temps are dead unless
2796  * explicitly live-across-conditional-branch, globals and local temps
2797  * should be synced.
2798  */
2799 static void la_bb_sync(TCGContext *s, int ng, int nt)
2800 {
2801     la_global_sync(s, ng);
2802 
2803     for (int i = ng; i < nt; ++i) {
2804         TCGTemp *ts = &s->temps[i];
2805         int state;
2806 
2807         switch (ts->kind) {
2808         case TEMP_LOCAL:
2809             state = ts->state;
2810             ts->state = state | TS_MEM;
2811             if (state != TS_DEAD) {
2812                 continue;
2813             }
2814             break;
2815         case TEMP_NORMAL:
2816             s->temps[i].state = TS_DEAD;
2817             break;
2818         case TEMP_EBB:
2819         case TEMP_CONST:
2820             continue;
2821         default:
2822             g_assert_not_reached();
2823         }
2824         la_reset_pref(&s->temps[i]);
2825     }
2826 }
2827 
2828 /* liveness analysis: sync globals back to memory and kill.  */
2829 static void la_global_kill(TCGContext *s, int ng)
2830 {
2831     int i;
2832 
2833     for (i = 0; i < ng; i++) {
2834         s->temps[i].state = TS_DEAD | TS_MEM;
2835         la_reset_pref(&s->temps[i]);
2836     }
2837 }
2838 
2839 /* liveness analysis: note live globals crossing calls.  */
2840 static void la_cross_call(TCGContext *s, int nt)
2841 {
2842     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2843     int i;
2844 
2845     for (i = 0; i < nt; i++) {
2846         TCGTemp *ts = &s->temps[i];
2847         if (!(ts->state & TS_DEAD)) {
2848             TCGRegSet *pset = la_temp_pref(ts);
2849             TCGRegSet set = *pset;
2850 
2851             set &= mask;
2852             /* If the combination is not possible, restart.  */
2853             if (set == 0) {
2854                 set = tcg_target_available_regs[ts->type] & mask;
2855             }
2856             *pset = set;
2857         }
2858     }
2859 }
2860 
2861 /* Liveness analysis: walk the op list backwards and record in op->life
2862    which arguments are dead (DEAD_ARG) and which outputs must be synced
2863    to memory (SYNC_ARG).  Side-effect-free ops whose outputs are all dead
        are removed, and double-word add2/sub2/mul2 ops with a dead half are
        narrowed to the corresponding single-word op.  Register preferences
        gathered from the uses that follow are stored in op->output_pref[].  */
2864 static void liveness_pass_1(TCGContext *s)
2865 {
2866     int nb_globals = s->nb_globals;
2867     int nb_temps = s->nb_temps;
2868     TCGOp *op, *op_prev;
2869     TCGRegSet *prefs;
2870     int i;
2871 
         /* One TCGRegSet of register preferences per temp, reached through
            ts->state_ptr (see la_temp_pref).  */
2872     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2873     for (i = 0; i < nb_temps; ++i) {
2874         s->temps[i].state_ptr = prefs + i;
2875     }
2876 
2877     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2878     la_func_end(s, nb_globals, nb_temps);
2879 
         /* Reverse walk; op_prev is kept so that the current op may be
            removed while iterating.  */
2880     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2881         int nb_iargs, nb_oargs;
2882         TCGOpcode opc_new, opc_new2;
2883         bool have_opc_new2;
2884         TCGLifeData arg_life = 0;
2885         TCGTemp *ts;
2886         TCGOpcode opc = op->opc;
2887         const TCGOpDef *def = &tcg_op_defs[opc];
2888 
2889         switch (opc) {
2890         case INDEX_op_call:
2891             {
2892                 const TCGHelperInfo *info = tcg_call_info(op);
2893                 int call_flags = tcg_call_flags(op);
2894 
2895                 nb_oargs = TCGOP_CALLO(op);
2896                 nb_iargs = TCGOP_CALLI(op);
2897 
2898                 /* pure functions can be removed if their result is unused */
2899                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2900                     for (i = 0; i < nb_oargs; i++) {
2901                         ts = arg_temp(op->args[i]);
2902                         if (ts->state != TS_DEAD) {
2903                             goto do_not_remove_call;
2904                         }
2905                     }
2906                     goto do_remove;
2907                 }
2908             do_not_remove_call:
2909 
2910                 /* Output args are dead.  */
2911                 for (i = 0; i < nb_oargs; i++) {
2912                     ts = arg_temp(op->args[i]);
2913                     if (ts->state & TS_DEAD) {
2914                         arg_life |= DEAD_ARG << i;
2915                     }
2916                     if (ts->state & TS_MEM) {
2917                         arg_life |= SYNC_ARG << i;
2918                     }
2919                     ts->state = TS_DEAD;
2920                     la_reset_pref(ts);
2921                 }
2922 
2923                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
2924                 memset(op->output_pref, 0, sizeof(op->output_pref));
2925 
                     /* Neither flag set: kill the globals.  Only
                        NO_WRITE_GLOBALS set: sync them.  NO_READ_GLOBALS
                        set: leave the globals alone.  */
2926                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2927                                     TCG_CALL_NO_READ_GLOBALS))) {
2928                     la_global_kill(s, nb_globals);
2929                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2930                     la_global_sync(s, nb_globals);
2931                 }
2932 
2933                 /* Record arguments that die in this helper.  */
2934                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2935                     ts = arg_temp(op->args[i]);
2936                     if (ts->state & TS_DEAD) {
2937                         arg_life |= DEAD_ARG << i;
2938                     }
2939                 }
2940 
2941                 /* For all live registers, remove call-clobbered prefs.  */
2942                 la_cross_call(s, nb_temps);
2943 
2944                 /*
2945                  * Input arguments are live for preceding opcodes.
2946                  *
2947                  * For those arguments that die, and will be allocated in
2948                  * registers, clear the register set for that arg, to be
2949                  * filled in below.  For args that will be on the stack,
2950                  * reset to any available reg.  Process arguments in reverse
2951                  * order so that if a temp is used more than once, the stack
2952                  * reset to max happens before the register reset to 0.
2953                  */
2954                 for (i = nb_iargs - 1; i >= 0; i--) {
2955                     const TCGCallArgumentLoc *loc = &info->in[i];
2956                     ts = arg_temp(op->args[nb_oargs + i]);
2957 
2958                     if (ts->state & TS_DEAD) {
2959                         switch (loc->kind) {
2960                         case TCG_CALL_ARG_NORMAL:
2961                         case TCG_CALL_ARG_EXTEND_U:
2962                         case TCG_CALL_ARG_EXTEND_S:
2963                             if (REG_P(loc)) {
2964                                 *la_temp_pref(ts) = 0;
2965                                 break;
2966                             }
2967                             /* fall through */
2968                         default:
2969                             *la_temp_pref(ts) =
2970                                 tcg_target_available_regs[ts->type];
2971                             break;
2972                         }
2973                         ts->state &= ~TS_DEAD;
2974                     }
2975                 }
2976 
2977                 /*
2978                  * For each input argument, add its input register to prefs.
2979                  * If a temp is used once, this produces a single set bit;
2980                  * if a temp is used multiple times, this produces a set.
2981                  */
2982                 for (i = 0; i < nb_iargs; i++) {
2983                     const TCGCallArgumentLoc *loc = &info->in[i];
2984                     ts = arg_temp(op->args[nb_oargs + i]);
2985 
2986                     switch (loc->kind) {
2987                     case TCG_CALL_ARG_NORMAL:
2988                     case TCG_CALL_ARG_EXTEND_U:
2989                     case TCG_CALL_ARG_EXTEND_S:
2990                         if (REG_P(loc)) {
2991                             tcg_regset_set_reg(*la_temp_pref(ts),
2992                                 tcg_target_call_iarg_regs[loc->arg_slot]);
2993                         }
2994                         break;
2995                     default:
2996                         break;
2997                     }
2998                 }
2999             }
3000             break;
3001         case INDEX_op_insn_start:
                 /* Nothing to analyse; op->life is left as 0.  */
3002             break;
3003         case INDEX_op_discard:
3004             /* mark the temporary as dead */
3005             ts = arg_temp(op->args[0]);
3006             ts->state = TS_DEAD;
3007             la_reset_pref(ts);
3008             break;
3009 
3010         case INDEX_op_add2_i32:
3011             opc_new = INDEX_op_add_i32;
3012             goto do_addsub2;
3013         case INDEX_op_sub2_i32:
3014             opc_new = INDEX_op_sub_i32;
3015             goto do_addsub2;
3016         case INDEX_op_add2_i64:
3017             opc_new = INDEX_op_add_i64;
3018             goto do_addsub2;
3019         case INDEX_op_sub2_i64:
3020             opc_new = INDEX_op_sub_i64;
3021         do_addsub2:
3022             nb_iargs = 4;
3023             nb_oargs = 2;
3024             /* Test if the high part of the operation is dead, but not
3025                the low part.  The result can be optimized to a simple
3026                add or sub.  This happens often for x86_64 guest when the
3027                cpu mode is set to 32 bit.  */
3028             if (arg_temp(op->args[1])->state == TS_DEAD) {
3029                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3030                     goto do_remove;
3031                 }
3032                 /* Replace the opcode and adjust the args in place,
3033                    leaving 3 unused args at the end.  */
3034                 op->opc = opc = opc_new;
3035                 op->args[1] = op->args[2];
3036                 op->args[2] = op->args[4];
3037                 /* Fall through and mark the single-word operation live.  */
3038                 nb_iargs = 2;
3039                 nb_oargs = 1;
3040             }
3041             goto do_not_remove;
3042 
3043         case INDEX_op_mulu2_i32:
3044             opc_new = INDEX_op_mul_i32;
3045             opc_new2 = INDEX_op_muluh_i32;
3046             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3047             goto do_mul2;
3048         case INDEX_op_muls2_i32:
3049             opc_new = INDEX_op_mul_i32;
3050             opc_new2 = INDEX_op_mulsh_i32;
3051             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3052             goto do_mul2;
3053         case INDEX_op_mulu2_i64:
3054             opc_new = INDEX_op_mul_i64;
3055             opc_new2 = INDEX_op_muluh_i64;
3056             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3057             goto do_mul2;
3058         case INDEX_op_muls2_i64:
3059             opc_new = INDEX_op_mul_i64;
3060             opc_new2 = INDEX_op_mulsh_i64;
3061             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3062             goto do_mul2;
3063         do_mul2:
3064             nb_iargs = 2;
3065             nb_oargs = 2;
3066             if (arg_temp(op->args[1])->state == TS_DEAD) {
3067                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3068                     /* Both parts of the operation are dead.  */
3069                     goto do_remove;
3070                 }
3071                 /* The high part of the operation is dead; generate the low. */
3072                 op->opc = opc = opc_new;
3073                 op->args[1] = op->args[2];
3074                 op->args[2] = op->args[3];
3075             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3076                 /* The low part of the operation is dead; generate the high. */
3077                 op->opc = opc = opc_new2;
3078                 op->args[0] = op->args[1];
3079                 op->args[1] = op->args[2];
3080                 op->args[2] = op->args[3];
3081             } else {
3082                 goto do_not_remove;
3083             }
3084             /* Mark the single-word operation live.  */
3085             nb_oargs = 1;
3086             goto do_not_remove;
3087 
3088         default:
3089             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3090             nb_iargs = def->nb_iargs;
3091             nb_oargs = def->nb_oargs;
3092 
3093             /* Test if the operation can be removed because all
3094                its outputs are dead. We assume that nb_oargs == 0
3095                implies side effects */
3096             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3097                 for (i = 0; i < nb_oargs; i++) {
3098                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3099                         goto do_not_remove;
3100                     }
3101                 }
3102                 goto do_remove;
3103             }
3104             goto do_not_remove;
3105 
3106         do_remove:
3107             tcg_op_remove(s, op);
3108             break;
3109 
3110         do_not_remove:
3111             for (i = 0; i < nb_oargs; i++) {
3112                 ts = arg_temp(op->args[i]);
3113 
3114                 /* Remember the preference of the uses that followed.  */
3115                 if (i < ARRAY_SIZE(op->output_pref)) {
3116                     op->output_pref[i] = *la_temp_pref(ts);
3117                 }
3118 
3119                 /* Output args are dead.  */
3120                 if (ts->state & TS_DEAD) {
3121                     arg_life |= DEAD_ARG << i;
3122                 }
3123                 if (ts->state & TS_MEM) {
3124                     arg_life |= SYNC_ARG << i;
3125                 }
3126                 ts->state = TS_DEAD;
3127                 la_reset_pref(ts);
3128             }
3129 
3130             /* If end of basic block, update.  */
                 /* The flags are tested in this order; only the first
                    matching branch is applied.  */
3131             if (def->flags & TCG_OPF_BB_EXIT) {
3132                 la_func_end(s, nb_globals, nb_temps);
3133             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3134                 la_bb_sync(s, nb_globals, nb_temps);
3135             } else if (def->flags & TCG_OPF_BB_END) {
3136                 la_bb_end(s, nb_globals, nb_temps);
3137             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3138                 la_global_sync(s, nb_globals);
3139                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3140                     la_cross_call(s, nb_temps);
3141                 }
3142             }
3143 
3144             /* Record arguments that die in this opcode.  */
3145             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3146                 ts = arg_temp(op->args[i]);
3147                 if (ts->state & TS_DEAD) {
3148                     arg_life |= DEAD_ARG << i;
3149                 }
3150             }
3151 
3152             /* Input arguments are live for preceding opcodes.  */
3153             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3154                 ts = arg_temp(op->args[i]);
3155                 if (ts->state & TS_DEAD) {
3156                     /* For operands that were dead, initially allow
3157                        all regs for the type.  */
3158                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3159                     ts->state &= ~TS_DEAD;
3160                 }
3161             }
3162 
3163             /* Incorporate constraints for this operand.  */
3164             switch (opc) {
3165             case INDEX_op_mov_i32:
3166             case INDEX_op_mov_i64:
3167                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3168                    have proper constraints.  That said, special case
3169                    moves to propagate preferences backward.  */
3170                 if (IS_DEAD_ARG(1)) {
3171                     *la_temp_pref(arg_temp(op->args[0]))
3172                         = *la_temp_pref(arg_temp(op->args[1]));
3173                 }
3174                 break;
3175 
3176             default:
                     /* Narrow each input's preference set by its operand
                        constraint and, for an aliased input, by the
                        preference of the output it aliases.  */
3177                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3178                     const TCGArgConstraint *ct = &def->args_ct[i];
3179                     TCGRegSet set, *pset;
3180 
3181                     ts = arg_temp(op->args[i]);
3182                     pset = la_temp_pref(ts);
3183                     set = *pset;
3184 
3185                     set &= ct->regs;
3186                     if (ct->ialias) {
3187                         set &= output_pref(op, ct->alias_index);
3188                     }
3189                     /* If the combination is not possible, restart.  */
3190                     if (set == 0) {
3191                         set = ct->regs;
3192                     }
3193                     *pset = set;
3194                 }
3195                 break;
3196             }
3197             break;
3198         }
             /* Store the liveness bits computed for this op.  */
3199         op->life = arg_life;
3200     }
3201 }
3202 
3203 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
     /*
      * Each global with indirect_reg set gets a shadow TEMP_EBB temp, linked
      * through its state_ptr.  The op list is then walked forwards: uses of
      * such a global are redirected to the shadow temp, a load is inserted
      * before an op that reads it while its state is TS_DEAD, and a store is
      * inserted after an op whose output carries SYNC_ARG in op->life.
      *
      * Returns true if any op argument was rewritten.
      */
3204 static bool liveness_pass_2(TCGContext *s)
3205 {
3206     int nb_globals = s->nb_globals;
3207     int nb_temps, i;
3208     bool changes = false;
3209     TCGOp *op, *op_next;
3210 
3211     /* Create a temporary for each indirect global.  */
3212     for (i = 0; i < nb_globals; ++i) {
3213         TCGTemp *its = &s->temps[i];
3214         if (its->indirect_reg) {
3215             TCGTemp *dts = tcg_temp_alloc(s);
3216             dts->type = its->type;
3217             dts->base_type = its->base_type;
3218             dts->temp_subindex = its->temp_subindex;
3219             dts->kind = TEMP_EBB;
3220             its->state_ptr = dts;
3221         } else {
3222             its->state_ptr = NULL;
3223         }
3224         /* All globals begin dead.  */
3225         its->state = TS_DEAD;
3226     }
         /* Remaining temps: i continues from nb_globals, and s->nb_temps is
            sampled here, after the allocations above.  None of them gets a
            shadow temp.  */
3227     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3228         TCGTemp *its = &s->temps[i];
3229         its->state_ptr = NULL;
3230         its->state = TS_DEAD;
3231     }
3232 
3233     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3234         TCGOpcode opc = op->opc;
3235         const TCGOpDef *def = &tcg_op_defs[opc];
3236         TCGLifeData arg_life = op->life;
3237         int nb_iargs, nb_oargs, call_flags;
3238         TCGTemp *arg_ts, *dir_ts;
3239 
3240         if (opc == INDEX_op_call) {
3241             nb_oargs = TCGOP_CALLO(op);
3242             nb_iargs = TCGOP_CALLI(op);
3243             call_flags = tcg_call_flags(op);
3244         } else {
3245             nb_iargs = def->nb_iargs;
3246             nb_oargs = def->nb_oargs;
3247 
3248             /* Set flags similar to how calls require.  */
3249             if (def->flags & TCG_OPF_COND_BRANCH) {
3250                 /* Like reading globals: sync_globals */
3251                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3252             } else if (def->flags & TCG_OPF_BB_END) {
3253                 /* Like writing globals: save_globals */
3254                 call_flags = 0;
3255             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3256                 /* Like reading globals: sync_globals */
3257                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3258             } else {
3259                 /* No effect on globals.  */
3260                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3261                               TCG_CALL_NO_WRITE_GLOBALS);
3262             }
3263         }
3264 
3265         /* Make sure that input arguments are available.  */
3266         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3267             arg_ts = arg_temp(op->args[i]);
3268             dir_ts = arg_ts->state_ptr;
3269             if (dir_ts && arg_ts->state == TS_DEAD) {
3270                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3271                                   ? INDEX_op_ld_i32
3272                                   : INDEX_op_ld_i64);
3273                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3274 
                     /* ld dir_ts, mem_base, mem_offset */
3275                 lop->args[0] = temp_arg(dir_ts);
3276                 lop->args[1] = temp_arg(arg_ts->mem_base);
3277                 lop->args[2] = arg_ts->mem_offset;
3278 
3279                 /* Loaded, but synced with memory.  */
3280                 arg_ts->state = TS_MEM;
3281             }
3282         }
3283 
3284         /* Perform input replacement, and mark inputs that became dead.
3285            No action is required except keeping temp_state up to date
3286            so that we reload when needed.  */
3287         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3288             arg_ts = arg_temp(op->args[i]);
3289             dir_ts = arg_ts->state_ptr;
3290             if (dir_ts) {
3291                 op->args[i] = temp_arg(dir_ts);
3292                 changes = true;
3293                 if (IS_DEAD_ARG(i)) {
3294                     arg_ts->state = TS_DEAD;
3295                 }
3296             }
3297         }
3298 
3299         /* Liveness analysis should ensure that the following are
3300            all correct, for call sites and basic block end points.  */
3301         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3302             /* Nothing to do */
3303         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3304             for (i = 0; i < nb_globals; ++i) {
3305                 /* Liveness should see that globals are synced back,
3306                    that is, either TS_DEAD or TS_MEM.  */
3307                 arg_ts = &s->temps[i];
3308                 tcg_debug_assert(arg_ts->state_ptr == 0
3309                                  || arg_ts->state != 0);
3310             }
3311         } else {
3312             for (i = 0; i < nb_globals; ++i) {
3313                 /* Liveness should see that globals are saved back,
3314                    that is, TS_DEAD, waiting to be reloaded.  */
3315                 arg_ts = &s->temps[i];
3316                 tcg_debug_assert(arg_ts->state_ptr == 0
3317                                  || arg_ts->state == TS_DEAD);
3318             }
3319         }
3320 
3321         /* Outputs become available.  */
3322         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3323             arg_ts = arg_temp(op->args[0]);
3324             dir_ts = arg_ts->state_ptr;
3325             if (dir_ts) {
3326                 op->args[0] = temp_arg(dir_ts);
3327                 changes = true;
3328 
3329                 /* The output is now live and modified.  */
3330                 arg_ts->state = 0;
3331 
3332                 if (NEED_SYNC_ARG(0)) {
3333                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3334                                       ? INDEX_op_st_i32
3335                                       : INDEX_op_st_i64);
3336                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3337                     TCGTemp *out_ts = dir_ts;
3338 
                         /* If the output is also dead after the sync, the
                            mov itself is removed and the store takes the
                            mov's source directly.  */
3339                     if (IS_DEAD_ARG(0)) {
3340                         out_ts = arg_temp(op->args[1]);
3341                         arg_ts->state = TS_DEAD;
3342                         tcg_op_remove(s, op);
3343                     } else {
3344                         arg_ts->state = TS_MEM;
3345                     }
3346 
3347                     sop->args[0] = temp_arg(out_ts);
3348                     sop->args[1] = temp_arg(arg_ts->mem_base);
3349                     sop->args[2] = arg_ts->mem_offset;
3350                 } else {
3351                     tcg_debug_assert(!IS_DEAD_ARG(0));
3352                 }
3353             }
3354         } else {
3355             for (i = 0; i < nb_oargs; i++) {
3356                 arg_ts = arg_temp(op->args[i]);
3357                 dir_ts = arg_ts->state_ptr;
3358                 if (!dir_ts) {
3359                     continue;
3360                 }
3361                 op->args[i] = temp_arg(dir_ts);
3362                 changes = true;
3363 
3364                 /* The output is now live and modified.  */
3365                 arg_ts->state = 0;
3366 
3367                 /* Sync outputs upon their last write.  */
3368                 if (NEED_SYNC_ARG(i)) {
3369                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3370                                       ? INDEX_op_st_i32
3371                                       : INDEX_op_st_i64);
3372                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3373 
3374                     sop->args[0] = temp_arg(dir_ts);
3375                     sop->args[1] = temp_arg(arg_ts->mem_base);
3376                     sop->args[2] = arg_ts->mem_offset;
3377 
3378                     arg_ts->state = TS_MEM;
3379                 }
3380                 /* Drop outputs that are dead.  */
3381                 if (IS_DEAD_ARG(i)) {
3382                     arg_ts->state = TS_DEAD;
3383                 }
3384             }
3385         }
3386     }
3387 
3388     return changes;
3389 }
3390 
3391 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3392 {
3393     intptr_t off;
3394     int size, align;
3395 
3396     /* When allocating an object, look at the full type. */
3397     size = tcg_type_size(ts->base_type);
3398     switch (ts->base_type) {
3399     case TCG_TYPE_I32:
3400         align = 4;
3401         break;
3402     case TCG_TYPE_I64:
3403     case TCG_TYPE_V64:
3404         align = 8;
3405         break;
3406     case TCG_TYPE_I128:
3407     case TCG_TYPE_V128:
3408     case TCG_TYPE_V256:
3409         /*
3410          * Note that we do not require aligned storage for V256,
3411          * and that we provide alignment for I128 to match V128,
3412          * even if that's above what the host ABI requires.
3413          */
3414         align = 16;
3415         break;
3416     default:
3417         g_assert_not_reached();
3418     }
3419 
3420     /*
3421      * Assume the stack is sufficiently aligned.
3422      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3423      * and do not require 16 byte vector alignment.  This seems slightly
3424      * easier than fully parameterizing the above switch statement.
3425      */
3426     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3427     off = ROUND_UP(s->current_frame_offset, align);
3428 
3429     /* If we've exhausted the stack frame, restart with a smaller TB. */
3430     if (off + size > s->frame_end) {
3431         tcg_raise_tb_overflow(s);
3432     }
3433     s->current_frame_offset = off + size;
3434 #if defined(__sparc__)
3435     off += TCG_TARGET_STACK_BIAS;
3436 #endif
3437 
3438     /* If the object was subdivided, assign memory to all the parts. */
3439     if (ts->base_type != ts->type) {
3440         int part_size = tcg_type_size(ts->type);
3441         int part_count = size / part_size;
3442 
3443         /*
3444          * Each part is allocated sequentially in tcg_temp_new_internal.
3445          * Jump back to the first part by subtracting the current index.
3446          */
3447         ts -= ts->temp_subindex;
3448         for (int i = 0; i < part_count; ++i) {
3449             ts[i].mem_offset = off + i * part_size;
3450             ts[i].mem_base = s->frame_temp;
3451             ts[i].mem_allocated = 1;
3452         }
3453     } else {
3454         ts->mem_offset = off;
3455         ts->mem_base = s->frame_temp;
3456         ts->mem_allocated = 1;
3457     }
3458 }
3459 
3460 /* Assign @reg to @ts, and update reg_to_temp[]. */
3461 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3462 {
3463     if (ts->val_type == TEMP_VAL_REG) {
3464         TCGReg old = ts->reg;
3465         tcg_debug_assert(s->reg_to_temp[old] == ts);
3466         if (old == reg) {
3467             return;
3468         }
3469         s->reg_to_temp[old] = NULL;
3470     }
3471     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3472     s->reg_to_temp[reg] = ts;
3473     ts->val_type = TEMP_VAL_REG;
3474     ts->reg = reg;
3475 }
3476 
3477 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3478 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3479 {
3480     tcg_debug_assert(type != TEMP_VAL_REG);
3481     if (ts->val_type == TEMP_VAL_REG) {
3482         TCGReg reg = ts->reg;
3483         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3484         s->reg_to_temp[reg] = NULL;
3485     }
3486     ts->val_type = type;
3487 }
3488 
3489 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3490 
3491 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3492    mark it free; otherwise mark it dead.  */
3493 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3494 {
3495     TCGTempVal new_type;
3496 
3497     switch (ts->kind) {
3498     case TEMP_FIXED:
3499         return;
3500     case TEMP_GLOBAL:
3501     case TEMP_LOCAL:
3502         new_type = TEMP_VAL_MEM;
3503         break;
3504     case TEMP_NORMAL:
3505     case TEMP_EBB:
3506         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3507         break;
3508     case TEMP_CONST:
3509         new_type = TEMP_VAL_CONST;
3510         break;
3511     default:
3512         g_assert_not_reached();
3513     }
3514     set_temp_val_nonreg(s, ts, new_type);
3515 }
3516 
3517 /* Mark a temporary as dead.  */
3518 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3519 {
3520     temp_free_or_dead(s, ts, 1);
3521 }
3522 
3523 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3524    registers needs to be allocated to store a constant.  If 'free_or_dead'
3525    is non-zero, subsequently release the temporary; if it is positive, the
3526    temp is dead; if it is negative, the temp is free.  */
3527 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3528                       TCGRegSet preferred_regs, int free_or_dead)
3529 {
3530     if (!temp_readonly(ts) && !ts->mem_coherent) {
3531         if (!ts->mem_allocated) {
3532             temp_allocate_frame(s, ts);
3533         }
3534         switch (ts->val_type) {
3535         case TEMP_VAL_CONST:
3536             /* If we're going to free the temp immediately, then we won't
3537                require it later in a register, so attempt to store the
3538                constant to memory directly.  */
3539             if (free_or_dead
3540                 && tcg_out_sti(s, ts->type, ts->val,
3541                                ts->mem_base->reg, ts->mem_offset)) {
3542                 break;
3543             }
3544             temp_load(s, ts, tcg_target_available_regs[ts->type],
3545                       allocated_regs, preferred_regs);
3546             /* fallthrough */
3547 
3548         case TEMP_VAL_REG:
3549             tcg_out_st(s, ts->type, ts->reg,
3550                        ts->mem_base->reg, ts->mem_offset);
3551             break;
3552 
3553         case TEMP_VAL_MEM:
3554             break;
3555 
3556         case TEMP_VAL_DEAD:
3557         default:
3558             tcg_abort();
3559         }
3560         ts->mem_coherent = 1;
3561     }
3562     if (free_or_dead) {
3563         temp_free_or_dead(s, ts, free_or_dead);
3564     }
3565 }
3566 
3567 /* free register 'reg' by spilling the corresponding temporary if necessary */
3568 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3569 {
3570     TCGTemp *ts = s->reg_to_temp[reg];
3571     if (ts != NULL) {
3572         temp_sync(s, ts, allocated_regs, 0, -1);
3573     }
3574 }
3575 
3576 /**
3577  * tcg_reg_alloc:
3578  * @required_regs: Set of registers in which we must allocate.
3579  * @allocated_regs: Set of registers which must be avoided.
3580  * @preferred_regs: Set of registers we should prefer.
3581  * @rev: True if we search the registers in "indirect" order.
3582  *
3583  * The allocated register must be in @required_regs & ~@allocated_regs,
3584  * but if we can put it in @preferred_regs we may save a move later.
3585  */
3586 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3587                             TCGRegSet allocated_regs,
3588                             TCGRegSet preferred_regs, bool rev)
3589 {
3590     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3591     TCGRegSet reg_ct[2];
3592     const int *order;
3593 
3594     reg_ct[1] = required_regs & ~allocated_regs;
3595     tcg_debug_assert(reg_ct[1] != 0);
3596     reg_ct[0] = reg_ct[1] & preferred_regs;
3597 
3598     /* Skip the preferred_regs option if it cannot be satisfied,
3599        or if the preference made no difference.  */
3600     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3601 
3602     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3603 
3604     /* Try free registers, preferences first.  */
3605     for (j = f; j < 2; j++) {
3606         TCGRegSet set = reg_ct[j];
3607 
3608         if (tcg_regset_single(set)) {
3609             /* One register in the set.  */
3610             TCGReg reg = tcg_regset_first(set);
3611             if (s->reg_to_temp[reg] == NULL) {
3612                 return reg;
3613             }
3614         } else {
3615             for (i = 0; i < n; i++) {
3616                 TCGReg reg = order[i];
3617                 if (s->reg_to_temp[reg] == NULL &&
3618                     tcg_regset_test_reg(set, reg)) {
3619                     return reg;
3620                 }
3621             }
3622         }
3623     }
3624 
3625     /* We must spill something.  */
3626     for (j = f; j < 2; j++) {
3627         TCGRegSet set = reg_ct[j];
3628 
3629         if (tcg_regset_single(set)) {
3630             /* One register in the set.  */
3631             TCGReg reg = tcg_regset_first(set);
3632             tcg_reg_free(s, reg, allocated_regs);
3633             return reg;
3634         } else {
3635             for (i = 0; i < n; i++) {
3636                 TCGReg reg = order[i];
3637                 if (tcg_regset_test_reg(set, reg)) {
3638                     tcg_reg_free(s, reg, allocated_regs);
3639                     return reg;
3640                 }
3641             }
3642         }
3643     }
3644 
3645     tcg_abort();
3646 }
3647 
3648 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
3649                                  TCGRegSet allocated_regs,
3650                                  TCGRegSet preferred_regs, bool rev)
3651 {
3652     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3653     TCGRegSet reg_ct[2];
3654     const int *order;
3655 
3656     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
3657     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
3658     tcg_debug_assert(reg_ct[1] != 0);
3659     reg_ct[0] = reg_ct[1] & preferred_regs;
3660 
3661     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3662 
3663     /*
3664      * Skip the preferred_regs option if it cannot be satisfied,
3665      * or if the preference made no difference.
3666      */
3667     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3668 
3669     /*
3670      * Minimize the number of flushes by looking for 2 free registers first,
3671      * then a single flush, then two flushes.
3672      */
3673     for (fmin = 2; fmin >= 0; fmin--) {
3674         for (j = k; j < 2; j++) {
3675             TCGRegSet set = reg_ct[j];
3676 
3677             for (i = 0; i < n; i++) {
3678                 TCGReg reg = order[i];
3679 
3680                 if (tcg_regset_test_reg(set, reg)) {
3681                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
3682                     if (f >= fmin) {
3683                         tcg_reg_free(s, reg, allocated_regs);
3684                         tcg_reg_free(s, reg + 1, allocated_regs);
3685                         return reg;
3686                     }
3687                 }
3688             }
3689         }
3690     }
3691     tcg_abort();
3692 }
3693 
3694 /* Make sure the temporary is in a register.  If needed, allocate the register
3695    from DESIRED while avoiding ALLOCATED.  */
3696 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3697                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3698 {
3699     TCGReg reg;
3700 
3701     switch (ts->val_type) {
3702     case TEMP_VAL_REG:
3703         return;
3704     case TEMP_VAL_CONST:
3705         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3706                             preferred_regs, ts->indirect_base);
3707         if (ts->type <= TCG_TYPE_I64) {
3708             tcg_out_movi(s, ts->type, reg, ts->val);
3709         } else {
3710             uint64_t val = ts->val;
3711             MemOp vece = MO_64;
3712 
3713             /*
3714              * Find the minimal vector element that matches the constant.
3715              * The targets will, in general, have to do this search anyway,
3716              * do this generically.
3717              */
3718             if (val == dup_const(MO_8, val)) {
3719                 vece = MO_8;
3720             } else if (val == dup_const(MO_16, val)) {
3721                 vece = MO_16;
3722             } else if (val == dup_const(MO_32, val)) {
3723                 vece = MO_32;
3724             }
3725 
3726             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3727         }
3728         ts->mem_coherent = 0;
3729         break;
3730     case TEMP_VAL_MEM:
3731         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3732                             preferred_regs, ts->indirect_base);
3733         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3734         ts->mem_coherent = 1;
3735         break;
3736     case TEMP_VAL_DEAD:
3737     default:
3738         tcg_abort();
3739     }
3740     set_temp_val_reg(s, ts, reg);
3741 }
3742 
3743 /* Save a temporary to memory. 'allocated_regs' is used in case a
3744    temporary registers needs to be allocated to store a constant.  */
3745 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3746 {
3747     /* The liveness analysis already ensures that globals are back
3748        in memory. Keep an tcg_debug_assert for safety. */
3749     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3750 }
3751 
3752 /* save globals to their canonical location and assume they can be
3753    modified be the following code. 'allocated_regs' is used in case a
3754    temporary registers needs to be allocated to store a constant. */
3755 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3756 {
3757     int i, n;
3758 
3759     for (i = 0, n = s->nb_globals; i < n; i++) {
3760         temp_save(s, &s->temps[i], allocated_regs);
3761     }
3762 }
3763 
3764 /* sync globals to their canonical location and assume they can be
3765    read by the following code. 'allocated_regs' is used in case a
3766    temporary registers needs to be allocated to store a constant. */
3767 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3768 {
3769     int i, n;
3770 
3771     for (i = 0, n = s->nb_globals; i < n; i++) {
3772         TCGTemp *ts = &s->temps[i];
3773         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3774                          || ts->kind == TEMP_FIXED
3775                          || ts->mem_coherent);
3776     }
3777 }
3778 
3779 /* at the end of a basic block, we assume all temporaries are dead and
3780    all globals are stored at their canonical location. */
3781 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3782 {
3783     int i;
3784 
3785     for (i = s->nb_globals; i < s->nb_temps; i++) {
3786         TCGTemp *ts = &s->temps[i];
3787 
3788         switch (ts->kind) {
3789         case TEMP_LOCAL:
3790             temp_save(s, ts, allocated_regs);
3791             break;
3792         case TEMP_NORMAL:
3793         case TEMP_EBB:
3794             /* The liveness analysis already ensures that temps are dead.
3795                Keep an tcg_debug_assert for safety. */
3796             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3797             break;
3798         case TEMP_CONST:
3799             /* Similarly, we should have freed any allocated register. */
3800             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3801             break;
3802         default:
3803             g_assert_not_reached();
3804         }
3805     }
3806 
3807     save_globals(s, allocated_regs);
3808 }
3809 
3810 /*
3811  * At a conditional branch, we assume all temporaries are dead unless
3812  * explicitly live-across-conditional-branch; all globals and local
3813  * temps are synced to their location.
3814  */
3815 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3816 {
3817     sync_globals(s, allocated_regs);
3818 
3819     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3820         TCGTemp *ts = &s->temps[i];
3821         /*
3822          * The liveness analysis already ensures that temps are dead.
3823          * Keep tcg_debug_asserts for safety.
3824          */
3825         switch (ts->kind) {
3826         case TEMP_LOCAL:
3827             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3828             break;
3829         case TEMP_NORMAL:
3830             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3831             break;
3832         case TEMP_EBB:
3833         case TEMP_CONST:
3834             break;
3835         default:
3836             g_assert_not_reached();
3837         }
3838     }
3839 }
3840 
3841 /*
3842  * Specialized code generation for INDEX_op_mov_* with a constant.
3843  */
3844 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3845                                   tcg_target_ulong val, TCGLifeData arg_life,
3846                                   TCGRegSet preferred_regs)
3847 {
3848     /* ENV should not be modified.  */
3849     tcg_debug_assert(!temp_readonly(ots));
3850 
3851     /* The movi is not explicitly generated here.  */
3852     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3853     ots->val = val;
3854     ots->mem_coherent = 0;
3855     if (NEED_SYNC_ARG(0)) {
3856         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3857     } else if (IS_DEAD_ARG(0)) {
3858         temp_dead(s, ots);
3859     }
3860 }
3861 
3862 /*
3863  * Specialized code generation for INDEX_op_mov_*.
3864  */
3865 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3866 {
3867     const TCGLifeData arg_life = op->life;
3868     TCGRegSet allocated_regs, preferred_regs;
3869     TCGTemp *ts, *ots;
3870     TCGType otype, itype;
3871     TCGReg oreg, ireg;
3872 
3873     allocated_regs = s->reserved_regs;
3874     preferred_regs = output_pref(op, 0);
3875     ots = arg_temp(op->args[0]);
3876     ts = arg_temp(op->args[1]);
3877 
3878     /* ENV should not be modified.  */
3879     tcg_debug_assert(!temp_readonly(ots));
3880 
3881     /* Note that otype != itype for no-op truncation.  */
3882     otype = ots->type;
3883     itype = ts->type;
3884 
3885     if (ts->val_type == TEMP_VAL_CONST) {
3886         /* propagate constant or generate sti */
3887         tcg_target_ulong val = ts->val;
3888         if (IS_DEAD_ARG(1)) {
3889             temp_dead(s, ts);
3890         }
3891         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3892         return;
3893     }
3894 
3895     /* If the source value is in memory we're going to be forced
3896        to have it in a register in order to perform the copy.  Copy
3897        the SOURCE value into its own register first, that way we
3898        don't have to reload SOURCE the next time it is used. */
3899     if (ts->val_type == TEMP_VAL_MEM) {
3900         temp_load(s, ts, tcg_target_available_regs[itype],
3901                   allocated_regs, preferred_regs);
3902     }
3903     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3904     ireg = ts->reg;
3905 
3906     if (IS_DEAD_ARG(0)) {
3907         /* mov to a non-saved dead register makes no sense (even with
3908            liveness analysis disabled). */
3909         tcg_debug_assert(NEED_SYNC_ARG(0));
3910         if (!ots->mem_allocated) {
3911             temp_allocate_frame(s, ots);
3912         }
3913         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3914         if (IS_DEAD_ARG(1)) {
3915             temp_dead(s, ts);
3916         }
3917         temp_dead(s, ots);
3918         return;
3919     }
3920 
3921     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3922         /*
3923          * The mov can be suppressed.  Kill input first, so that it
3924          * is unlinked from reg_to_temp, then set the output to the
3925          * reg that we saved from the input.
3926          */
3927         temp_dead(s, ts);
3928         oreg = ireg;
3929     } else {
3930         if (ots->val_type == TEMP_VAL_REG) {
3931             oreg = ots->reg;
3932         } else {
3933             /* Make sure to not spill the input register during allocation. */
3934             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3935                                  allocated_regs | ((TCGRegSet)1 << ireg),
3936                                  preferred_regs, ots->indirect_base);
3937         }
3938         if (!tcg_out_mov(s, otype, oreg, ireg)) {
3939             /*
3940              * Cross register class move not supported.
3941              * Store the source register into the destination slot
3942              * and leave the destination temp as TEMP_VAL_MEM.
3943              */
3944             assert(!temp_readonly(ots));
3945             if (!ts->mem_allocated) {
3946                 temp_allocate_frame(s, ots);
3947             }
3948             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
3949             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
3950             ots->mem_coherent = 1;
3951             return;
3952         }
3953     }
3954     set_temp_val_reg(s, ots, oreg);
3955     ots->mem_coherent = 0;
3956 
3957     if (NEED_SYNC_ARG(0)) {
3958         temp_sync(s, ots, allocated_regs, 0, 0);
3959     }
3960 }
3961 
3962 /*
3963  * Specialized code generation for INDEX_op_dup_vec.
3964  */
3965 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3966 {
3967     const TCGLifeData arg_life = op->life;
3968     TCGRegSet dup_out_regs, dup_in_regs;
3969     TCGTemp *its, *ots;
3970     TCGType itype, vtype;
3971     unsigned vece;
3972     int lowpart_ofs;
3973     bool ok;
3974 
3975     ots = arg_temp(op->args[0]);
3976     its = arg_temp(op->args[1]);
3977 
3978     /* ENV should not be modified.  */
3979     tcg_debug_assert(!temp_readonly(ots));
3980 
3981     itype = its->type;
3982     vece = TCGOP_VECE(op);
3983     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3984 
3985     if (its->val_type == TEMP_VAL_CONST) {
3986         /* Propagate constant via movi -> dupi.  */
3987         tcg_target_ulong val = its->val;
3988         if (IS_DEAD_ARG(1)) {
3989             temp_dead(s, its);
3990         }
3991         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
3992         return;
3993     }
3994 
3995     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3996     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3997 
3998     /* Allocate the output register now.  */
3999     if (ots->val_type != TEMP_VAL_REG) {
4000         TCGRegSet allocated_regs = s->reserved_regs;
4001         TCGReg oreg;
4002 
4003         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4004             /* Make sure to not spill the input register. */
4005             tcg_regset_set_reg(allocated_regs, its->reg);
4006         }
4007         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4008                              output_pref(op, 0), ots->indirect_base);
4009         set_temp_val_reg(s, ots, oreg);
4010     }
4011 
4012     switch (its->val_type) {
4013     case TEMP_VAL_REG:
4014         /*
4015          * The dup constriaints must be broad, covering all possible VECE.
4016          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4017          * to fail, indicating that extra moves are required for that case.
4018          */
4019         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4020             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4021                 goto done;
4022             }
4023             /* Try again from memory or a vector input register.  */
4024         }
4025         if (!its->mem_coherent) {
4026             /*
4027              * The input register is not synced, and so an extra store
4028              * would be required to use memory.  Attempt an integer-vector
4029              * register move first.  We do not have a TCGRegSet for this.
4030              */
4031             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4032                 break;
4033             }
4034             /* Sync the temp back to its slot and load from there.  */
4035             temp_sync(s, its, s->reserved_regs, 0, 0);
4036         }
4037         /* fall through */
4038 
4039     case TEMP_VAL_MEM:
4040         lowpart_ofs = 0;
4041         if (HOST_BIG_ENDIAN) {
4042             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4043         }
4044         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4045                              its->mem_offset + lowpart_ofs)) {
4046             goto done;
4047         }
4048         /* Load the input into the destination vector register. */
4049         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4050         break;
4051 
4052     default:
4053         g_assert_not_reached();
4054     }
4055 
4056     /* We now have a vector input register, so dup must succeed. */
4057     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4058     tcg_debug_assert(ok);
4059 
4060  done:
4061     ots->mem_coherent = 0;
4062     if (IS_DEAD_ARG(1)) {
4063         temp_dead(s, its);
4064     }
4065     if (NEED_SYNC_ARG(0)) {
4066         temp_sync(s, ots, s->reserved_regs, 0, 0);
4067     }
4068     if (IS_DEAD_ARG(0)) {
4069         temp_dead(s, ots);
4070     }
4071 }
4072 
     /*
      * Register-allocate and emit one op through the generic path.
      *
      * Steps, in order:
      *   1. place every input according to its constraint, passing a constant
      *      directly when tcg_target_const_match() accepts it;
      *   2. mark the inputs flagged dead in op->life as dead;
      *   3. for a conditional branch call tcg_reg_alloc_cbranch(), for an op
      *      ending the basic block call tcg_reg_alloc_bb_end(); otherwise free
      *      the call-clobbered registers (TCG_OPF_CALL_CLOBBER), call
      *      sync_globals() (TCG_OPF_SIDE_EFFECTS) and choose a register for
      *      every output;
      *   4. emit the instruction via tcg_out_vec_op() or tcg_out_op();
      *   5. sync outputs to memory and/or mark them dead as op->life requests.
      */
4073 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4074 {
          /*
           * NOTE(review): arg_life is never named directly below; presumably
           * the IS_DEAD_ARG()/NEED_SYNC_ARG() macros (defined outside this
           * chunk) read it by name -- confirm before renaming.
           */
4075     const TCGLifeData arg_life = op->life;
4076     const TCGOpDef * const def = &tcg_op_defs[op->opc];
          /* Registers taken so far by inputs and by outputs, respectively. */
4077     TCGRegSet i_allocated_regs;
4078     TCGRegSet o_allocated_regs;
4079     int i, k, nb_iargs, nb_oargs;
4080     TCGReg reg;
4081     TCGArg arg;
4082     const TCGArgConstraint *arg_ct;
4083     TCGTemp *ts;
          /* Per-argument value handed to the backend, and whether it is a constant. */
4084     TCGArg new_args[TCG_MAX_OP_ARGS];
4085     int const_args[TCG_MAX_OP_ARGS];
4086 
4087     nb_oargs = def->nb_oargs;
4088     nb_iargs = def->nb_iargs;
4089 
4090     /* copy constants */
4091     memcpy(new_args + nb_oargs + nb_iargs,
4092            op->args + nb_oargs + nb_iargs,
4093            sizeof(TCGArg) * def->nb_cargs);
4094 
4095     i_allocated_regs = s->reserved_regs;
4096     o_allocated_regs = s->reserved_regs;
4097 
4098     /* satisfy input constraints */
4099     for (k = 0; k < nb_iargs; k++) {
4100         TCGRegSet i_preferred_regs, i_required_regs;
4101         bool allocate_new_reg, copyto_new_reg;
4102         TCGTemp *ts2;
4103         int i1, i2;
4104 
              /* Inputs are visited in sort_index order, not in argument order. */
4105         i = def->args_ct[nb_oargs + k].sort_index;
4106         arg = op->args[i];
4107         arg_ct = &def->args_ct[i];
4108         ts = arg_temp(arg);
4109 
4110         if (ts->val_type == TEMP_VAL_CONST
4111             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4112             /* constant is OK for instruction */
4113             const_args[i] = 1;
4114             new_args[i] = ts->val;
4115             continue;
4116         }
4117 
              /*
               * Until reg is reassigned, every use of it below is guarded by a
               * ts->val_type == TEMP_VAL_REG check.
               */
4118         reg = ts->reg;
4119         i_preferred_regs = 0;
4120         i_required_regs = arg_ct->regs;
4121         allocate_new_reg = false;
4122         copyto_new_reg = false;
4123 
4124         switch (arg_ct->pair) {
4125         case 0: /* not paired */
4126             if (arg_ct->ialias) {
4127                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4128 
4129                 /*
4130                  * If the input is readonly, then it cannot also be an
4131                  * output and aliased to itself.  If the input is not
4132                  * dead after the instruction, we must allocate a new
4133                  * register and move it.
4134                  */
4135                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4136                     allocate_new_reg = true;
4137                 } else if (ts->val_type == TEMP_VAL_REG) {
4138                     /*
4139                      * Check if the current register has already been
4140                      * allocated for another input.
4141                      */
4142                     allocate_new_reg =
4143                         tcg_regset_test_reg(i_allocated_regs, reg);
4144                 }
4145             }
4146             if (!allocate_new_reg) {
                      /* Use the temp in place if its register fits the constraint. */
4147                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4148                           i_preferred_regs);
4149                 reg = ts->reg;
4150                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4151             }
4152             if (allocate_new_reg) {
4153                 /*
4154                  * Allocate a new register matching the constraint
4155                  * and move the temporary register into it.
4156                  */
4157                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4158                           i_allocated_regs, 0);
4159                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4160                                     i_preferred_regs, ts->indirect_base);
4161                 copyto_new_reg = true;
4162             }
4163             break;
4164 
4165         case 1:
4166             /* First of an input pair; if i1 == i2, the second is an output. */
4167             i1 = i;
4168             i2 = arg_ct->pair_index;
4169             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4170 
4171             /*
4172              * It is easier to default to allocating a new pair
4173              * and to identify a few cases where it's not required.
4174              */
4175             if (arg_ct->ialias) {
4176                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4177                 if (IS_DEAD_ARG(i1) &&
4178                     IS_DEAD_ARG(i2) &&
4179                     !temp_readonly(ts) &&
4180                     ts->val_type == TEMP_VAL_REG &&
4181                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4182                     tcg_regset_test_reg(i_required_regs, reg) &&
4183                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4184                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4185                     (ts2
4186                      ? ts2->val_type == TEMP_VAL_REG &&
4187                        ts2->reg == reg + 1 &&
4188                        !temp_readonly(ts2)
4189                      : s->reg_to_temp[reg + 1] == NULL)) {
4190                     break;
4191                 }
4192             } else {
4193                 /* Without aliasing, the pair must also be an input. */
4194                 tcg_debug_assert(ts2);
4195                 if (ts->val_type == TEMP_VAL_REG &&
4196                     ts2->val_type == TEMP_VAL_REG &&
4197                     ts2->reg == reg + 1 &&
4198                     tcg_regset_test_reg(i_required_regs, reg)) {
4199                     break;
4200                 }
4201             }
4202             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4203                                      0, ts->indirect_base);
4204             goto do_pair;
4205 
4206         case 2: /* pair second */
                  /* The register right after the one chosen for the first half. */
4207             reg = new_args[arg_ct->pair_index] + 1;
4208             goto do_pair;
4209 
4210         case 3: /* ialias with second output, no first input */
4211             tcg_debug_assert(arg_ct->ialias);
4212             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4213 
4214             if (IS_DEAD_ARG(i) &&
4215                 !temp_readonly(ts) &&
4216                 ts->val_type == TEMP_VAL_REG &&
4217                 reg > 0 &&
4218                 s->reg_to_temp[reg - 1] == NULL &&
4219                 tcg_regset_test_reg(i_required_regs, reg) &&
4220                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4221                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                      /* Reserve reg - 1 so no later input is allocated to it. */
4222                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4223                 break;
4224             }
                  /*
                   * Allocate a pair; this input takes the upper register, and the
                   * lower one is only marked as allocated.
                   */
4225             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4226                                      i_allocated_regs, 0,
4227                                      ts->indirect_base);
4228             tcg_regset_set_reg(i_allocated_regs, reg);
4229             reg += 1;
4230             goto do_pair;
4231 
4232         do_pair:
4233             /*
4234              * If an aliased input is not dead after the instruction,
4235              * we must allocate a new register and move it.
4236              */
4237             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4238                 TCGRegSet t_allocated_regs = i_allocated_regs;
4239 
4240                 /*
4241                  * Because of the alias, and the continued life, make sure
4242                  * that the temp is somewhere *other* than the reg pair,
4243                  * and we get a copy in reg.
4244                  */
4245                 tcg_regset_set_reg(t_allocated_regs, reg);
4246                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4247                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4248                     /* If ts was already in reg, copy it somewhere else. */
4249                     TCGReg nr;
4250                     bool ok;
4251 
4252                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4253                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4254                                        t_allocated_regs, 0, ts->indirect_base);
4255                     ok = tcg_out_mov(s, ts->type, nr, reg);
4256                     tcg_debug_assert(ok);
4257 
4258                     set_temp_val_reg(s, ts, nr);
4259                 } else {
4260                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4261                               t_allocated_regs, 0);
4262                     copyto_new_reg = true;
4263                 }
4264             } else {
4265                 /* Preferably allocate to reg, otherwise copy. */
4266                 i_required_regs = (TCGRegSet)1 << reg;
4267                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4268                           i_preferred_regs);
4269                 copyto_new_reg = ts->reg != reg;
4270             }
4271             break;
4272 
4273         default:
4274             g_assert_not_reached();
4275         }
4276 
4277         if (copyto_new_reg) {
4278             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4279                 /*
4280                  * Cross register class move not supported.  Sync the
4281                  * temp back to its slot and load from there.
4282                  */
4283                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4284                 tcg_out_ld(s, ts->type, reg,
4285                            ts->mem_base->reg, ts->mem_offset);
4286             }
4287         }
              /* Record the choice and keep later inputs off this register. */
4288         new_args[i] = reg;
4289         const_args[i] = 0;
4290         tcg_regset_set_reg(i_allocated_regs, reg);
4291     }
4292 
4293     /* mark dead temporaries and free the associated registers */
4294     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4295         if (IS_DEAD_ARG(i)) {
4296             temp_dead(s, arg_temp(op->args[i]));
4297         }
4298     }
4299 
4300     if (def->flags & TCG_OPF_COND_BRANCH) {
4301         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4302     } else if (def->flags & TCG_OPF_BB_END) {
4303         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4304     } else {
4305         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4306             /* XXX: permit generic clobber register list ? */
4307             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4308                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4309                     tcg_reg_free(s, i, i_allocated_regs);
4310                 }
4311             }
4312         }
4313         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4314             /* sync globals if the op has side effects and might trigger
4315                an exception. */
4316             sync_globals(s, i_allocated_regs);
4317         }
4318 
4319         /* satisfy the output constraints */
4320         for(k = 0; k < nb_oargs; k++) {
4321             i = def->args_ct[k].sort_index;
4322             arg = op->args[i];
4323             arg_ct = &def->args_ct[i];
4324             ts = arg_temp(arg);
4325 
4326             /* ENV should not be modified.  */
4327             tcg_debug_assert(!temp_readonly(ts));
4328 
4329             switch (arg_ct->pair) {
4330             case 0: /* not paired */
                      /*
                       * An output aliased to an input reuses that input's
                       * register, unless the input was passed as a constant.
                       * A "newreg" output additionally avoids every input
                       * register.
                       */
4331                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4332                     reg = new_args[arg_ct->alias_index];
4333                 } else if (arg_ct->newreg) {
4334                     reg = tcg_reg_alloc(s, arg_ct->regs,
4335                                         i_allocated_regs | o_allocated_regs,
4336                                         output_pref(op, k), ts->indirect_base);
4337                 } else {
4338                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4339                                         output_pref(op, k), ts->indirect_base);
4340                 }
4341                 break;
4342 
4343             case 1: /* first of pair */
4344                 tcg_debug_assert(!arg_ct->newreg);
4345                 if (arg_ct->oalias) {
4346                     reg = new_args[arg_ct->alias_index];
4347                     break;
4348                 }
4349                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4350                                          output_pref(op, k), ts->indirect_base);
4351                 break;
4352 
4353             case 2: /* second of pair */
4354                 tcg_debug_assert(!arg_ct->newreg);
4355                 if (arg_ct->oalias) {
4356                     reg = new_args[arg_ct->alias_index];
4357                 } else {
4358                     reg = new_args[arg_ct->pair_index] + 1;
4359                 }
4360                 break;
4361 
4362             case 3: /* first of pair, aliasing with a second input */
4363                 tcg_debug_assert(!arg_ct->newreg);
4364                 reg = new_args[arg_ct->pair_index] - 1;
4365                 break;
4366 
4367             default:
4368                 g_assert_not_reached();
4369             }
                  /* Bind the output; its value exists only in the register. */
4370             tcg_regset_set_reg(o_allocated_regs, reg);
4371             set_temp_val_reg(s, ts, reg);
4372             ts->mem_coherent = 0;
4373             new_args[i] = reg;
4374         }
4375     }
4376 
4377     /* emit instruction */
4378     if (def->flags & TCG_OPF_VECTOR) {
4379         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4380                        new_args, const_args);
4381     } else {
4382         tcg_out_op(s, op->opc, new_args, const_args);
4383     }
4384 
4385     /* move the outputs in the correct register if needed */
          /*
           * For each output: sync it to memory when NEED_SYNC_ARG() asks for it
           * (releasing it too if it is dead), otherwise just mark it dead when
           * IS_DEAD_ARG() is set.
           */
4386     for(i = 0; i < nb_oargs; i++) {
4387         ts = arg_temp(op->args[i]);
4388 
4389         /* ENV should not be modified.  */
4390         tcg_debug_assert(!temp_readonly(ts));
4391 
4392         if (NEED_SYNC_ARG(i)) {
4393             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4394         } else if (IS_DEAD_ARG(i)) {
4395             temp_dead(s, ts);
4396         }
4397     }
4398 }
4399 
4400 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4401 {
4402     const TCGLifeData arg_life = op->life;
4403     TCGTemp *ots, *itsl, *itsh;
4404     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4405 
4406     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4407     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4408     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4409 
4410     ots = arg_temp(op->args[0]);
4411     itsl = arg_temp(op->args[1]);
4412     itsh = arg_temp(op->args[2]);
4413 
4414     /* ENV should not be modified.  */
4415     tcg_debug_assert(!temp_readonly(ots));
4416 
4417     /* Allocate the output register now.  */
4418     if (ots->val_type != TEMP_VAL_REG) {
4419         TCGRegSet allocated_regs = s->reserved_regs;
4420         TCGRegSet dup_out_regs =
4421             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4422         TCGReg oreg;
4423 
4424         /* Make sure to not spill the input registers. */
4425         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4426             tcg_regset_set_reg(allocated_regs, itsl->reg);
4427         }
4428         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4429             tcg_regset_set_reg(allocated_regs, itsh->reg);
4430         }
4431 
4432         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4433                              output_pref(op, 0), ots->indirect_base);
4434         set_temp_val_reg(s, ots, oreg);
4435     }
4436 
4437     /* Promote dup2 of immediates to dupi_vec. */
4438     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4439         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4440         MemOp vece = MO_64;
4441 
4442         if (val == dup_const(MO_8, val)) {
4443             vece = MO_8;
4444         } else if (val == dup_const(MO_16, val)) {
4445             vece = MO_16;
4446         } else if (val == dup_const(MO_32, val)) {
4447             vece = MO_32;
4448         }
4449 
4450         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4451         goto done;
4452     }
4453 
4454     /* If the two inputs form one 64-bit value, try dupm_vec. */
4455     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
4456         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
4457         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
4458         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
4459 
4460         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
4461         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
4462 
4463         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4464                              its->mem_base->reg, its->mem_offset)) {
4465             goto done;
4466         }
4467     }
4468 
4469     /* Fall back to generic expansion. */
4470     return false;
4471 
4472  done:
4473     ots->mem_coherent = 0;
4474     if (IS_DEAD_ARG(1)) {
4475         temp_dead(s, itsl);
4476     }
4477     if (IS_DEAD_ARG(2)) {
4478         temp_dead(s, itsh);
4479     }
4480     if (NEED_SYNC_ARG(0)) {
4481         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4482     } else if (IS_DEAD_ARG(0)) {
4483         temp_dead(s, ots);
4484     }
4485     return true;
4486 }
4487 
4488 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4489                          TCGRegSet allocated_regs)
4490 {
4491     if (ts->val_type == TEMP_VAL_REG) {
4492         if (ts->reg != reg) {
4493             tcg_reg_free(s, reg, allocated_regs);
4494             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4495                 /*
4496                  * Cross register class move not supported.  Sync the
4497                  * temp back to its slot and load from there.
4498                  */
4499                 temp_sync(s, ts, allocated_regs, 0, 0);
4500                 tcg_out_ld(s, ts->type, reg,
4501                            ts->mem_base->reg, ts->mem_offset);
4502             }
4503         }
4504     } else {
4505         TCGRegSet arg_set = 0;
4506 
4507         tcg_reg_free(s, reg, allocated_regs);
4508         tcg_regset_set_reg(arg_set, reg);
4509         temp_load(s, ts, arg_set, allocated_regs, 0);
4510     }
4511 }
4512 
4513 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4514                          TCGRegSet allocated_regs)
4515 {
4516     /*
4517      * When the destination is on the stack, load up the temp and store.
4518      * If there are many call-saved registers, the temp might live to
4519      * see another use; otherwise it'll be discarded.
4520      */
4521     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4522     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4523                TCG_TARGET_CALL_STACK_OFFSET +
4524                stk_slot * sizeof(tcg_target_long));
4525 }
4526 
4527 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4528                             TCGTemp *ts, TCGRegSet *allocated_regs)
4529 {
4530     if (REG_P(l)) {
4531         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4532         load_arg_reg(s, reg, ts, *allocated_regs);
4533         tcg_regset_set_reg(*allocated_regs, reg);
4534     } else {
4535         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4536                      ts, *allocated_regs);
4537     }
4538 }
4539 
4540 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4541                          intptr_t ref_off, TCGRegSet *allocated_regs)
4542 {
4543     TCGReg reg;
4544     int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4545 
4546     if (stk_slot < 0) {
4547         reg = tcg_target_call_iarg_regs[arg_slot];
4548         tcg_reg_free(s, reg, *allocated_regs);
4549         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4550         tcg_regset_set_reg(*allocated_regs, reg);
4551     } else {
4552         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4553                             *allocated_regs, 0, false);
4554         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4555         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4556                    TCG_TARGET_CALL_STACK_OFFSET
4557                    + stk_slot * sizeof(tcg_target_long));
4558     }
4559 }
4560 
4561 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4562 {
4563     const int nb_oargs = TCGOP_CALLO(op);
4564     const int nb_iargs = TCGOP_CALLI(op);
4565     const TCGLifeData arg_life = op->life;
4566     const TCGHelperInfo *info = tcg_call_info(op);
4567     TCGRegSet allocated_regs = s->reserved_regs;
4568     int i;
4569 
4570     /*
4571      * Move inputs into place in reverse order,
4572      * so that we place stacked arguments first.
4573      */
4574     for (i = nb_iargs - 1; i >= 0; --i) {
4575         const TCGCallArgumentLoc *loc = &info->in[i];
4576         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
4577 
4578         switch (loc->kind) {
4579         case TCG_CALL_ARG_NORMAL:
4580         case TCG_CALL_ARG_EXTEND_U:
4581         case TCG_CALL_ARG_EXTEND_S:
4582             load_arg_normal(s, loc, ts, &allocated_regs);
4583             break;
4584         case TCG_CALL_ARG_BY_REF:
4585             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4586             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
4587                          TCG_TARGET_CALL_STACK_OFFSET
4588                          + loc->ref_slot * sizeof(tcg_target_long),
4589                          &allocated_regs);
4590             break;
4591         case TCG_CALL_ARG_BY_REF_N:
4592             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4593             break;
4594         default:
4595             g_assert_not_reached();
4596         }
4597     }
4598 
4599     /* Mark dead temporaries and free the associated registers.  */
4600     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4601         if (IS_DEAD_ARG(i)) {
4602             temp_dead(s, arg_temp(op->args[i]));
4603         }
4604     }
4605 
4606     /* Clobber call registers.  */
4607     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4608         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4609             tcg_reg_free(s, i, allocated_regs);
4610         }
4611     }
4612 
4613     /*
4614      * Save globals if they might be written by the helper,
4615      * sync them if they might be read.
4616      */
4617     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
4618         /* Nothing to do */
4619     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
4620         sync_globals(s, allocated_regs);
4621     } else {
4622         save_globals(s, allocated_regs);
4623     }
4624 
4625     /*
4626      * If the ABI passes a pointer to the returned struct as the first
4627      * argument, load that now.  Pass a pointer to the output home slot.
4628      */
4629     if (info->out_kind == TCG_CALL_RET_BY_REF) {
4630         TCGTemp *ts = arg_temp(op->args[0]);
4631 
4632         if (!ts->mem_allocated) {
4633             temp_allocate_frame(s, ts);
4634         }
4635         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
4636     }
4637 
4638     tcg_out_call(s, tcg_call_func(op), info);
4639 
4640     /* Assign output registers and emit moves if needed.  */
4641     switch (info->out_kind) {
4642     case TCG_CALL_RET_NORMAL:
4643         for (i = 0; i < nb_oargs; i++) {
4644             TCGTemp *ts = arg_temp(op->args[i]);
4645             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
4646 
4647             /* ENV should not be modified.  */
4648             tcg_debug_assert(!temp_readonly(ts));
4649 
4650             set_temp_val_reg(s, ts, reg);
4651             ts->mem_coherent = 0;
4652         }
4653         break;
4654 
4655     case TCG_CALL_RET_BY_VEC:
4656         {
4657             TCGTemp *ts = arg_temp(op->args[0]);
4658 
4659             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
4660             tcg_debug_assert(ts->temp_subindex == 0);
4661             if (!ts->mem_allocated) {
4662                 temp_allocate_frame(s, ts);
4663             }
4664             tcg_out_st(s, TCG_TYPE_V128,
4665                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
4666                        ts->mem_base->reg, ts->mem_offset);
4667         }
4668         /* fall through to mark all parts in memory */
4669 
4670     case TCG_CALL_RET_BY_REF:
4671         /* The callee has performed a write through the reference. */
4672         for (i = 0; i < nb_oargs; i++) {
4673             TCGTemp *ts = arg_temp(op->args[i]);
4674             ts->val_type = TEMP_VAL_MEM;
4675         }
4676         break;
4677 
4678     default:
4679         g_assert_not_reached();
4680     }
4681 
4682     /* Flush or discard output registers as needed. */
4683     for (i = 0; i < nb_oargs; i++) {
4684         TCGTemp *ts = arg_temp(op->args[i]);
4685         if (NEED_SYNC_ARG(i)) {
4686             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
4687         } else if (IS_DEAD_ARG(i)) {
4688             temp_dead(s, ts);
4689         }
4690     }
4691 }
4692 
4693 #ifdef CONFIG_PROFILER
4694 
4695 /* avoid copy/paste errors */
4696 #define PROF_ADD(to, from, field)                       \
4697     do {                                                \
4698         (to)->field += qatomic_read(&((from)->field));  \
4699     } while (0)
4700 
4701 #define PROF_MAX(to, from, field)                                       \
4702     do {                                                                \
4703         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4704         if (val__ > (to)->field) {                                      \
4705             (to)->field = val__;                                        \
4706         }                                                               \
4707     } while (0)
4708 
4709 /* Pass in a zero'ed @prof */
4710 static inline
4711 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4712 {
4713     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4714     unsigned int i;
4715 
4716     for (i = 0; i < n_ctxs; i++) {
4717         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4718         const TCGProfile *orig = &s->prof;
4719 
4720         if (counters) {
4721             PROF_ADD(prof, orig, cpu_exec_time);
4722             PROF_ADD(prof, orig, tb_count1);
4723             PROF_ADD(prof, orig, tb_count);
4724             PROF_ADD(prof, orig, op_count);
4725             PROF_MAX(prof, orig, op_count_max);
4726             PROF_ADD(prof, orig, temp_count);
4727             PROF_MAX(prof, orig, temp_count_max);
4728             PROF_ADD(prof, orig, del_op_count);
4729             PROF_ADD(prof, orig, code_in_len);
4730             PROF_ADD(prof, orig, code_out_len);
4731             PROF_ADD(prof, orig, search_out_len);
4732             PROF_ADD(prof, orig, interm_time);
4733             PROF_ADD(prof, orig, code_time);
4734             PROF_ADD(prof, orig, la_time);
4735             PROF_ADD(prof, orig, opt_time);
4736             PROF_ADD(prof, orig, restore_count);
4737             PROF_ADD(prof, orig, restore_time);
4738         }
4739         if (table) {
4740             int i;
4741 
4742             for (i = 0; i < NB_OPS; i++) {
4743                 PROF_ADD(prof, orig, table_op_count[i]);
4744             }
4745         }
4746     }
4747 }
4748 
4749 #undef PROF_ADD
4750 #undef PROF_MAX
4751 
/*
 * Snapshot only the scalar counters/timers into @prof (the per-opcode
 * table is left untouched).  Values are accumulated by
 * tcg_profile_snapshot(), so @prof should be zeroed by the caller.
 */
4752 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4753 {
4754     tcg_profile_snapshot(prof, true, false);
4755 }
4756 
/*
 * Snapshot only the per-opcode table into @prof (the scalar counters are
 * left untouched).  Values are accumulated by tcg_profile_snapshot(), so
 * @prof should be zeroed by the caller.
 */
4757 static void tcg_profile_snapshot_table(TCGProfile *prof)
4758 {
4759     tcg_profile_snapshot(prof, false, true);
4760 }
4761 
4762 void tcg_dump_op_count(GString *buf)
4763 {
4764     TCGProfile prof = {};
4765     int i;
4766 
4767     tcg_profile_snapshot_table(&prof);
4768     for (i = 0; i < NB_OPS; i++) {
4769         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4770                                prof.table_op_count[i]);
4771     }
4772 }
4773 
4774 int64_t tcg_cpu_exec_time(void)
4775 {
4776     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4777     unsigned int i;
4778     int64_t ret = 0;
4779 
4780     for (i = 0; i < n_ctxs; i++) {
4781         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4782         const TCGProfile *prof = &s->prof;
4783 
4784         ret += qatomic_read(&prof->cpu_exec_time);
4785     }
4786     return ret;
4787 }
4788 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no
 * counters to report, so only a fixed notice is appended to @buf.
 */
4789 void tcg_dump_op_count(GString *buf)
4790 {
4791     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4792 }
4793 
/*
 * Variant built when CONFIG_PROFILER is not defined.  It does not return
 * a value: it reports an error naming this function and terminates the
 * process with EXIT_FAILURE.
 */
4794 int64_t tcg_cpu_exec_time(void)
4795 {
4796     error_report("%s: TCG profiler not compiled", __func__);
4797     exit(EXIT_FAILURE);
4798 }
4799 #endif
4800 
4801 
/*
 * Generate host code for the ops queued on @s, writing it at tb->tc.ptr.
 *
 * Phases, in order:
 *   1. tcg_optimize() (only when USE_TCG_OPTIMIZATIONS is defined);
 *   2. reachable_code_pass() and liveness_pass_1(); when s->nb_indirects
 *      is non-zero, liveness_pass_2() and, if it changed anything,
 *      liveness_pass_1() again;
 *   3. reset of tb's goto_tb offsets and of the code buffer pointers;
 *   4. one walk over s->ops, dispatching each op to its dedicated
 *      allocator/emitter or to tcg_reg_alloc_op();
 *   5. backend finalization (ldst labels, constant pool, relocations)
 *      and, unless CONFIG_TCG_INTERPRETER, an icache flush.
 *
 * @pc_start is used here only to filter the DEBUG_DISAS log output.
 *
 * Returns the generated code size on success, otherwise a negative value:
 *   -1  s->code_ptr advanced past s->code_gen_highwater;
 *   -2  the code size exceeded UINT16_MAX, or tcg_resolve_relocs() failed;
 *   or the negative result of tcg_out_ldst_finalize() /
 *   tcg_out_pool_finalize().
 */
4802 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
4803 {
4804 #ifdef CONFIG_PROFILER
4805     TCGProfile *prof = &s->prof;
4806 #endif
4807     int i, num_insns;
4808     TCGOp *op;
4809 
4810 #ifdef CONFIG_PROFILER
4811     {
4812         int n = 0;
4813 
4814         QTAILQ_FOREACH(op, &s->ops, link) {
4815             n++;
4816         }
4817         qatomic_set(&prof->op_count, prof->op_count + n);
4818         if (n > prof->op_count_max) {
4819             qatomic_set(&prof->op_count_max, n);
4820         }
4821 
4822         n = s->nb_temps;
4823         qatomic_set(&prof->temp_count, prof->temp_count + n);
4824         if (n > prof->temp_count_max) {
4825             qatomic_set(&prof->temp_count_max, n);
4826         }
4827     }
4828 #endif
4829 
4830 #ifdef DEBUG_DISAS
4831     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4832                  && qemu_log_in_addr_range(pc_start))) {
4833         FILE *logfile = qemu_log_trylock();
4834         if (logfile) {
4835             fprintf(logfile, "OP:\n");
4836             tcg_dump_ops(s, logfile, false);
4837             fprintf(logfile, "\n");
4838             qemu_log_unlock(logfile);
4839         }
4840     }
4841 #endif
4842 
4843 #ifdef CONFIG_DEBUG_TCG
4844     /* Ensure all labels referenced have been emitted.  */
4845     {
4846         TCGLabel *l;
4847         bool error = false;
4848 
4849         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4850             if (unlikely(!l->present) && l->refs) {
4851                 qemu_log_mask(CPU_LOG_TB_OP,
4852                               "$L%d referenced but not present.\n", l->id);
4853                 error = true;
4854             }
4855         }
4856         assert(!error);
4857     }
4858 #endif
4859 
4860 #ifdef CONFIG_PROFILER
4861     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); /* start of optimizer interval */
4862 #endif
4863 
4864 #ifdef USE_TCG_OPTIMIZATIONS
4865     tcg_optimize(s);
4866 #endif
4867 
4868 #ifdef CONFIG_PROFILER
4869     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); /* end of optimizer interval */
4870     qatomic_set(&prof->la_time, prof->la_time - profile_getclock()); /* start of liveness interval */
4871 #endif
4872 
4873     reachable_code_pass(s);
4874     liveness_pass_1(s);
4875 
4876     if (s->nb_indirects > 0) {
4877 #ifdef DEBUG_DISAS
4878         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4879                      && qemu_log_in_addr_range(pc_start))) {
4880             FILE *logfile = qemu_log_trylock();
4881             if (logfile) {
4882                 fprintf(logfile, "OP before indirect lowering:\n");
4883                 tcg_dump_ops(s, logfile, false);
4884                 fprintf(logfile, "\n");
4885                 qemu_log_unlock(logfile);
4886             }
4887         }
4888 #endif
4889         /* Replace indirect temps with direct temps.  */
4890         if (liveness_pass_2(s)) {
4891             /* If changes were made, re-run liveness.  */
4892             liveness_pass_1(s);
4893         }
4894     }
4895 
4896 #ifdef CONFIG_PROFILER
4897     qatomic_set(&prof->la_time, prof->la_time + profile_getclock()); /* end of liveness interval */
4898 #endif
4899 
4900 #ifdef DEBUG_DISAS
4901     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4902                  && qemu_log_in_addr_range(pc_start))) {
4903         FILE *logfile = qemu_log_trylock();
4904         if (logfile) {
4905             fprintf(logfile, "OP after optimization and liveness analysis:\n");
4906             tcg_dump_ops(s, logfile, true);
4907             fprintf(logfile, "\n");
4908             qemu_log_unlock(logfile);
4909         }
4910     }
4911 #endif
4912 
4913     /* Initialize goto_tb jump offsets. */
4914     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
4915     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
4916     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
4917     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
4918 
4919     tcg_reg_alloc_start(s);
4920 
4921     /*
4922      * Reset the buffer pointers when restarting after overflow.
4923      * TODO: Move this into translate-all.c with the rest of the
4924      * buffer management.  Having only this done here is confusing.
4925      */
4926     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4927     s->code_ptr = s->code_buf;
4928 
4929 #ifdef TCG_TARGET_NEED_LDST_LABELS
4930     QSIMPLEQ_INIT(&s->ldst_labels);
4931 #endif
4932 #ifdef TCG_TARGET_NEED_POOL_LABELS
4933     s->pool_labels = NULL;
4934 #endif
4935 
4936     num_insns = -1; /* -1: no insn_start op seen yet; becomes the index of the current insn */
4937     QTAILQ_FOREACH(op, &s->ops, link) {
4938         TCGOpcode opc = op->opc;
4939 
4940 #ifdef CONFIG_PROFILER
4941         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4942 #endif
4943 
4944         switch (opc) {
4945         case INDEX_op_mov_i32:
4946         case INDEX_op_mov_i64:
4947         case INDEX_op_mov_vec:
4948             tcg_reg_alloc_mov(s, op);
4949             break;
4950         case INDEX_op_dup_vec:
4951             tcg_reg_alloc_dup(s, op);
4952             break;
4953         case INDEX_op_insn_start:
4954             if (num_insns >= 0) { /* close the previous insn: record where its code ends */
4955                 size_t off = tcg_current_code_size(s);
4956                 s->gen_insn_end_off[num_insns] = off;
4957                 /* Assert that we do not overflow our stored offset.  */
4958                 assert(s->gen_insn_end_off[num_insns] == off);
4959             }
4960             num_insns++;
4961             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4962                 target_ulong a;
4963 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4964                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]); /* word is split across two args: low, then high */
4965 #else
4966                 a = op->args[i];
4967 #endif
4968                 s->gen_insn_data[num_insns][i] = a;
4969             }
4970             break;
4971         case INDEX_op_discard:
4972             temp_dead(s, arg_temp(op->args[0]));
4973             break;
4974         case INDEX_op_set_label:
4975             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4976             tcg_out_label(s, arg_label(op->args[0]));
4977             break;
4978         case INDEX_op_call:
4979             tcg_reg_alloc_call(s, op);
4980             break;
4981         case INDEX_op_exit_tb:
4982             tcg_out_exit_tb(s, op->args[0]);
4983             break;
4984         case INDEX_op_goto_tb:
4985             tcg_out_goto_tb(s, op->args[0]);
4986             break;
4987         case INDEX_op_dup2_vec:
4988             if (tcg_reg_alloc_dup2(s, op)) {
4989                 break;
4990             }
4991             /* fall through */
4992         default:
4993             /* Sanity check that we've not introduced any unhandled opcodes. */
4994             tcg_debug_assert(tcg_op_supported(opc));
4995             /* Note: in order to speed up the code, it would be much
4996                faster to have specialized register allocator functions for
4997                some common argument patterns */
4998             tcg_reg_alloc_op(s, op);
4999             break;
5000         }
5001         /* Test for (pending) buffer overflow.  The assumption is that any
5002            one operation beginning below the high water mark cannot overrun
5003            the buffer completely.  Thus we can test for overflow after
5004            generating code without having to check during generation.  */
5005         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
5006             return -1;
5007         }
5008         /* Test for TB overflow, as seen by gen_insn_end_off.  */
5009         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
5010             return -2;
5011         }
5012     }
5013     tcg_debug_assert(num_insns >= 0);
5014     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); /* end offset of the last insn */
5015 
5016     /* Generate TB finalization at the end of block */
5017 #ifdef TCG_TARGET_NEED_LDST_LABELS
5018     i = tcg_out_ldst_finalize(s);
5019     if (i < 0) {
5020         return i;
5021     }
5022 #endif
5023 #ifdef TCG_TARGET_NEED_POOL_LABELS
5024     i = tcg_out_pool_finalize(s);
5025     if (i < 0) {
5026         return i;
5027     }
5028 #endif
5029     if (!tcg_resolve_relocs(s)) {
5030         return -2;
5031     }
5032 
5033 #ifndef CONFIG_TCG_INTERPRETER
5034     /* flush instruction cache */
5035     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
5036                         (uintptr_t)s->code_buf,
5037                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
5038 #endif
5039 
5040     return tcg_current_code_size(s);
5041 }
5042 
5043 #ifdef CONFIG_PROFILER
5044 void tcg_dump_info(GString *buf)
5045 {
5046     TCGProfile prof = {};
5047     const TCGProfile *s;
5048     int64_t tb_count;
5049     int64_t tb_div_count;
5050     int64_t tot;
5051 
5052     tcg_profile_snapshot_counters(&prof);
5053     s = &prof;
5054     tb_count = s->tb_count;
5055     tb_div_count = tb_count ? tb_count : 1;
5056     tot = s->interm_time + s->code_time;
5057 
5058     g_string_append_printf(buf, "JIT cycles          %" PRId64
5059                            " (%0.3f s at 2.4 GHz)\n",
5060                            tot, tot / 2.4e9);
5061     g_string_append_printf(buf, "translated TBs      %" PRId64
5062                            " (aborted=%" PRId64 " %0.1f%%)\n",
5063                            tb_count, s->tb_count1 - tb_count,
5064                            (double)(s->tb_count1 - s->tb_count)
5065                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
5066     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
5067                            (double)s->op_count / tb_div_count, s->op_count_max);
5068     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
5069                            (double)s->del_op_count / tb_div_count);
5070     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
5071                            (double)s->temp_count / tb_div_count,
5072                            s->temp_count_max);
5073     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
5074                            (double)s->code_out_len / tb_div_count);
5075     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
5076                            (double)s->search_out_len / tb_div_count);
5077 
5078     g_string_append_printf(buf, "cycles/op           %0.1f\n",
5079                            s->op_count ? (double)tot / s->op_count : 0);
5080     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
5081                            s->code_in_len ? (double)tot / s->code_in_len : 0);
5082     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
5083                            s->code_out_len ? (double)tot / s->code_out_len : 0);
5084     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
5085                            s->search_out_len ?
5086                            (double)tot / s->search_out_len : 0);
5087     if (tot == 0) {
5088         tot = 1;
5089     }
5090     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
5091                            (double)s->interm_time / tot * 100.0);
5092     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
5093                            (double)s->code_time / tot * 100.0);
5094     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
5095                            (double)s->opt_time / (s->code_time ?
5096                                                   s->code_time : 1)
5097                            * 100.0);
5098     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
5099                            (double)s->la_time / (s->code_time ?
5100                                                  s->code_time : 1) * 100.0);
5101     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
5102                            s->restore_count);
5103     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
5104                            s->restore_count ?
5105                            (double)s->restore_time / s->restore_count : 0);
5106 }
5107 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no
 * statistics to report, so only a fixed notice is appended to @buf.
 */
5108 void tcg_dump_info(GString *buf)
5109 {
5110     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5111 }
5112 #endif
5113 
5114 #ifdef ELF_HOST_MACHINE
5115 /* In order to use this feature, the backend needs to do three things:
5116 
5117    (1) Define ELF_HOST_MACHINE to indicate both what value to
5118        put into the ELF image and to indicate support for the feature.
5119 
5120    (2) Define tcg_register_jit.  This should create a buffer containing
5121        the contents of a .debug_frame section that describes the post-
5122        prologue unwind info for the tcg machine.
5123 
5124    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5125 */
5126 
5127 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes of the GDB JIT interface.  The numeric values are part of
 * the debugger-visible ABI and must not change.
 * NOTE(review): presumably these are the values stored in
 * jit_descriptor.action_flag -- confirm against the GDB documentation.
 */
5128 typedef enum {
5129     JIT_NOACTION = 0,
5130     JIT_REGISTER_FN,
5131     JIT_UNREGISTER_FN
5132 } jit_actions_t;
5133 
/*
 * One node of a doubly linked list of symbol files, each described by
 * its address and size.  The layout is part of the GDB JIT interface and
 * must match the GDB documentation field for field.
 */
5134 struct jit_code_entry {
5135     struct jit_code_entry *next_entry;
5136     struct jit_code_entry *prev_entry;
5137     const void *symfile_addr;
5138     uint64_t symfile_size;
5139 };
5140 
/*
 * Root descriptor of the GDB JIT interface: an interface version, an
 * action flag, and pointers to the entry an action refers to and to the
 * head of the entry list.  The layout must match the GDB documentation
 * field for field.
 */
5141 struct jit_descriptor {
5142     uint32_t version;
5143     uint32_t action_flag;
5144     struct jit_code_entry *relevant_entry;
5145     struct jit_code_entry *first_entry;
5146 };
5147 
/*
 * Hook function of the GDB JIT interface.  It is declared noinline and
 * its body is an empty asm statement, so it exists as a real, callable
 * symbol that does nothing.
 * NOTE(review): presumably the debugger places a breakpoint on this
 * symbol to learn about registrations -- see the GDB documentation.
 */
5148 void __jit_debug_register_code(void) __attribute__((noinline));
5149 void __jit_debug_register_code(void)
5150 {
5151     asm("");
5152 }
5153 
5154 /* Must statically initialize the version, because GDB may check
5155    the version before we can set it.  The initializer sets version = 1,
       action_flag = 0 and both entry pointers to null.  */
5156 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5157 
5158 /* End GDB interface.  */
5159 
/*
 * Return the byte offset of @str inside the string table @strtab.
 *
 * The table is scanned as a sequence of NUL-terminated strings starting
 * at offset 1 (offset 0 is skipped).  There is no end-of-table check:
 * @str must be present in @strtab, otherwise the scan runs past the end
 * of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cur;

    for (cur = strtab + 1; strcmp(cur, str) != 0; cur += strlen(cur) + 1) {
        /* Not this entry: step over it and its terminating NUL. */
    }
    return cur - strtab;
}
5171 
5172 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5173                                  const void *debug_frame,
5174                                  size_t debug_frame_size)
5175 {
5176     struct __attribute__((packed)) DebugInfo {
5177         uint32_t  len;
5178         uint16_t  version;
5179         uint32_t  abbrev;
5180         uint8_t   ptr_size;
5181         uint8_t   cu_die;
5182         uint16_t  cu_lang;
5183         uintptr_t cu_low_pc;
5184         uintptr_t cu_high_pc;
5185         uint8_t   fn_die;
5186         char      fn_name[16];
5187         uintptr_t fn_low_pc;
5188         uintptr_t fn_high_pc;
5189         uint8_t   cu_eoc;
5190     };
5191 
5192     struct ElfImage {
5193         ElfW(Ehdr) ehdr;
5194         ElfW(Phdr) phdr;
5195         ElfW(Shdr) shdr[7];
5196         ElfW(Sym)  sym[2];
5197         struct DebugInfo di;
5198         uint8_t    da[24];
5199         char       str[80];
5200     };
5201 
5202     struct ElfImage *img;
5203 
5204     static const struct ElfImage img_template = {
5205         .ehdr = {
5206             .e_ident[EI_MAG0] = ELFMAG0,
5207             .e_ident[EI_MAG1] = ELFMAG1,
5208             .e_ident[EI_MAG2] = ELFMAG2,
5209             .e_ident[EI_MAG3] = ELFMAG3,
5210             .e_ident[EI_CLASS] = ELF_CLASS,
5211             .e_ident[EI_DATA] = ELF_DATA,
5212             .e_ident[EI_VERSION] = EV_CURRENT,
5213             .e_type = ET_EXEC,
5214             .e_machine = ELF_HOST_MACHINE,
5215             .e_version = EV_CURRENT,
5216             .e_phoff = offsetof(struct ElfImage, phdr),
5217             .e_shoff = offsetof(struct ElfImage, shdr),
5218             .e_ehsize = sizeof(ElfW(Shdr)),
5219             .e_phentsize = sizeof(ElfW(Phdr)),
5220             .e_phnum = 1,
5221             .e_shentsize = sizeof(ElfW(Shdr)),
5222             .e_shnum = ARRAY_SIZE(img->shdr),
5223             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
5224 #ifdef ELF_HOST_FLAGS
5225             .e_flags = ELF_HOST_FLAGS,
5226 #endif
5227 #ifdef ELF_OSABI
5228             .e_ident[EI_OSABI] = ELF_OSABI,
5229 #endif
5230         },
5231         .phdr = {
5232             .p_type = PT_LOAD,
5233             .p_flags = PF_X,
5234         },
5235         .shdr = {
5236             [0] = { .sh_type = SHT_NULL },
5237             /* Trick: The contents of code_gen_buffer are not present in
5238                this fake ELF file; that got allocated elsewhere.  Therefore
5239                we mark .text as SHT_NOBITS (similar to .bss) so that readers
5240                will not look for contents.  We can record any address.  */
5241             [1] = { /* .text */
5242                 .sh_type = SHT_NOBITS,
5243                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5244             },
5245             [2] = { /* .debug_info */
5246                 .sh_type = SHT_PROGBITS,
5247                 .sh_offset = offsetof(struct ElfImage, di),
5248                 .sh_size = sizeof(struct DebugInfo),
5249             },
5250             [3] = { /* .debug_abbrev */
5251                 .sh_type = SHT_PROGBITS,
5252                 .sh_offset = offsetof(struct ElfImage, da),
5253                 .sh_size = sizeof(img->da),
5254             },
5255             [4] = { /* .debug_frame */
5256                 .sh_type = SHT_PROGBITS,
5257                 .sh_offset = sizeof(struct ElfImage),
5258             },
5259             [5] = { /* .symtab */
5260                 .sh_type = SHT_SYMTAB,
5261                 .sh_offset = offsetof(struct ElfImage, sym),
5262                 .sh_size = sizeof(img->sym),
5263                 .sh_info = 1,
5264                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5265                 .sh_entsize = sizeof(ElfW(Sym)),
5266             },
5267             [6] = { /* .strtab */
5268                 .sh_type = SHT_STRTAB,
5269                 .sh_offset = offsetof(struct ElfImage, str),
5270                 .sh_size = sizeof(img->str),
5271             }
5272         },
5273         .sym = {
5274             [1] = { /* code_gen_buffer */
5275                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5276                 .st_shndx = 1,
5277             }
5278         },
5279         .di = {
5280             .len = sizeof(struct DebugInfo) - 4,
5281             .version = 2,
5282             .ptr_size = sizeof(void *),
5283             .cu_die = 1,
5284             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5285             .fn_die = 2,
5286             .fn_name = "code_gen_buffer"
5287         },
5288         .da = {
5289             1,          /* abbrev number (the cu) */
5290             0x11, 1,    /* DW_TAG_compile_unit, has children */
5291             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5292             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5293             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5294             0, 0,       /* end of abbrev */
5295             2,          /* abbrev number (the fn) */
5296             0x2e, 0,    /* DW_TAG_subprogram, no children */
5297             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5298             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5299             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5300             0, 0,       /* end of abbrev */
5301             0           /* no more abbrev */
5302         },
5303         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5304                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5305     };
5306 
5307     /* We only need a single jit entry; statically allocate it.  */
5308     static struct jit_code_entry one_entry;
5309 
5310     uintptr_t buf = (uintptr_t)buf_ptr;
5311     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5312     DebugFrameHeader *dfh;
5313 
5314     img = g_malloc(img_size);
5315     *img = img_template;
5316 
5317     img->phdr.p_vaddr = buf;
5318     img->phdr.p_paddr = buf;
5319     img->phdr.p_memsz = buf_size;
5320 
5321     img->shdr[1].sh_name = find_string(img->str, ".text");
5322     img->shdr[1].sh_addr = buf;
5323     img->shdr[1].sh_size = buf_size;
5324 
5325     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5326     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5327 
5328     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5329     img->shdr[4].sh_size = debug_frame_size;
5330 
5331     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5332     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5333 
5334     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5335     img->sym[1].st_value = buf;
5336     img->sym[1].st_size = buf_size;
5337 
5338     img->di.cu_low_pc = buf;
5339     img->di.cu_high_pc = buf + buf_size;
5340     img->di.fn_low_pc = buf;
5341     img->di.fn_high_pc = buf + buf_size;
5342 
5343     dfh = (DebugFrameHeader *)(img + 1);
5344     memcpy(dfh, debug_frame, debug_frame_size);
5345     dfh->fde.func_start = buf;
5346     dfh->fde.func_len = buf_size;
5347 
5348 #ifdef DEBUG_JIT
5349     /* Enable this block to be able to debug the ELF image file creation.
5350        One can use readelf, objdump, or other inspection utilities.  */
5351     {
5352         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5353         FILE *f = fopen(jit, "w+b");
5354         if (f) {
5355             if (fwrite(img, img_size, 1, f) != img_size) {
5356                 /* Avoid stupid unused return value warning for fwrite.  */
5357             }
5358             fclose(f);
5359         }
5360     }
5361 #endif
5362 
5363     one_entry.symfile_addr = img;
5364     one_entry.symfile_size = img_size;
5365 
5366     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5367     __jit_debug_descriptor.relevant_entry = &one_entry;
5368     __jit_debug_descriptor.first_entry = &one_entry;
5369     __jit_debug_register_code();
5370 }
5371 #else
/*
 * The feature is not supported in this configuration.  The entry point
 * expected by exec.c still has to exist, and so does the internal function
 * declared earlier in this file; both are provided as empty functions.
 */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty: nothing is registered.  */
}
5380 
/* Public entry point of the unsupported configuration: does nothing.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty.  */
}
5384 #endif /* ELF_HOST_MACHINE */
5385 
5386 #if !TCG_TARGET_MAYBE_vec
5387 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5388 {
5389     g_assert_not_reached();
5390 }
5391 #endif
5392