xref: /openbmc/qemu/tcg/tcg.c (revision 05a248715cef192336a594afed812871a52efc1f)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "tcg/tcg-ldst.h"
62 #include "tcg-internal.h"
63 
64 #ifdef CONFIG_TCG_INTERPRETER
65 #include <ffi.h>
66 #endif
67 
68 /* Forward declarations for functions declared in tcg-target.c.inc and
69    used here. */
70 static void tcg_target_init(TCGContext *s);
71 static void tcg_target_qemu_prologue(TCGContext *s);
72 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
73                         intptr_t value, intptr_t addend);
74 
75 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Fixed leading fields of the CIE record placed at the start of a
 * DebugFrameHeader.
 * NOTE(review): field meanings presumably follow the DWARF .debug_frame
 * CIE layout -- confirm against the per-host users in tcg-target.c.inc.
 */
typedef struct {
    /* The alignment attribute pins the record start to pointer alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
85 
/*
 * Fixed leading fields of the FDE record that follows the CIE inside a
 * DebugFrameHeader.  The struct is packed; func_start and func_len are
 * pointer-sized.
 * NOTE(review): presumably they describe the address range of the generated
 * code -- confirm where tcg_register_jit_int() fills them in.
 */
typedef struct QEMU_PACKED {
    /* The alignment attribute pins the record start to pointer alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
92 
/*
 * The CIE immediately followed by the FDE header, packed so that no padding
 * is inserted between the two records.
 */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
97 
98 static void tcg_register_jit_int(const void *buf, size_t size,
99                                  const void *debug_frame,
100                                  size_t debug_frame_size)
101     __attribute__((unused));
102 
103 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
104 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
105                        intptr_t arg2);
106 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
107 static void tcg_out_movi(TCGContext *s, TCGType type,
108                          TCGReg ret, tcg_target_long arg);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110                        const TCGArg args[TCG_MAX_OP_ARGS],
111                        const int const_args[TCG_MAX_OP_ARGS]);
/*
 * Vector emission hooks.  When TCG_TARGET_MAYBE_vec is non-zero these are
 * only declared here (the definitions are expected from tcg-target.c.inc).
 * Otherwise local stubs are provided so that callers still compile; each
 * stub asserts that it is never reached.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/* Stub: must never be called when the target has no vector support. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the target has no vector support. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the target has no vector support. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the target has no vector support. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 #ifdef CONFIG_TCG_INTERPRETER
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
153                          ffi_cif *cif);
154 #else
155 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
156 #endif
157 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
158 #ifdef TCG_TARGET_NEED_LDST_LABELS
159 static int tcg_out_ldst_finalize(TCGContext *s);
160 #endif
161 
/*
 * The parent context set up by tcg_context_init().  User-mode threads use
 * it directly; softmmu threads start from a copy of it
 * (see tcg_register_thread()).
 */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context used by the current thread. */
__thread TCGContext *tcg_ctx;

/* Array of registered contexts, its fill count and its capacity. */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
/* Set by tcg_context_init() to the "env" global bound to TCG_AREG0. */
TCGv_env cpu_env = 0;
/* Must be non-NULL after tcg_prologue_init() on non-interpreter builds. */
const void *tcg_code_gen_epilogue;
/* NOTE(review): presumably the offset between the RW and RX code mappings
   used by tcg_splitwx_to_rx() -- confirm in the region code. */
uintptr_t tcg_splitwx_diff;
171 
172 #ifndef CONFIG_TCG_INTERPRETER
173 tcg_prologue_fn *tcg_qemu_tb_exec;
174 #endif
175 
176 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
177 static TCGRegSet tcg_target_call_clobber_regs;
178 
179 #if TCG_TARGET_INSN_UNIT_SIZE == 1
180 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
181 {
182     *s->code_ptr++ = v;
183 }
184 
185 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
186                                                       uint8_t v)
187 {
188     *p = v;
189 }
190 #endif
191 
192 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
193 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
194 {
195     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
196         *s->code_ptr++ = v;
197     } else {
198         tcg_insn_unit *p = s->code_ptr;
199         memcpy(p, &v, sizeof(v));
200         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
201     }
202 }
203 
204 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
205                                                        uint16_t v)
206 {
207     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
208         *p = v;
209     } else {
210         memcpy(p, &v, sizeof(v));
211     }
212 }
213 #endif
214 
215 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
216 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
217 {
218     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
219         *s->code_ptr++ = v;
220     } else {
221         tcg_insn_unit *p = s->code_ptr;
222         memcpy(p, &v, sizeof(v));
223         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
224     }
225 }
226 
227 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
228                                                        uint32_t v)
229 {
230     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
231         *p = v;
232     } else {
233         memcpy(p, &v, sizeof(v));
234     }
235 }
236 #endif
237 
238 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
239 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
240 {
241     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
242         *s->code_ptr++ = v;
243     } else {
244         tcg_insn_unit *p = s->code_ptr;
245         memcpy(p, &v, sizeof(v));
246         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
247     }
248 }
249 
250 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
251                                                        uint64_t v)
252 {
253     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
254         *p = v;
255     } else {
256         memcpy(p, &v, sizeof(v));
257     }
258 }
259 #endif
260 
261 /* label relocation processing */
262 
263 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
264                           TCGLabel *l, intptr_t addend)
265 {
266     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
267 
268     r->type = type;
269     r->ptr = code_ptr;
270     r->addend = addend;
271     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
272 }
273 
274 static void tcg_out_label(TCGContext *s, TCGLabel *l)
275 {
276     tcg_debug_assert(!l->has_value);
277     l->has_value = 1;
278     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
279 }
280 
281 TCGLabel *gen_new_label(void)
282 {
283     TCGContext *s = tcg_ctx;
284     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
285 
286     memset(l, 0, sizeof(TCGLabel));
287     l->id = s->nb_labels++;
288     QSIMPLEQ_INIT(&l->relocs);
289 
290     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
291 
292     return l;
293 }
294 
295 static bool tcg_resolve_relocs(TCGContext *s)
296 {
297     TCGLabel *l;
298 
299     QSIMPLEQ_FOREACH(l, &s->labels, next) {
300         TCGRelocation *r;
301         uintptr_t value = l->u.value;
302 
303         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
304             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
305                 return false;
306             }
307         }
308     }
309     return true;
310 }
311 
/*
 * Record the current generated-code size as the reset offset for jump
 * slot @which.
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
320 
321 /* Signal overflow, starting over with fewer guest insns. */
/*
 * Never returns: unwinds to the sigsetjmp point stored in s->jmp_trans,
 * delivering the value -2.
 */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
326 
/*
 * Token-pasting helpers: build one identifier from prefix P followed by the
 * constraint names, with '_' between consecutive names.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
333 
334 /* Define an enumeration for the various combinations. */
335 
/*
 * First expansion of tcg-target-con-set.h: each C_* entry becomes one
 * enumerator followed by a comma, so the include forms the enum body.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

/* One enumerator per constraint set, in header order. */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
356 
357 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
358 
359 #undef C_O0_I1
360 #undef C_O0_I2
361 #undef C_O0_I3
362 #undef C_O0_I4
363 #undef C_O1_I1
364 #undef C_O1_I2
365 #undef C_O1_I3
366 #undef C_O1_I4
367 #undef C_N1_I2
368 #undef C_O2_I1
369 #undef C_O2_I2
370 #undef C_O2_I3
371 #undef C_O2_I4
372 
373 /* Put all of the constraint sets into an array, indexed by the enum. */
374 
/*
 * Second expansion of tcg-target-con-set.h: each C_* entry becomes a
 * TCGTargetOpDef initializer whose args_ct_str holds the stringified
 * constraint names.  C_N1_I2 additionally prefixes the output with "&".
 */
#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

/* Same header, same order as the enum: entry N describes enumerator N. */
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
395 
396 
397 #undef C_O0_I1
398 #undef C_O0_I2
399 #undef C_O0_I3
400 #undef C_O0_I4
401 #undef C_O1_I1
402 #undef C_O1_I2
403 #undef C_O1_I3
404 #undef C_O1_I4
405 #undef C_N1_I2
406 #undef C_O2_I1
407 #undef C_O2_I2
408 #undef C_O2_I3
409 #undef C_O2_I4
410 
411 /* Expand the enumerator to be returned from tcg_target_op_def(). */
412 
/*
 * Third definition of the C_* macros: same pasted identifiers as the enum
 * expansion but without the trailing comma, so each use is a plain
 * enumerator expression inside the tcg-target.c.inc included next.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
429 
430 #include "tcg-target.c.inc"
431 
432 static void alloc_tcg_plugin_context(TCGContext *s)
433 {
434 #ifdef CONFIG_PLUGIN
435     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
436     s->plugin_tb->insns =
437         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
438 #endif
439 }
440 
441 /*
442  * All TCG threads except the parent (i.e. the one that called tcg_context_init
443  * and registered the target's TCG globals) must register with this function
444  * before initiating translation.
445  *
446  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
447  * of tcg_region_init() for the reasoning behind this.
448  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
455  */
456 #ifdef CONFIG_USER_ONLY
/* User-mode variant: every thread shares the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
461 #else
462 void tcg_register_thread(void)
463 {
464     TCGContext *s = g_malloc(sizeof(*s));
465     unsigned int i, n;
466 
467     *s = tcg_init_ctx;
468 
469     /* Relink mem_base.  */
470     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
471         if (tcg_init_ctx.temps[i].mem_base) {
472             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
473             tcg_debug_assert(b >= 0 && b < n);
474             s->temps[i].mem_base = &s->temps[b];
475         }
476     }
477 
478     /* Claim an entry in tcg_ctxs */
479     n = qatomic_fetch_inc(&tcg_cur_ctxs);
480     g_assert(n < tcg_max_ctxs);
481     qatomic_set(&tcg_ctxs[n], s);
482 
483     if (n > 0) {
484         alloc_tcg_plugin_context(s);
485         tcg_region_initial_alloc(s);
486     }
487 
488     tcg_ctx = s;
489 }
490 #endif /* !CONFIG_USER_ONLY */
491 
492 /* pool based memory allocation */
493 void *tcg_malloc_internal(TCGContext *s, int size)
494 {
495     TCGPool *p;
496     int pool_size;
497 
498     if (size > TCG_POOL_CHUNK_SIZE) {
499         /* big malloc: insert a new pool (XXX: could optimize) */
500         p = g_malloc(sizeof(TCGPool) + size);
501         p->size = size;
502         p->next = s->pool_first_large;
503         s->pool_first_large = p;
504         return p->data;
505     } else {
506         p = s->pool_current;
507         if (!p) {
508             p = s->pool_first;
509             if (!p)
510                 goto new_pool;
511         } else {
512             if (!p->next) {
513             new_pool:
514                 pool_size = TCG_POOL_CHUNK_SIZE;
515                 p = g_malloc(sizeof(TCGPool) + pool_size);
516                 p->size = pool_size;
517                 p->next = NULL;
518                 if (s->pool_current)
519                     s->pool_current->next = p;
520                 else
521                     s->pool_first = p;
522             } else {
523                 p = p->next;
524             }
525         }
526     }
527     s->pool_current = p;
528     s->pool_cur = p->data + size;
529     s->pool_end = p->data + p->size;
530     return p->data;
531 }
532 
533 void tcg_pool_reset(TCGContext *s)
534 {
535     TCGPool *p, *t;
536     for (p = s->pool_first_large; p; p = t) {
537         t = p->next;
538         g_free(p);
539     }
540     s->pool_first_large = NULL;
541     s->pool_cur = s->pool_end = NULL;
542     s->pool_current = NULL;
543 }
544 
545 #include "exec/helper-proto.h"
546 
/* Table of helper descriptors generated by including exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;
551 
552 #ifdef CONFIG_TCG_INTERPRETER
553 static GHashTable *ffi_table;
554 
/*
 * Translate a 3-bit dh_typecode_* value into the matching libffi type.
 * Entries without an initializer remain NULL.
 */
static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
563 #endif
564 
565 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
566 static void process_op_defs(TCGContext *s);
567 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
568                                             TCGReg reg, const char *name);
569 
570 static void tcg_context_init(unsigned max_cpus)
571 {
572     TCGContext *s = &tcg_init_ctx;
573     int op, total_args, n, i;
574     TCGOpDef *def;
575     TCGArgConstraint *args_ct;
576     TCGTemp *ts;
577 
578     memset(s, 0, sizeof(*s));
579     s->nb_globals = 0;
580 
581     /* Count total number of arguments and allocate the corresponding
582        space */
583     total_args = 0;
584     for(op = 0; op < NB_OPS; op++) {
585         def = &tcg_op_defs[op];
586         n = def->nb_iargs + def->nb_oargs;
587         total_args += n;
588     }
589 
590     args_ct = g_new0(TCGArgConstraint, total_args);
591 
592     for(op = 0; op < NB_OPS; op++) {
593         def = &tcg_op_defs[op];
594         def->args_ct = args_ct;
595         n = def->nb_iargs + def->nb_oargs;
596         args_ct += n;
597     }
598 
599     /* Register helpers.  */
600     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
601     helper_table = g_hash_table_new(NULL, NULL);
602 
603     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
604         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
605                             (gpointer)&all_helpers[i]);
606     }
607 
608 #ifdef CONFIG_TCG_INTERPRETER
609     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
610     ffi_table = g_hash_table_new(NULL, NULL);
611     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
612         struct {
613             ffi_cif cif;
614             ffi_type *args[];
615         } *ca;
616         uint32_t typemask = all_helpers[i].typemask;
617         gpointer hash = (gpointer)(uintptr_t)typemask;
618         ffi_status status;
619         int nargs;
620 
621         if (g_hash_table_lookup(ffi_table, hash)) {
622             continue;
623         }
624 
625         /* Ignoring the return type, find the last non-zero field. */
626         nargs = 32 - clz32(typemask >> 3);
627         nargs = DIV_ROUND_UP(nargs, 3);
628 
629         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
630         ca->cif.rtype = typecode_to_ffi[typemask & 7];
631         ca->cif.nargs = nargs;
632 
633         if (nargs != 0) {
634             ca->cif.arg_types = ca->args;
635             for (i = 0; i < nargs; ++i) {
636                 int typecode = extract32(typemask, (i + 1) * 3, 3);
637                 ca->args[i] = typecode_to_ffi[typecode];
638             }
639         }
640 
641         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
642                               ca->cif.rtype, ca->cif.arg_types);
643         assert(status == FFI_OK);
644 
645         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
646     }
647 #endif
648 
649     tcg_target_init(s);
650     process_op_defs(s);
651 
652     /* Reverse the order of the saved registers, assuming they're all at
653        the start of tcg_target_reg_alloc_order.  */
654     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
655         int r = tcg_target_reg_alloc_order[n];
656         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
657             break;
658         }
659     }
660     for (i = 0; i < n; ++i) {
661         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
662     }
663     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
664         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
665     }
666 
667     alloc_tcg_plugin_context(s);
668 
669     tcg_ctx = s;
670     /*
671      * In user-mode we simply share the init context among threads, since we
672      * use a single region. See the documentation tcg_region_init() for the
673      * reasoning behind this.
674      * In softmmu we will have at most max_cpus TCG threads.
675      */
676 #ifdef CONFIG_USER_ONLY
677     tcg_ctxs = &tcg_ctx;
678     tcg_cur_ctxs = 1;
679     tcg_max_ctxs = 1;
680 #else
681     tcg_max_ctxs = max_cpus;
682     tcg_ctxs = g_new0(TCGContext *, max_cpus);
683 #endif
684 
685     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
686     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
687     cpu_env = temp_tcgv_ptr(ts);
688 }
689 
/*
 * Public entry point: initialise the parent context, then the code regions.
 * All three arguments are forwarded to tcg_region_init(); @max_cpus is also
 * passed to tcg_context_init().
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
695 
696 /*
697  * Allocate TBs right before their corresponding translated code, making
698  * sure that TBs and code are on different cache lines.
699  */
700 TranslationBlock *tcg_tb_alloc(TCGContext *s)
701 {
702     uintptr_t align = qemu_icache_linesize;
703     TranslationBlock *tb;
704     void *next;
705 
706  retry:
707     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
708     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
709 
710     if (unlikely(next > s->code_gen_highwater)) {
711         if (tcg_region_alloc(s)) {
712             return NULL;
713         }
714         goto retry;
715     }
716     qatomic_set(&s->code_gen_ptr, next);
717     s->data_gen_ptr = NULL;
718     return tb;
719 }
720 
/*
 * Generate the host prologue at the current code_gen_ptr of @s.
 *
 * For non-interpreter builds this also publishes the entry point in
 * tcg_qemu_tb_exec, flushes the caches over the emitted range, and checks
 * that the backend set tcg_code_gen_epilogue.  With DEBUG_DISAS and the
 * CPU_LOG_TB_OUT_ASM log mask the result is disassembled to the log.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    /* Start emitting at the beginning of the unused code buffer. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point is the executable view of the prologue start. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* NOTE(review): presumably switches this thread's JIT memory to
       writable -- confirm in the qemu_thread_jit_write() definition. */
    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Code runs up to data_gen_ptr; the rest is dumped as words. */
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
798 
799 void tcg_func_start(TCGContext *s)
800 {
801     tcg_pool_reset(s);
802     s->nb_temps = s->nb_globals;
803 
804     /* No temps have been previously allocated for size or locality.  */
805     memset(s->free_temps, 0, sizeof(s->free_temps));
806 
807     /* No constant temps have been previously allocated. */
808     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
809         if (s->const_table[i]) {
810             g_hash_table_remove_all(s->const_table[i]);
811         }
812     }
813 
814     s->nb_ops = 0;
815     s->nb_labels = 0;
816     s->current_frame_offset = s->frame_start;
817 
818 #ifdef CONFIG_DEBUG_TCG
819     s->goto_tb_issue_mask = 0;
820 #endif
821 
822     QTAILQ_INIT(&s->ops);
823     QTAILQ_INIT(&s->free_ops);
824     QSIMPLEQ_INIT(&s->labels);
825 }
826 
827 static TCGTemp *tcg_temp_alloc(TCGContext *s)
828 {
829     int n = s->nb_temps++;
830 
831     if (n >= TCG_MAX_TEMPS) {
832         tcg_raise_tb_overflow(s);
833     }
834     return memset(&s->temps[n], 0, sizeof(TCGTemp));
835 }
836 
837 static TCGTemp *tcg_global_alloc(TCGContext *s)
838 {
839     TCGTemp *ts;
840 
841     tcg_debug_assert(s->nb_globals == s->nb_temps);
842     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
843     s->nb_globals++;
844     ts = tcg_temp_alloc(s);
845     ts->kind = TEMP_GLOBAL;
846 
847     return ts;
848 }
849 
850 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
851                                             TCGReg reg, const char *name)
852 {
853     TCGTemp *ts;
854 
855     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
856         tcg_abort();
857     }
858 
859     ts = tcg_global_alloc(s);
860     ts->base_type = type;
861     ts->type = type;
862     ts->kind = TEMP_FIXED;
863     ts->reg = reg;
864     ts->name = name;
865     tcg_regset_set_reg(s->reserved_regs, reg);
866 
867     return ts;
868 }
869 
870 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
871 {
872     s->frame_start = start;
873     s->frame_end = start + size;
874     s->frame_temp
875         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
876 }
877 
878 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
879                                      intptr_t offset, const char *name)
880 {
881     TCGContext *s = tcg_ctx;
882     TCGTemp *base_ts = tcgv_ptr_temp(base);
883     TCGTemp *ts = tcg_global_alloc(s);
884     int indirect_reg = 0, bigendian = 0;
885 #ifdef HOST_WORDS_BIGENDIAN
886     bigendian = 1;
887 #endif
888 
889     switch (base_ts->kind) {
890     case TEMP_FIXED:
891         break;
892     case TEMP_GLOBAL:
893         /* We do not support double-indirect registers.  */
894         tcg_debug_assert(!base_ts->indirect_reg);
895         base_ts->indirect_base = 1;
896         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
897                             ? 2 : 1);
898         indirect_reg = 1;
899         break;
900     default:
901         g_assert_not_reached();
902     }
903 
904     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
905         TCGTemp *ts2 = tcg_global_alloc(s);
906         char buf[64];
907 
908         ts->base_type = TCG_TYPE_I64;
909         ts->type = TCG_TYPE_I32;
910         ts->indirect_reg = indirect_reg;
911         ts->mem_allocated = 1;
912         ts->mem_base = base_ts;
913         ts->mem_offset = offset + bigendian * 4;
914         pstrcpy(buf, sizeof(buf), name);
915         pstrcat(buf, sizeof(buf), "_0");
916         ts->name = strdup(buf);
917 
918         tcg_debug_assert(ts2 == ts + 1);
919         ts2->base_type = TCG_TYPE_I64;
920         ts2->type = TCG_TYPE_I32;
921         ts2->indirect_reg = indirect_reg;
922         ts2->mem_allocated = 1;
923         ts2->mem_base = base_ts;
924         ts2->mem_offset = offset + (1 - bigendian) * 4;
925         pstrcpy(buf, sizeof(buf), name);
926         pstrcat(buf, sizeof(buf), "_1");
927         ts2->name = strdup(buf);
928     } else {
929         ts->base_type = type;
930         ts->type = type;
931         ts->indirect_reg = indirect_reg;
932         ts->mem_allocated = 1;
933         ts->mem_base = base_ts;
934         ts->mem_offset = offset;
935         ts->name = name;
936     }
937     return ts;
938 }
939 
940 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
941 {
942     TCGContext *s = tcg_ctx;
943     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
944     TCGTemp *ts;
945     int idx, k;
946 
947     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
948     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
949     if (idx < TCG_MAX_TEMPS) {
950         /* There is already an available temp with the right type.  */
951         clear_bit(idx, s->free_temps[k].l);
952 
953         ts = &s->temps[idx];
954         ts->temp_allocated = 1;
955         tcg_debug_assert(ts->base_type == type);
956         tcg_debug_assert(ts->kind == kind);
957     } else {
958         ts = tcg_temp_alloc(s);
959         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
960             TCGTemp *ts2 = tcg_temp_alloc(s);
961 
962             ts->base_type = type;
963             ts->type = TCG_TYPE_I32;
964             ts->temp_allocated = 1;
965             ts->kind = kind;
966 
967             tcg_debug_assert(ts2 == ts + 1);
968             ts2->base_type = TCG_TYPE_I64;
969             ts2->type = TCG_TYPE_I32;
970             ts2->temp_allocated = 1;
971             ts2->kind = kind;
972         } else {
973             ts->base_type = type;
974             ts->type = type;
975             ts->temp_allocated = 1;
976             ts->kind = kind;
977         }
978     }
979 
980 #if defined(CONFIG_DEBUG_TCG)
981     s->temps_in_use++;
982 #endif
983     return ts;
984 }
985 
986 TCGv_vec tcg_temp_new_vec(TCGType type)
987 {
988     TCGTemp *t;
989 
990 #ifdef CONFIG_DEBUG_TCG
991     switch (type) {
992     case TCG_TYPE_V64:
993         assert(TCG_TARGET_HAS_v64);
994         break;
995     case TCG_TYPE_V128:
996         assert(TCG_TARGET_HAS_v128);
997         break;
998     case TCG_TYPE_V256:
999         assert(TCG_TARGET_HAS_v256);
1000         break;
1001     default:
1002         g_assert_not_reached();
1003     }
1004 #endif
1005 
1006     t = tcg_temp_new_internal(type, 0);
1007     return temp_tcgv_vec(t);
1008 }
1009 
1010 /* Create a new temp of the same type as an existing temp.  */
1011 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1012 {
1013     TCGTemp *t = tcgv_vec_temp(match);
1014 
1015     tcg_debug_assert(t->temp_allocated != 0);
1016 
1017     t = tcg_temp_new_internal(t->base_type, 0);
1018     return temp_tcgv_vec(t);
1019 }
1020 
1021 void tcg_temp_free_internal(TCGTemp *ts)
1022 {
1023     TCGContext *s = tcg_ctx;
1024     int k, idx;
1025 
1026     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1027     if (ts->kind == TEMP_CONST) {
1028         return;
1029     }
1030 
1031 #if defined(CONFIG_DEBUG_TCG)
1032     s->temps_in_use--;
1033     if (s->temps_in_use < 0) {
1034         fprintf(stderr, "More temporaries freed than allocated!\n");
1035     }
1036 #endif
1037 
1038     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1039     tcg_debug_assert(ts->temp_allocated != 0);
1040     ts->temp_allocated = 0;
1041 
1042     idx = temp_idx(ts);
1043     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1044     set_bit(idx, s->free_temps[k].l);
1045 }
1046 
1047 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1048 {
1049     TCGContext *s = tcg_ctx;
1050     GHashTable *h = s->const_table[type];
1051     TCGTemp *ts;
1052 
1053     if (h == NULL) {
1054         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1055         s->const_table[type] = h;
1056     }
1057 
1058     ts = g_hash_table_lookup(h, &val);
1059     if (ts == NULL) {
1060         ts = tcg_temp_alloc(s);
1061 
1062         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1063             TCGTemp *ts2 = tcg_temp_alloc(s);
1064 
1065             ts->base_type = TCG_TYPE_I64;
1066             ts->type = TCG_TYPE_I32;
1067             ts->kind = TEMP_CONST;
1068             ts->temp_allocated = 1;
1069             /*
1070              * Retain the full value of the 64-bit constant in the low
1071              * part, so that the hash table works.  Actual uses will
1072              * truncate the value to the low part.
1073              */
1074             ts->val = val;
1075 
1076             tcg_debug_assert(ts2 == ts + 1);
1077             ts2->base_type = TCG_TYPE_I64;
1078             ts2->type = TCG_TYPE_I32;
1079             ts2->kind = TEMP_CONST;
1080             ts2->temp_allocated = 1;
1081             ts2->val = val >> 32;
1082         } else {
1083             ts->base_type = type;
1084             ts->type = type;
1085             ts->kind = TEMP_CONST;
1086             ts->temp_allocated = 1;
1087             ts->val = val;
1088         }
1089         g_hash_table_insert(h, &ts->val, ts);
1090     }
1091 
1092     return ts;
1093 }
1094 
1095 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1096 {
1097     val = dup_const(vece, val);
1098     return temp_tcgv_vec(tcg_constant_internal(type, val));
1099 }
1100 
1101 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1102 {
1103     TCGTemp *t = tcgv_vec_temp(match);
1104 
1105     tcg_debug_assert(t->temp_allocated != 0);
1106     return tcg_constant_vec(t->base_type, vece, val);
1107 }
1108 
1109 TCGv_i32 tcg_const_i32(int32_t val)
1110 {
1111     TCGv_i32 t0;
1112     t0 = tcg_temp_new_i32();
1113     tcg_gen_movi_i32(t0, val);
1114     return t0;
1115 }
1116 
1117 TCGv_i64 tcg_const_i64(int64_t val)
1118 {
1119     TCGv_i64 t0;
1120     t0 = tcg_temp_new_i64();
1121     tcg_gen_movi_i64(t0, val);
1122     return t0;
1123 }
1124 
1125 TCGv_i32 tcg_const_local_i32(int32_t val)
1126 {
1127     TCGv_i32 t0;
1128     t0 = tcg_temp_local_new_i32();
1129     tcg_gen_movi_i32(t0, val);
1130     return t0;
1131 }
1132 
1133 TCGv_i64 tcg_const_local_i64(int64_t val)
1134 {
1135     TCGv_i64 t0;
1136     t0 = tcg_temp_local_new_i64();
1137     tcg_gen_movi_i64(t0, val);
1138     return t0;
1139 }
1140 
1141 #if defined(CONFIG_DEBUG_TCG)
1142 void tcg_clear_temp_count(void)
1143 {
1144     TCGContext *s = tcg_ctx;
1145     s->temps_in_use = 0;
1146 }
1147 
1148 int tcg_check_temp_count(void)
1149 {
1150     TCGContext *s = tcg_ctx;
1151     if (s->temps_in_use) {
1152         /* Clear the count so that we don't give another
1153          * warning immediately next time around.
1154          */
1155         s->temps_in_use = 0;
1156         return 1;
1157     }
1158     return 0;
1159 }
1160 #endif
1161 
1162 /* Return true if OP may appear in the opcode stream.
1163    Test the runtime variable that controls each opcode.  */
/*
 * How the answer is derived:
 *  - the control-flow/qemu_ld/st group and the basic 32-bit integer group
 *    are reported as supported unconditionally;
 *  - optional integer ops report the backend's TCG_TARGET_HAS_* value;
 *  - brcond2/setcond2 are reported only when TCG_TARGET_REG_BITS == 32,
 *    the basic 64-bit integer group only when it is 64;
 *  - every vector op requires have_vec, i.e. at least one of
 *    TCG_TARGET_HAS_v64/v128/v256, in addition to any per-op setting;
 *  - an opcode that is not listed must lie strictly between
 *    INDEX_op_last_generic and NB_OPS (debug-asserted) and is reported
 *    as supported.
 */
1164 bool tcg_op_supported(TCGOpcode op)
1165 {
         /* Non-zero if the backend enables any vector width at all. */
1166     const bool have_vec
1167         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1168 
1169     switch (op) {
1170     case INDEX_op_discard:
1171     case INDEX_op_set_label:
1172     case INDEX_op_call:
1173     case INDEX_op_br:
1174     case INDEX_op_mb:
1175     case INDEX_op_insn_start:
1176     case INDEX_op_exit_tb:
1177     case INDEX_op_goto_tb:
1178     case INDEX_op_goto_ptr:
1179     case INDEX_op_qemu_ld_i32:
1180     case INDEX_op_qemu_st_i32:
1181     case INDEX_op_qemu_ld_i64:
1182     case INDEX_op_qemu_st_i64:
1183         return true;
1184 
1185     case INDEX_op_qemu_st8_i32:
1186         return TCG_TARGET_HAS_qemu_st8_i32;
1187 
         /* Basic 32-bit integer ops: always supported. */
1188     case INDEX_op_mov_i32:
1189     case INDEX_op_setcond_i32:
1190     case INDEX_op_brcond_i32:
1191     case INDEX_op_ld8u_i32:
1192     case INDEX_op_ld8s_i32:
1193     case INDEX_op_ld16u_i32:
1194     case INDEX_op_ld16s_i32:
1195     case INDEX_op_ld_i32:
1196     case INDEX_op_st8_i32:
1197     case INDEX_op_st16_i32:
1198     case INDEX_op_st_i32:
1199     case INDEX_op_add_i32:
1200     case INDEX_op_sub_i32:
1201     case INDEX_op_mul_i32:
1202     case INDEX_op_and_i32:
1203     case INDEX_op_or_i32:
1204     case INDEX_op_xor_i32:
1205     case INDEX_op_shl_i32:
1206     case INDEX_op_shr_i32:
1207     case INDEX_op_sar_i32:
1208         return true;
1209 
         /* Optional 32-bit integer ops: one TCG_TARGET_HAS_* setting each. */
1210     case INDEX_op_movcond_i32:
1211         return TCG_TARGET_HAS_movcond_i32;
1212     case INDEX_op_div_i32:
1213     case INDEX_op_divu_i32:
1214         return TCG_TARGET_HAS_div_i32;
1215     case INDEX_op_rem_i32:
1216     case INDEX_op_remu_i32:
1217         return TCG_TARGET_HAS_rem_i32;
1218     case INDEX_op_div2_i32:
1219     case INDEX_op_divu2_i32:
1220         return TCG_TARGET_HAS_div2_i32;
1221     case INDEX_op_rotl_i32:
1222     case INDEX_op_rotr_i32:
1223         return TCG_TARGET_HAS_rot_i32;
1224     case INDEX_op_deposit_i32:
1225         return TCG_TARGET_HAS_deposit_i32;
1226     case INDEX_op_extract_i32:
1227         return TCG_TARGET_HAS_extract_i32;
1228     case INDEX_op_sextract_i32:
1229         return TCG_TARGET_HAS_sextract_i32;
1230     case INDEX_op_extract2_i32:
1231         return TCG_TARGET_HAS_extract2_i32;
1232     case INDEX_op_add2_i32:
1233         return TCG_TARGET_HAS_add2_i32;
1234     case INDEX_op_sub2_i32:
1235         return TCG_TARGET_HAS_sub2_i32;
1236     case INDEX_op_mulu2_i32:
1237         return TCG_TARGET_HAS_mulu2_i32;
1238     case INDEX_op_muls2_i32:
1239         return TCG_TARGET_HAS_muls2_i32;
1240     case INDEX_op_muluh_i32:
1241         return TCG_TARGET_HAS_muluh_i32;
1242     case INDEX_op_mulsh_i32:
1243         return TCG_TARGET_HAS_mulsh_i32;
1244     case INDEX_op_ext8s_i32:
1245         return TCG_TARGET_HAS_ext8s_i32;
1246     case INDEX_op_ext16s_i32:
1247         return TCG_TARGET_HAS_ext16s_i32;
1248     case INDEX_op_ext8u_i32:
1249         return TCG_TARGET_HAS_ext8u_i32;
1250     case INDEX_op_ext16u_i32:
1251         return TCG_TARGET_HAS_ext16u_i32;
1252     case INDEX_op_bswap16_i32:
1253         return TCG_TARGET_HAS_bswap16_i32;
1254     case INDEX_op_bswap32_i32:
1255         return TCG_TARGET_HAS_bswap32_i32;
1256     case INDEX_op_not_i32:
1257         return TCG_TARGET_HAS_not_i32;
1258     case INDEX_op_neg_i32:
1259         return TCG_TARGET_HAS_neg_i32;
1260     case INDEX_op_andc_i32:
1261         return TCG_TARGET_HAS_andc_i32;
1262     case INDEX_op_orc_i32:
1263         return TCG_TARGET_HAS_orc_i32;
1264     case INDEX_op_eqv_i32:
1265         return TCG_TARGET_HAS_eqv_i32;
1266     case INDEX_op_nand_i32:
1267         return TCG_TARGET_HAS_nand_i32;
1268     case INDEX_op_nor_i32:
1269         return TCG_TARGET_HAS_nor_i32;
1270     case INDEX_op_clz_i32:
1271         return TCG_TARGET_HAS_clz_i32;
1272     case INDEX_op_ctz_i32:
1273         return TCG_TARGET_HAS_ctz_i32;
1274     case INDEX_op_ctpop_i32:
1275         return TCG_TARGET_HAS_ctpop_i32;
1276 
         /* Double-word comparisons exist only on 32-bit hosts. */
1277     case INDEX_op_brcond2_i32:
1278     case INDEX_op_setcond2_i32:
1279         return TCG_TARGET_REG_BITS == 32;
1280 
         /* Basic 64-bit integer ops: supported exactly on 64-bit hosts. */
1281     case INDEX_op_mov_i64:
1282     case INDEX_op_setcond_i64:
1283     case INDEX_op_brcond_i64:
1284     case INDEX_op_ld8u_i64:
1285     case INDEX_op_ld8s_i64:
1286     case INDEX_op_ld16u_i64:
1287     case INDEX_op_ld16s_i64:
1288     case INDEX_op_ld32u_i64:
1289     case INDEX_op_ld32s_i64:
1290     case INDEX_op_ld_i64:
1291     case INDEX_op_st8_i64:
1292     case INDEX_op_st16_i64:
1293     case INDEX_op_st32_i64:
1294     case INDEX_op_st_i64:
1295     case INDEX_op_add_i64:
1296     case INDEX_op_sub_i64:
1297     case INDEX_op_mul_i64:
1298     case INDEX_op_and_i64:
1299     case INDEX_op_or_i64:
1300     case INDEX_op_xor_i64:
1301     case INDEX_op_shl_i64:
1302     case INDEX_op_shr_i64:
1303     case INDEX_op_sar_i64:
1304     case INDEX_op_ext_i32_i64:
1305     case INDEX_op_extu_i32_i64:
1306         return TCG_TARGET_REG_BITS == 64;
1307 
         /* Optional 64-bit integer ops: one TCG_TARGET_HAS_* setting each. */
1308     case INDEX_op_movcond_i64:
1309         return TCG_TARGET_HAS_movcond_i64;
1310     case INDEX_op_div_i64:
1311     case INDEX_op_divu_i64:
1312         return TCG_TARGET_HAS_div_i64;
1313     case INDEX_op_rem_i64:
1314     case INDEX_op_remu_i64:
1315         return TCG_TARGET_HAS_rem_i64;
1316     case INDEX_op_div2_i64:
1317     case INDEX_op_divu2_i64:
1318         return TCG_TARGET_HAS_div2_i64;
1319     case INDEX_op_rotl_i64:
1320     case INDEX_op_rotr_i64:
1321         return TCG_TARGET_HAS_rot_i64;
1322     case INDEX_op_deposit_i64:
1323         return TCG_TARGET_HAS_deposit_i64;
1324     case INDEX_op_extract_i64:
1325         return TCG_TARGET_HAS_extract_i64;
1326     case INDEX_op_sextract_i64:
1327         return TCG_TARGET_HAS_sextract_i64;
1328     case INDEX_op_extract2_i64:
1329         return TCG_TARGET_HAS_extract2_i64;
1330     case INDEX_op_extrl_i64_i32:
1331         return TCG_TARGET_HAS_extrl_i64_i32;
1332     case INDEX_op_extrh_i64_i32:
1333         return TCG_TARGET_HAS_extrh_i64_i32;
1334     case INDEX_op_ext8s_i64:
1335         return TCG_TARGET_HAS_ext8s_i64;
1336     case INDEX_op_ext16s_i64:
1337         return TCG_TARGET_HAS_ext16s_i64;
1338     case INDEX_op_ext32s_i64:
1339         return TCG_TARGET_HAS_ext32s_i64;
1340     case INDEX_op_ext8u_i64:
1341         return TCG_TARGET_HAS_ext8u_i64;
1342     case INDEX_op_ext16u_i64:
1343         return TCG_TARGET_HAS_ext16u_i64;
1344     case INDEX_op_ext32u_i64:
1345         return TCG_TARGET_HAS_ext32u_i64;
1346     case INDEX_op_bswap16_i64:
1347         return TCG_TARGET_HAS_bswap16_i64;
1348     case INDEX_op_bswap32_i64:
1349         return TCG_TARGET_HAS_bswap32_i64;
1350     case INDEX_op_bswap64_i64:
1351         return TCG_TARGET_HAS_bswap64_i64;
1352     case INDEX_op_not_i64:
1353         return TCG_TARGET_HAS_not_i64;
1354     case INDEX_op_neg_i64:
1355         return TCG_TARGET_HAS_neg_i64;
1356     case INDEX_op_andc_i64:
1357         return TCG_TARGET_HAS_andc_i64;
1358     case INDEX_op_orc_i64:
1359         return TCG_TARGET_HAS_orc_i64;
1360     case INDEX_op_eqv_i64:
1361         return TCG_TARGET_HAS_eqv_i64;
1362     case INDEX_op_nand_i64:
1363         return TCG_TARGET_HAS_nand_i64;
1364     case INDEX_op_nor_i64:
1365         return TCG_TARGET_HAS_nor_i64;
1366     case INDEX_op_clz_i64:
1367         return TCG_TARGET_HAS_clz_i64;
1368     case INDEX_op_ctz_i64:
1369         return TCG_TARGET_HAS_ctz_i64;
1370     case INDEX_op_ctpop_i64:
1371         return TCG_TARGET_HAS_ctpop_i64;
1372     case INDEX_op_add2_i64:
1373         return TCG_TARGET_HAS_add2_i64;
1374     case INDEX_op_sub2_i64:
1375         return TCG_TARGET_HAS_sub2_i64;
1376     case INDEX_op_mulu2_i64:
1377         return TCG_TARGET_HAS_mulu2_i64;
1378     case INDEX_op_muls2_i64:
1379         return TCG_TARGET_HAS_muls2_i64;
1380     case INDEX_op_muluh_i64:
1381         return TCG_TARGET_HAS_muluh_i64;
1382     case INDEX_op_mulsh_i64:
1383         return TCG_TARGET_HAS_mulsh_i64;
1384 
         /* Vector ops: all of them are gated on have_vec. */
1385     case INDEX_op_mov_vec:
1386     case INDEX_op_dup_vec:
1387     case INDEX_op_dupm_vec:
1388     case INDEX_op_ld_vec:
1389     case INDEX_op_st_vec:
1390     case INDEX_op_add_vec:
1391     case INDEX_op_sub_vec:
1392     case INDEX_op_and_vec:
1393     case INDEX_op_or_vec:
1394     case INDEX_op_xor_vec:
1395     case INDEX_op_cmp_vec:
1396         return have_vec;
1397     case INDEX_op_dup2_vec:
1398         return have_vec && TCG_TARGET_REG_BITS == 32;
1399     case INDEX_op_not_vec:
1400         return have_vec && TCG_TARGET_HAS_not_vec;
1401     case INDEX_op_neg_vec:
1402         return have_vec && TCG_TARGET_HAS_neg_vec;
1403     case INDEX_op_abs_vec:
1404         return have_vec && TCG_TARGET_HAS_abs_vec;
1405     case INDEX_op_andc_vec:
1406         return have_vec && TCG_TARGET_HAS_andc_vec;
1407     case INDEX_op_orc_vec:
1408         return have_vec && TCG_TARGET_HAS_orc_vec;
1409     case INDEX_op_mul_vec:
1410         return have_vec && TCG_TARGET_HAS_mul_vec;
1411     case INDEX_op_shli_vec:
1412     case INDEX_op_shri_vec:
1413     case INDEX_op_sari_vec:
1414         return have_vec && TCG_TARGET_HAS_shi_vec;
1415     case INDEX_op_shls_vec:
1416     case INDEX_op_shrs_vec:
1417     case INDEX_op_sars_vec:
1418         return have_vec && TCG_TARGET_HAS_shs_vec;
1419     case INDEX_op_shlv_vec:
1420     case INDEX_op_shrv_vec:
1421     case INDEX_op_sarv_vec:
1422         return have_vec && TCG_TARGET_HAS_shv_vec;
1423     case INDEX_op_rotli_vec:
1424         return have_vec && TCG_TARGET_HAS_roti_vec;
1425     case INDEX_op_rotls_vec:
1426         return have_vec && TCG_TARGET_HAS_rots_vec;
1427     case INDEX_op_rotlv_vec:
1428     case INDEX_op_rotrv_vec:
1429         return have_vec && TCG_TARGET_HAS_rotv_vec;
1430     case INDEX_op_ssadd_vec:
1431     case INDEX_op_usadd_vec:
1432     case INDEX_op_sssub_vec:
1433     case INDEX_op_ussub_vec:
1434         return have_vec && TCG_TARGET_HAS_sat_vec;
1435     case INDEX_op_smin_vec:
1436     case INDEX_op_umin_vec:
1437     case INDEX_op_smax_vec:
1438     case INDEX_op_umax_vec:
1439         return have_vec && TCG_TARGET_HAS_minmax_vec;
1440     case INDEX_op_bitsel_vec:
1441         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1442     case INDEX_op_cmpsel_vec:
1443         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1444 
1445     default:
             /*
              * Opcodes not listed above must be numbered after
              * INDEX_op_last_generic; they are reported as supported.
              */
1446         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1447         return true;
1448     }
1449 }
1450 
1451 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1452    and endian swap. Maybe it would be better to do the alignment
1453    and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op that invokes helper FUNC.
 *
 * @func:  helper entry point; also the key used to look up its
 *         TCGHelperInfo in helper_table.
 *         NOTE(review): the lookup result is dereferenced without a NULL
 *         check, so FUNC presumably must already be registered there.
 * @ret:   temp that receives the result, or NULL when there is none.
 * @nargs: number of entries in @args.
 * @args:  argument temps.  When TCG_TARGET_EXTEND_ARGS is defined on a
 *         64-bit host, entries describing 32-bit arguments are overwritten
 *         in place with widened 64-bit temps, which are freed again before
 *         this function returns.
 *
 * typemask packs one 3-bit type code per value: bits 0-2 describe the
 * return value and bits (i + 1) * 3 onward describe argument i.  Bit 0 of
 * a code is the signedness flag.
 *
 * The op's argument array is filled as: return temp(s), then input temps
 * (with TCG_CALL_DUMMY_ARG padding where alignment is wanted), then FUNC
 * and the TCGHelperInfo pointer.  TCGOP_CALLO/TCGOP_CALLI record the
 * number of return and input slots.
 */
1454 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1455 {
1456     int i, real_args, nb_rets, pi;
1457     unsigned typemask;
1458     const TCGHelperInfo *info;
1459     TCGOp *op;
1460 
1461     info = g_hash_table_lookup(helper_table, (gpointer)func);
1462     typemask = info->typemask;
1463 
1464 #ifdef CONFIG_PLUGIN
1465     /* detect non-plugin helpers */
         /* strncmp() is non-zero when the name does NOT start with "plugin_". */
1466     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1467         tcg_ctx->plugin_insn->calls_helpers = true;
1468     }
1469 #endif
1470 
1471 #if defined(__sparc__) && !defined(__arch64__) \
1472     && !defined(CONFIG_TCG_INTERPRETER)
1473     /* We have 64-bit values in one register, but need to pass as two
1474        separate parameters.  Split them.  */
1475     int orig_typemask = typemask;
1476     int orig_nargs = nargs;
1477     TCGv_i64 retl, reth;
1478     TCGTemp *split_args[MAX_OPC_PARAM];
1479 
1480     retl = NULL;
1481     reth = NULL;
         /* Rebuild typemask for the split list; the return code is not copied. */
1482     typemask = 0;
1483     for (i = real_args = 0; i < nargs; ++i) {
1484         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1485         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1486 
1487         if (is_64bit) {
1488             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1489             TCGv_i32 h = tcg_temp_new_i32();
1490             TCGv_i32 l = tcg_temp_new_i32();
1491             tcg_gen_extr_i64_i32(l, h, orig);
                 /* High half is passed first, then the low half. */
1492             split_args[real_args++] = tcgv_i32_temp(h);
1493             typemask |= dh_typecode_i32 << (real_args * 3);
1494             split_args[real_args++] = tcgv_i32_temp(l);
1495             typemask |= dh_typecode_i32 << (real_args * 3);
1496         } else {
1497             split_args[real_args++] = args[i];
1498             typemask |= argtype << (real_args * 3);
1499         }
1500     }
         /* From here on, work on the split argument list. */
1501     nargs = real_args;
1502     args = split_args;
1503 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
         /* Widen each 32-bit argument to 64 bits according to its signedness. */
1504     for (i = 0; i < nargs; ++i) {
1505         int argtype = extract32(typemask, (i + 1) * 3, 3);
1506         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1507         bool is_signed = argtype & 1;
1508 
1509         if (is_32bit) {
1510             TCGv_i64 temp = tcg_temp_new_i64();
1511             TCGv_i32 orig = temp_tcgv_i32(args[i]);
1512             if (is_signed) {
1513                 tcg_gen_ext_i32_i64(temp, orig);
1514             } else {
1515                 tcg_gen_extu_i32_i64(temp, orig);
1516             }
                 /* This replaces the entry in the caller-provided array. */
1517             args[i] = tcgv_i64_temp(temp);
1518         }
1519     }
1520 #endif /* TCG_TARGET_EXTEND_ARGS */
1521 
1522     op = tcg_emit_op(INDEX_op_call);
1523 
         /* pi is the next free slot in op->args; return temps come first. */
1524     pi = 0;
1525     if (ret != NULL) {
1526 #if defined(__sparc__) && !defined(__arch64__) \
1527     && !defined(CONFIG_TCG_INTERPRETER)
1528         if ((typemask & 6) == dh_typecode_i64) {
1529             /* The 32-bit ABI is going to return the 64-bit value in
1530                the %o0/%o1 register pair.  Prepare for this by using
1531                two return temporaries, and reassemble below.  */
1532             retl = tcg_temp_new_i64();
1533             reth = tcg_temp_new_i64();
1534             op->args[pi++] = tcgv_i64_arg(reth);
1535             op->args[pi++] = tcgv_i64_arg(retl);
1536             nb_rets = 2;
1537         } else {
1538             op->args[pi++] = temp_arg(ret);
1539             nb_rets = 1;
1540         }
1541 #else
             /* A 64-bit result on a 32-bit host uses both temps ret, ret + 1. */
1542         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1543 #ifdef HOST_WORDS_BIGENDIAN
1544             op->args[pi++] = temp_arg(ret + 1);
1545             op->args[pi++] = temp_arg(ret);
1546 #else
1547             op->args[pi++] = temp_arg(ret);
1548             op->args[pi++] = temp_arg(ret + 1);
1549 #endif
1550             nb_rets = 2;
1551         } else {
1552             op->args[pi++] = temp_arg(ret);
1553             nb_rets = 1;
1554         }
1555 #endif
1556     } else {
1557         nb_rets = 0;
1558     }
1559     TCGOP_CALLO(op) = nb_rets;
1560 
         /* real_args counts input slots actually emitted, including padding. */
1561     real_args = 0;
1562     for (i = 0; i < nargs; i++) {
1563         int argtype = extract32(typemask, (i + 1) * 3, 3);
1564         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1565         bool want_align = false;
1566 
1567 #if defined(CONFIG_TCG_INTERPRETER)
1568         /*
1569          * Align all arguments, so that they land in predictable places
1570          * for passing off to ffi_call.
1571          */
1572         want_align = true;
1573 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1574         /* Some targets want aligned 64 bit args */
1575         want_align = is_64bit;
1576 #endif
1577 
             /* On 32-bit hosts, pad so an aligned argument starts on an even slot. */
1578         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1579             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1580             real_args++;
1581         }
1582 
1583         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1584             /*
1585              * If stack grows up, then we will be placing successive
1586              * arguments at lower addresses, which means we need to
1587              * reverse the order compared to how we would normally
1588              * treat either big or little-endian.  For those arguments
1589              * that will wind up in registers, this still works for
1590              * HPPA (the only current STACK_GROWSUP target) since the
1591              * argument registers are *also* allocated in decreasing
1592              * order.  If another such target is added, this logic may
1593              * have to get more complicated to differentiate between
1594              * stack arguments and register arguments.
1595              */
1596 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1597             op->args[pi++] = temp_arg(args[i] + 1);
1598             op->args[pi++] = temp_arg(args[i]);
1599 #else
1600             op->args[pi++] = temp_arg(args[i]);
1601             op->args[pi++] = temp_arg(args[i] + 1);
1602 #endif
1603             real_args += 2;
1604             continue;
1605         }
1606 
1607         op->args[pi++] = temp_arg(args[i]);
1608         real_args++;
1609     }
         /* The function pointer and the helper info follow the inputs. */
1610     op->args[pi++] = (uintptr_t)func;
1611     op->args[pi++] = (uintptr_t)info;
1612     TCGOP_CALLI(op) = real_args;
1613 
1614     /* Make sure the fields didn't overflow.  */
1615     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1616     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1617 
1618 #if defined(__sparc__) && !defined(__arch64__) \
1619     && !defined(CONFIG_TCG_INTERPRETER)
1620     /* Free all of the parts we allocated above.  */
         /* args now points at split_args; walk it alongside the original types. */
1621     for (i = real_args = 0; i < orig_nargs; ++i) {
1622         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1623         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1624 
1625         if (is_64bit) {
1626             tcg_temp_free_internal(args[real_args++]);
1627             tcg_temp_free_internal(args[real_args++]);
1628         } else {
1629             real_args++;
1630         }
1631     }
1632     if ((orig_typemask & 6) == dh_typecode_i64) {
1633         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1634            Note that describing these as TCGv_i64 eliminates an unnecessary
1635            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1636         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1637         tcg_temp_free_i64(retl);
1638         tcg_temp_free_i64(reth);
1639     }
1640 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
         /* Release the widened temps created before the call was emitted. */
1641     for (i = 0; i < nargs; ++i) {
1642         int argtype = extract32(typemask, (i + 1) * 3, 3);
1643         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1644 
1645         if (is_32bit) {
1646             tcg_temp_free_internal(args[i]);
1647         }
1648     }
1649 #endif /* TCG_TARGET_EXTEND_ARGS */
1650 }
1651 
1652 static void tcg_reg_alloc_start(TCGContext *s)
1653 {
1654     int i, n;
1655 
1656     for (i = 0, n = s->nb_temps; i < n; i++) {
1657         TCGTemp *ts = &s->temps[i];
1658         TCGTempVal val = TEMP_VAL_MEM;
1659 
1660         switch (ts->kind) {
1661         case TEMP_CONST:
1662             val = TEMP_VAL_CONST;
1663             break;
1664         case TEMP_FIXED:
1665             val = TEMP_VAL_REG;
1666             break;
1667         case TEMP_GLOBAL:
1668             break;
1669         case TEMP_NORMAL:
1670             val = TEMP_VAL_DEAD;
1671             /* fall through */
1672         case TEMP_LOCAL:
1673             ts->mem_allocated = 0;
1674             break;
1675         default:
1676             g_assert_not_reached();
1677         }
1678         ts->val_type = val;
1679     }
1680 
1681     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1682 }
1683 
1684 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1685                                  TCGTemp *ts)
1686 {
1687     int idx = temp_idx(ts);
1688 
1689     switch (ts->kind) {
1690     case TEMP_FIXED:
1691     case TEMP_GLOBAL:
1692         pstrcpy(buf, buf_size, ts->name);
1693         break;
1694     case TEMP_LOCAL:
1695         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1696         break;
1697     case TEMP_NORMAL:
1698         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1699         break;
1700     case TEMP_CONST:
1701         switch (ts->type) {
1702         case TCG_TYPE_I32:
1703             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1704             break;
1705 #if TCG_TARGET_REG_BITS > 32
1706         case TCG_TYPE_I64:
1707             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1708             break;
1709 #endif
1710         case TCG_TYPE_V64:
1711         case TCG_TYPE_V128:
1712         case TCG_TYPE_V256:
1713             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1714                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1715             break;
1716         default:
1717             g_assert_not_reached();
1718         }
1719         break;
1720     }
1721     return buf;
1722 }
1723 
1724 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1725                              int buf_size, TCGArg arg)
1726 {
1727     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1728 }
1729 
/* Printable names of the TCG comparison conditions, indexed by TCG_COND_*. */
1730 static const char * const cond_name[] =
1731 {
1732     [TCG_COND_NEVER] = "never",
1733     [TCG_COND_ALWAYS] = "always",
1734     [TCG_COND_EQ] = "eq",
1735     [TCG_COND_NE] = "ne",
1736     [TCG_COND_LT] = "lt",
1737     [TCG_COND_GE] = "ge",
1738     [TCG_COND_LE] = "le",
1739     [TCG_COND_GT] = "gt",
1740     [TCG_COND_LTU] = "ltu",
1741     [TCG_COND_GEU] = "geu",
1742     [TCG_COND_LEU] = "leu",
1743     [TCG_COND_GTU] = "gtu"
1744 };
1745 
/*
 * Printable names of memory operations, indexed by the MO_* combination of
 * size, signedness and endianness.  Index values that are not listed here
 * are left as NULL by the designated initializers.
 */
1746 static const char * const ldst_name[] =
1747 {
1748     [MO_UB]   = "ub",
1749     [MO_SB]   = "sb",
1750     [MO_LEUW] = "leuw",
1751     [MO_LESW] = "lesw",
1752     [MO_LEUL] = "leul",
1753     [MO_LESL] = "lesl",
1754     [MO_LEUQ] = "leq",
1755     [MO_BEUW] = "beuw",
1756     [MO_BESW] = "besw",
1757     [MO_BEUL] = "beul",
1758     [MO_BESL] = "besl",
1759     [MO_BEUQ] = "beq",
1760 };
1761 
/*
 * Printable prefixes for the alignment field of a memory operation,
 * indexed by the MO_* alignment value shifted right by MO_ASHIFT.
 * With TARGET_ALIGNED_ONLY, MO_ALIGN prints as the empty string and
 * MO_UNALN as "un+"; otherwise MO_UNALN is empty and MO_ALIGN is "al+".
 */
1762 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1763 #ifdef TARGET_ALIGNED_ONLY
1764     [MO_UNALN >> MO_ASHIFT]    = "un+",
1765     [MO_ALIGN >> MO_ASHIFT]    = "",
1766 #else
1767     [MO_UNALN >> MO_ASHIFT]    = "",
1768     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1769 #endif
1770     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1771     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1772     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1773     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1774     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1775     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1776 };
1777 
/*
 * Printable names for bswap flag combinations, indexed by the OR of
 * TCG_BSWAP_* flags.  Each entry is a 6-byte array, which is exactly
 * enough for the longest strings ("iz,oz", "iz,os") plus the terminating
 * NUL.  Combinations that are not listed are left as empty strings.
 */
1778 static const char bswap_flag_name[][6] = {
1779     [TCG_BSWAP_IZ] = "iz",
1780     [TCG_BSWAP_OZ] = "oz",
1781     [TCG_BSWAP_OS] = "os",
1782     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1783     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1784 };
1785 
1786 static inline bool tcg_regset_single(TCGRegSet d)
1787 {
1788     return (d & (d - 1)) == 0;
1789 }
1790 
1791 static inline TCGReg tcg_regset_first(TCGRegSet d)
1792 {
1793     if (TCG_TARGET_NB_REGS <= 32) {
1794         return ctz32(d);
1795     } else {
1796         return ctz64(d);
1797     }
1798 }
1799 
1800 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1801 {
1802     char buf[128];
1803     TCGOp *op;
1804 
1805     QTAILQ_FOREACH(op, &s->ops, link) {
1806         int i, k, nb_oargs, nb_iargs, nb_cargs;
1807         const TCGOpDef *def;
1808         TCGOpcode c;
1809         int col = 0;
1810 
1811         c = op->opc;
1812         def = &tcg_op_defs[c];
1813 
1814         if (c == INDEX_op_insn_start) {
1815             nb_oargs = 0;
1816             col += qemu_log("\n ----");
1817 
1818             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1819                 target_ulong a;
1820 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1821                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1822 #else
1823                 a = op->args[i];
1824 #endif
1825                 col += qemu_log(" " TARGET_FMT_lx, a);
1826             }
1827         } else if (c == INDEX_op_call) {
1828             const TCGHelperInfo *info = tcg_call_info(op);
1829             void *func = tcg_call_func(op);
1830 
1831             /* variable number of arguments */
1832             nb_oargs = TCGOP_CALLO(op);
1833             nb_iargs = TCGOP_CALLI(op);
1834             nb_cargs = def->nb_cargs;
1835 
1836             col += qemu_log(" %s ", def->name);
1837 
1838             /*
1839              * Print the function name from TCGHelperInfo, if available.
1840              * Note that plugins have a template function for the info,
1841              * but the actual function pointer comes from the plugin.
1842              */
1843             if (func == info->func) {
1844                 col += qemu_log("%s", info->name);
1845             } else {
1846                 col += qemu_log("plugin(%p)", func);
1847             }
1848 
1849             col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1850             for (i = 0; i < nb_oargs; i++) {
1851                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1852                                                        op->args[i]));
1853             }
1854             for (i = 0; i < nb_iargs; i++) {
1855                 TCGArg arg = op->args[nb_oargs + i];
1856                 const char *t = "<dummy>";
1857                 if (arg != TCG_CALL_DUMMY_ARG) {
1858                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1859                 }
1860                 col += qemu_log(",%s", t);
1861             }
1862         } else {
1863             col += qemu_log(" %s ", def->name);
1864 
1865             nb_oargs = def->nb_oargs;
1866             nb_iargs = def->nb_iargs;
1867             nb_cargs = def->nb_cargs;
1868 
1869             if (def->flags & TCG_OPF_VECTOR) {
1870                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1871                                 8 << TCGOP_VECE(op));
1872             }
1873 
1874             k = 0;
1875             for (i = 0; i < nb_oargs; i++) {
1876                 if (k != 0) {
1877                     col += qemu_log(",");
1878                 }
1879                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1880                                                       op->args[k++]));
1881             }
1882             for (i = 0; i < nb_iargs; i++) {
1883                 if (k != 0) {
1884                     col += qemu_log(",");
1885                 }
1886                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1887                                                       op->args[k++]));
1888             }
1889             switch (c) {
1890             case INDEX_op_brcond_i32:
1891             case INDEX_op_setcond_i32:
1892             case INDEX_op_movcond_i32:
1893             case INDEX_op_brcond2_i32:
1894             case INDEX_op_setcond2_i32:
1895             case INDEX_op_brcond_i64:
1896             case INDEX_op_setcond_i64:
1897             case INDEX_op_movcond_i64:
1898             case INDEX_op_cmp_vec:
1899             case INDEX_op_cmpsel_vec:
1900                 if (op->args[k] < ARRAY_SIZE(cond_name)
1901                     && cond_name[op->args[k]]) {
1902                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1903                 } else {
1904                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1905                 }
1906                 i = 1;
1907                 break;
1908             case INDEX_op_qemu_ld_i32:
1909             case INDEX_op_qemu_st_i32:
1910             case INDEX_op_qemu_st8_i32:
1911             case INDEX_op_qemu_ld_i64:
1912             case INDEX_op_qemu_st_i64:
1913                 {
1914                     MemOpIdx oi = op->args[k++];
1915                     MemOp op = get_memop(oi);
1916                     unsigned ix = get_mmuidx(oi);
1917 
1918                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1919                         col += qemu_log(",$0x%x,%u", op, ix);
1920                     } else {
1921                         const char *s_al, *s_op;
1922                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1923                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1924                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1925                     }
1926                     i = 1;
1927                 }
1928                 break;
1929             case INDEX_op_bswap16_i32:
1930             case INDEX_op_bswap16_i64:
1931             case INDEX_op_bswap32_i32:
1932             case INDEX_op_bswap32_i64:
1933             case INDEX_op_bswap64_i64:
1934                 {
1935                     TCGArg flags = op->args[k];
1936                     const char *name = NULL;
1937 
1938                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1939                         name = bswap_flag_name[flags];
1940                     }
1941                     if (name) {
1942                         col += qemu_log(",%s", name);
1943                     } else {
1944                         col += qemu_log(",$0x%" TCG_PRIlx, flags);
1945                     }
1946                     i = k = 1;
1947                 }
1948                 break;
1949             default:
1950                 i = 0;
1951                 break;
1952             }
1953             switch (c) {
1954             case INDEX_op_set_label:
1955             case INDEX_op_br:
1956             case INDEX_op_brcond_i32:
1957             case INDEX_op_brcond_i64:
1958             case INDEX_op_brcond2_i32:
1959                 col += qemu_log("%s$L%d", k ? "," : "",
1960                                 arg_label(op->args[k])->id);
1961                 i++, k++;
1962                 break;
1963             default:
1964                 break;
1965             }
1966             for (; i < nb_cargs; i++, k++) {
1967                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1968             }
1969         }
1970 
1971         if (have_prefs || op->life) {
1972 
1973             QemuLogFile *logfile;
1974 
1975             rcu_read_lock();
1976             logfile = qatomic_rcu_read(&qemu_logfile);
1977             if (logfile) {
1978                 for (; col < 40; ++col) {
1979                     putc(' ', logfile->fd);
1980                 }
1981             }
1982             rcu_read_unlock();
1983         }
1984 
1985         if (op->life) {
1986             unsigned life = op->life;
1987 
1988             if (life & (SYNC_ARG * 3)) {
1989                 qemu_log("  sync:");
1990                 for (i = 0; i < 2; ++i) {
1991                     if (life & (SYNC_ARG << i)) {
1992                         qemu_log(" %d", i);
1993                     }
1994                 }
1995             }
1996             life /= DEAD_ARG;
1997             if (life) {
1998                 qemu_log("  dead:");
1999                 for (i = 0; life; ++i, life >>= 1) {
2000                     if (life & 1) {
2001                         qemu_log(" %d", i);
2002                     }
2003                 }
2004             }
2005         }
2006 
2007         if (have_prefs) {
2008             for (i = 0; i < nb_oargs; ++i) {
2009                 TCGRegSet set = op->output_pref[i];
2010 
2011                 if (i == 0) {
2012                     qemu_log("  pref=");
2013                 } else {
2014                     qemu_log(",");
2015                 }
2016                 if (set == 0) {
2017                     qemu_log("none");
2018                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2019                     qemu_log("all");
2020 #ifdef CONFIG_DEBUG_TCG
2021                 } else if (tcg_regset_single(set)) {
2022                     TCGReg reg = tcg_regset_first(set);
2023                     qemu_log("%s", tcg_target_reg_names[reg]);
2024 #endif
2025                 } else if (TCG_TARGET_NB_REGS <= 32) {
2026                     qemu_log("%#x", (uint32_t)set);
2027                 } else {
2028                     qemu_log("%#" PRIx64, (uint64_t)set);
2029                 }
2030             }
2031         }
2032 
2033         qemu_log("\n");
2034     }
2035 }
2036 
2037 /* we give more priority to constraints with less registers */
2038 static int get_constraint_priority(const TCGOpDef *def, int k)
2039 {
2040     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2041     int n;
2042 
2043     if (arg_ct->oalias) {
2044         /* an alias is equivalent to a single register */
2045         n = 1;
2046     } else {
2047         n = ctpop64(arg_ct->regs);
2048     }
2049     return TCG_TARGET_NB_REGS - n + 1;
2050 }
2051 
2052 /* sort from highest priority to lowest */
2053 static void sort_constraints(TCGOpDef *def, int start, int n)
2054 {
2055     int i, j;
2056     TCGArgConstraint *a = def->args_ct;
2057 
2058     for (i = 0; i < n; i++) {
2059         a[start + i].sort_index = start + i;
2060     }
2061     if (n <= 1) {
2062         return;
2063     }
2064     for (i = 0; i < n - 1; i++) {
2065         for (j = i + 1; j < n; j++) {
2066             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2067             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2068             if (p1 < p2) {
2069                 int tmp = a[start + i].sort_index;
2070                 a[start + i].sort_index = a[start + j].sort_index;
2071                 a[start + j].sort_index = tmp;
2072             }
2073         }
2074     }
2075 }
2076 
2077 static void process_op_defs(TCGContext *s)
2078 {
2079     TCGOpcode op;
2080 
2081     for (op = 0; op < NB_OPS; op++) {
2082         TCGOpDef *def = &tcg_op_defs[op];
2083         const TCGTargetOpDef *tdefs;
2084         int i, nb_args;
2085 
2086         if (def->flags & TCG_OPF_NOT_PRESENT) {
2087             continue;
2088         }
2089 
2090         nb_args = def->nb_iargs + def->nb_oargs;
2091         if (nb_args == 0) {
2092             continue;
2093         }
2094 
2095         /*
2096          * Macro magic should make it impossible, but double-check that
2097          * the array index is in range.  Since the signness of an enum
2098          * is implementation defined, force the result to unsigned.
2099          */
2100         unsigned con_set = tcg_target_op_def(op);
2101         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2102         tdefs = &constraint_sets[con_set];
2103 
2104         for (i = 0; i < nb_args; i++) {
2105             const char *ct_str = tdefs->args_ct_str[i];
2106             /* Incomplete TCGTargetOpDef entry. */
2107             tcg_debug_assert(ct_str != NULL);
2108 
2109             while (*ct_str != '\0') {
2110                 switch(*ct_str) {
2111                 case '0' ... '9':
2112                     {
2113                         int oarg = *ct_str - '0';
2114                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2115                         tcg_debug_assert(oarg < def->nb_oargs);
2116                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2117                         def->args_ct[i] = def->args_ct[oarg];
2118                         /* The output sets oalias.  */
2119                         def->args_ct[oarg].oalias = true;
2120                         def->args_ct[oarg].alias_index = i;
2121                         /* The input sets ialias. */
2122                         def->args_ct[i].ialias = true;
2123                         def->args_ct[i].alias_index = oarg;
2124                     }
2125                     ct_str++;
2126                     break;
2127                 case '&':
2128                     def->args_ct[i].newreg = true;
2129                     ct_str++;
2130                     break;
2131                 case 'i':
2132                     def->args_ct[i].ct |= TCG_CT_CONST;
2133                     ct_str++;
2134                     break;
2135 
2136                 /* Include all of the target-specific constraints. */
2137 
2138 #undef CONST
2139 #define CONST(CASE, MASK) \
2140     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2141 #define REGS(CASE, MASK) \
2142     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2143 
2144 #include "tcg-target-con-str.h"
2145 
2146 #undef REGS
2147 #undef CONST
2148                 default:
2149                     /* Typo in TCGTargetOpDef constraint. */
2150                     g_assert_not_reached();
2151                 }
2152             }
2153         }
2154 
2155         /* TCGTargetOpDef entry with too much information? */
2156         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2157 
2158         /* sort the constraints (XXX: this is just an heuristic) */
2159         sort_constraints(def, 0, def->nb_oargs);
2160         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2161     }
2162 }
2163 
2164 void tcg_op_remove(TCGContext *s, TCGOp *op)
2165 {
2166     TCGLabel *label;
2167 
2168     switch (op->opc) {
2169     case INDEX_op_br:
2170         label = arg_label(op->args[0]);
2171         label->refs--;
2172         break;
2173     case INDEX_op_brcond_i32:
2174     case INDEX_op_brcond_i64:
2175         label = arg_label(op->args[3]);
2176         label->refs--;
2177         break;
2178     case INDEX_op_brcond2_i32:
2179         label = arg_label(op->args[5]);
2180         label->refs--;
2181         break;
2182     default:
2183         break;
2184     }
2185 
2186     QTAILQ_REMOVE(&s->ops, op, link);
2187     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2188     s->nb_ops--;
2189 
2190 #ifdef CONFIG_PROFILER
2191     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2192 #endif
2193 }
2194 
2195 void tcg_remove_ops_after(TCGOp *op)
2196 {
2197     TCGContext *s = tcg_ctx;
2198 
2199     while (true) {
2200         TCGOp *last = tcg_last_op();
2201         if (last == op) {
2202             return;
2203         }
2204         tcg_op_remove(s, last);
2205     }
2206 }
2207 
2208 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2209 {
2210     TCGContext *s = tcg_ctx;
2211     TCGOp *op;
2212 
2213     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2214         op = tcg_malloc(sizeof(TCGOp));
2215     } else {
2216         op = QTAILQ_FIRST(&s->free_ops);
2217         QTAILQ_REMOVE(&s->free_ops, op, link);
2218     }
2219     memset(op, 0, offsetof(TCGOp, link));
2220     op->opc = opc;
2221     s->nb_ops++;
2222 
2223     return op;
2224 }
2225 
2226 TCGOp *tcg_emit_op(TCGOpcode opc)
2227 {
2228     TCGOp *op = tcg_op_alloc(opc);
2229     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2230     return op;
2231 }
2232 
2233 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2234 {
2235     TCGOp *new_op = tcg_op_alloc(opc);
2236     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2237     return new_op;
2238 }
2239 
2240 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2241 {
2242     TCGOp *new_op = tcg_op_alloc(opc);
2243     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2244     return new_op;
2245 }
2246 
2247 /* Reachable analysis : remove unreachable code.  */
2248 static void reachable_code_pass(TCGContext *s)
2249 {
2250     TCGOp *op, *op_next;
2251     bool dead = false;
2252 
2253     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2254         bool remove = dead;
2255         TCGLabel *label;
2256 
2257         switch (op->opc) {
2258         case INDEX_op_set_label:
2259             label = arg_label(op->args[0]);
2260             if (label->refs == 0) {
2261                 /*
2262                  * While there is an occasional backward branch, virtually
2263                  * all branches generated by the translators are forward.
2264                  * Which means that generally we will have already removed
2265                  * all references to the label that will be, and there is
2266                  * little to be gained by iterating.
2267                  */
2268                 remove = true;
2269             } else {
2270                 /* Once we see a label, insns become live again.  */
2271                 dead = false;
2272                 remove = false;
2273 
2274                 /*
2275                  * Optimization can fold conditional branches to unconditional.
2276                  * If we find a label with one reference which is preceded by
2277                  * an unconditional branch to it, remove both.  This needed to
2278                  * wait until the dead code in between them was removed.
2279                  */
2280                 if (label->refs == 1) {
2281                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2282                     if (op_prev->opc == INDEX_op_br &&
2283                         label == arg_label(op_prev->args[0])) {
2284                         tcg_op_remove(s, op_prev);
2285                         remove = true;
2286                     }
2287                 }
2288             }
2289             break;
2290 
2291         case INDEX_op_br:
2292         case INDEX_op_exit_tb:
2293         case INDEX_op_goto_ptr:
2294             /* Unconditional branches; everything following is dead.  */
2295             dead = true;
2296             break;
2297 
2298         case INDEX_op_call:
2299             /* Notice noreturn helper calls, raising exceptions.  */
2300             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2301                 dead = true;
2302             }
2303             break;
2304 
2305         case INDEX_op_insn_start:
2306             /* Never remove -- we need to keep these for unwind.  */
2307             remove = false;
2308             break;
2309 
2310         default:
2311             break;
2312         }
2313 
2314         if (remove) {
2315             tcg_op_remove(s, op);
2316         }
2317     }
2318 }
2319 
/*
 * Bits stored in TCGTemp.state by the liveness helpers below:
 * TS_DEAD marks a temp as dead, TS_MEM marks a temp that should be
 * in memory.
 */
2320 #define TS_DEAD  1
2321 #define TS_MEM   2
2322 
/*
 * Test the dead / sync bit of argument n.  Both macros read a variable
 * named 'arg_life' that must be in scope where they are expanded.
 */
2323 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2324 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2325 
2326 /* For liveness_pass_1, the register preferences for a given temp.  */
2327 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2328 {
2329     return ts->state_ptr;
2330 }
2331 
2332 /* For liveness_pass_1, reset the preferences for a given temp to the
2333  * maximal regset for its type.
2334  */
2335 static inline void la_reset_pref(TCGTemp *ts)
2336 {
2337     *la_temp_pref(ts)
2338         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2339 }
2340 
2341 /* liveness analysis: end of function: all temps are dead, and globals
2342    should be in memory. */
2343 static void la_func_end(TCGContext *s, int ng, int nt)
2344 {
2345     int i;
2346 
2347     for (i = 0; i < ng; ++i) {
2348         s->temps[i].state = TS_DEAD | TS_MEM;
2349         la_reset_pref(&s->temps[i]);
2350     }
2351     for (i = ng; i < nt; ++i) {
2352         s->temps[i].state = TS_DEAD;
2353         la_reset_pref(&s->temps[i]);
2354     }
2355 }
2356 
2357 /* liveness analysis: end of basic block: all temps are dead, globals
2358    and local temps should be in memory. */
2359 static void la_bb_end(TCGContext *s, int ng, int nt)
2360 {
2361     int i;
2362 
2363     for (i = 0; i < nt; ++i) {
2364         TCGTemp *ts = &s->temps[i];
2365         int state;
2366 
2367         switch (ts->kind) {
2368         case TEMP_FIXED:
2369         case TEMP_GLOBAL:
2370         case TEMP_LOCAL:
2371             state = TS_DEAD | TS_MEM;
2372             break;
2373         case TEMP_NORMAL:
2374         case TEMP_CONST:
2375             state = TS_DEAD;
2376             break;
2377         default:
2378             g_assert_not_reached();
2379         }
2380         ts->state = state;
2381         la_reset_pref(ts);
2382     }
2383 }
2384 
2385 /* liveness analysis: sync globals back to memory.  */
2386 static void la_global_sync(TCGContext *s, int ng)
2387 {
2388     int i;
2389 
2390     for (i = 0; i < ng; ++i) {
2391         int state = s->temps[i].state;
2392         s->temps[i].state = state | TS_MEM;
2393         if (state == TS_DEAD) {
2394             /* If the global was previously dead, reset prefs.  */
2395             la_reset_pref(&s->temps[i]);
2396         }
2397     }
2398 }
2399 
2400 /*
2401  * liveness analysis: conditional branch: all temps are dead,
2402  * globals and local temps should be synced.
2403  */
2404 static void la_bb_sync(TCGContext *s, int ng, int nt)
2405 {
2406     la_global_sync(s, ng);
2407 
2408     for (int i = ng; i < nt; ++i) {
2409         TCGTemp *ts = &s->temps[i];
2410         int state;
2411 
2412         switch (ts->kind) {
2413         case TEMP_LOCAL:
2414             state = ts->state;
2415             ts->state = state | TS_MEM;
2416             if (state != TS_DEAD) {
2417                 continue;
2418             }
2419             break;
2420         case TEMP_NORMAL:
2421             s->temps[i].state = TS_DEAD;
2422             break;
2423         case TEMP_CONST:
2424             continue;
2425         default:
2426             g_assert_not_reached();
2427         }
2428         la_reset_pref(&s->temps[i]);
2429     }
2430 }
2431 
2432 /* liveness analysis: sync globals back to memory and kill.  */
2433 static void la_global_kill(TCGContext *s, int ng)
2434 {
2435     int i;
2436 
2437     for (i = 0; i < ng; i++) {
2438         s->temps[i].state = TS_DEAD | TS_MEM;
2439         la_reset_pref(&s->temps[i]);
2440     }
2441 }
2442 
2443 /* liveness analysis: note live globals crossing calls.  */
2444 static void la_cross_call(TCGContext *s, int nt)
2445 {
2446     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2447     int i;
2448 
2449     for (i = 0; i < nt; i++) {
2450         TCGTemp *ts = &s->temps[i];
2451         if (!(ts->state & TS_DEAD)) {
2452             TCGRegSet *pset = la_temp_pref(ts);
2453             TCGRegSet set = *pset;
2454 
2455             set &= mask;
2456             /* If the combination is not possible, restart.  */
2457             if (set == 0) {
2458                 set = tcg_target_available_regs[ts->type] & mask;
2459             }
2460             *pset = set;
2461         }
2462     }
2463 }
2464 
2465 /* Liveness analysis: update the opc_arg_life array to tell if a
2466    given input argument is dead. Instructions updating dead
2467    temporaries are removed. */
2468 static void liveness_pass_1(TCGContext *s)
2469 {
2470     int nb_globals = s->nb_globals;
2471     int nb_temps = s->nb_temps;
2472     TCGOp *op, *op_prev;
2473     TCGRegSet *prefs;
2474     int i;
2475 
2476     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2477     for (i = 0; i < nb_temps; ++i) {
2478         s->temps[i].state_ptr = prefs + i;
2479     }
2480 
2481     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2482     la_func_end(s, nb_globals, nb_temps);
2483 
2484     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2485         int nb_iargs, nb_oargs;
2486         TCGOpcode opc_new, opc_new2;
2487         bool have_opc_new2;
2488         TCGLifeData arg_life = 0;
2489         TCGTemp *ts;
2490         TCGOpcode opc = op->opc;
2491         const TCGOpDef *def = &tcg_op_defs[opc];
2492 
2493         switch (opc) {
2494         case INDEX_op_call:
2495             {
2496                 int call_flags;
2497                 int nb_call_regs;
2498 
2499                 nb_oargs = TCGOP_CALLO(op);
2500                 nb_iargs = TCGOP_CALLI(op);
2501                 call_flags = tcg_call_flags(op);
2502 
2503                 /* pure functions can be removed if their result is unused */
2504                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2505                     for (i = 0; i < nb_oargs; i++) {
2506                         ts = arg_temp(op->args[i]);
2507                         if (ts->state != TS_DEAD) {
2508                             goto do_not_remove_call;
2509                         }
2510                     }
2511                     goto do_remove;
2512                 }
2513             do_not_remove_call:
2514 
2515                 /* Output args are dead.  */
2516                 for (i = 0; i < nb_oargs; i++) {
2517                     ts = arg_temp(op->args[i]);
2518                     if (ts->state & TS_DEAD) {
2519                         arg_life |= DEAD_ARG << i;
2520                     }
2521                     if (ts->state & TS_MEM) {
2522                         arg_life |= SYNC_ARG << i;
2523                     }
2524                     ts->state = TS_DEAD;
2525                     la_reset_pref(ts);
2526 
2527                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2528                     op->output_pref[i] = 0;
2529                 }
2530 
2531                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2532                                     TCG_CALL_NO_READ_GLOBALS))) {
2533                     la_global_kill(s, nb_globals);
2534                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2535                     la_global_sync(s, nb_globals);
2536                 }
2537 
2538                 /* Record arguments that die in this helper.  */
2539                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2540                     ts = arg_temp(op->args[i]);
2541                     if (ts && ts->state & TS_DEAD) {
2542                         arg_life |= DEAD_ARG << i;
2543                     }
2544                 }
2545 
2546                 /* For all live registers, remove call-clobbered prefs.  */
2547                 la_cross_call(s, nb_temps);
2548 
2549                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2550 
2551                 /* Input arguments are live for preceding opcodes.  */
2552                 for (i = 0; i < nb_iargs; i++) {
2553                     ts = arg_temp(op->args[i + nb_oargs]);
2554                     if (ts && ts->state & TS_DEAD) {
2555                         /* For those arguments that die, and will be allocated
2556                          * in registers, clear the register set for that arg,
2557                          * to be filled in below.  For args that will be on
2558                          * the stack, reset to any available reg.
2559                          */
2560                         *la_temp_pref(ts)
2561                             = (i < nb_call_regs ? 0 :
2562                                tcg_target_available_regs[ts->type]);
2563                         ts->state &= ~TS_DEAD;
2564                     }
2565                 }
2566 
2567                 /* For each input argument, add its input register to prefs.
2568                    If a temp is used once, this produces a single set bit.  */
2569                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2570                     ts = arg_temp(op->args[i + nb_oargs]);
2571                     if (ts) {
2572                         tcg_regset_set_reg(*la_temp_pref(ts),
2573                                            tcg_target_call_iarg_regs[i]);
2574                     }
2575                 }
2576             }
2577             break;
2578         case INDEX_op_insn_start:
2579             break;
2580         case INDEX_op_discard:
2581             /* mark the temporary as dead */
2582             ts = arg_temp(op->args[0]);
2583             ts->state = TS_DEAD;
2584             la_reset_pref(ts);
2585             break;
2586 
2587         case INDEX_op_add2_i32:
2588             opc_new = INDEX_op_add_i32;
2589             goto do_addsub2;
2590         case INDEX_op_sub2_i32:
2591             opc_new = INDEX_op_sub_i32;
2592             goto do_addsub2;
2593         case INDEX_op_add2_i64:
2594             opc_new = INDEX_op_add_i64;
2595             goto do_addsub2;
2596         case INDEX_op_sub2_i64:
2597             opc_new = INDEX_op_sub_i64;
2598         do_addsub2:
2599             nb_iargs = 4;
2600             nb_oargs = 2;
2601             /* Test if the high part of the operation is dead, but not
2602                the low part.  The result can be optimized to a simple
2603                add or sub.  This happens often for x86_64 guest when the
2604                cpu mode is set to 32 bit.  */
2605             if (arg_temp(op->args[1])->state == TS_DEAD) {
2606                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2607                     goto do_remove;
2608                 }
2609                 /* Replace the opcode and adjust the args in place,
2610                    leaving 3 unused args at the end.  */
2611                 op->opc = opc = opc_new;
2612                 op->args[1] = op->args[2];
2613                 op->args[2] = op->args[4];
2614                 /* Fall through and mark the single-word operation live.  */
2615                 nb_iargs = 2;
2616                 nb_oargs = 1;
2617             }
2618             goto do_not_remove;
2619 
2620         case INDEX_op_mulu2_i32:
2621             opc_new = INDEX_op_mul_i32;
2622             opc_new2 = INDEX_op_muluh_i32;
2623             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2624             goto do_mul2;
2625         case INDEX_op_muls2_i32:
2626             opc_new = INDEX_op_mul_i32;
2627             opc_new2 = INDEX_op_mulsh_i32;
2628             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2629             goto do_mul2;
2630         case INDEX_op_mulu2_i64:
2631             opc_new = INDEX_op_mul_i64;
2632             opc_new2 = INDEX_op_muluh_i64;
2633             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2634             goto do_mul2;
2635         case INDEX_op_muls2_i64:
2636             opc_new = INDEX_op_mul_i64;
2637             opc_new2 = INDEX_op_mulsh_i64;
2638             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2639             goto do_mul2;
2640         do_mul2:
2641             nb_iargs = 2;
2642             nb_oargs = 2;
2643             if (arg_temp(op->args[1])->state == TS_DEAD) {
2644                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2645                     /* Both parts of the operation are dead.  */
2646                     goto do_remove;
2647                 }
2648                 /* The high part of the operation is dead; generate the low. */
2649                 op->opc = opc = opc_new;
2650                 op->args[1] = op->args[2];
2651                 op->args[2] = op->args[3];
2652             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2653                 /* The low part of the operation is dead; generate the high. */
2654                 op->opc = opc = opc_new2;
2655                 op->args[0] = op->args[1];
2656                 op->args[1] = op->args[2];
2657                 op->args[2] = op->args[3];
2658             } else {
2659                 goto do_not_remove;
2660             }
2661             /* Mark the single-word operation live.  */
2662             nb_oargs = 1;
2663             goto do_not_remove;
2664 
2665         default:
2666             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2667             nb_iargs = def->nb_iargs;
2668             nb_oargs = def->nb_oargs;
2669 
2670             /* Test if the operation can be removed because all
2671                its outputs are dead. We assume that nb_oargs == 0
2672                implies side effects */
2673             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2674                 for (i = 0; i < nb_oargs; i++) {
2675                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2676                         goto do_not_remove;
2677                     }
2678                 }
2679                 goto do_remove;
2680             }
2681             goto do_not_remove;
2682 
2683         do_remove:
2684             tcg_op_remove(s, op);
2685             break;
2686 
2687         do_not_remove:
2688             for (i = 0; i < nb_oargs; i++) {
2689                 ts = arg_temp(op->args[i]);
2690 
2691                 /* Remember the preference of the uses that followed.  */
2692                 op->output_pref[i] = *la_temp_pref(ts);
2693 
2694                 /* Output args are dead.  */
2695                 if (ts->state & TS_DEAD) {
2696                     arg_life |= DEAD_ARG << i;
2697                 }
2698                 if (ts->state & TS_MEM) {
2699                     arg_life |= SYNC_ARG << i;
2700                 }
2701                 ts->state = TS_DEAD;
2702                 la_reset_pref(ts);
2703             }
2704 
2705             /* If end of basic block, update.  */
2706             if (def->flags & TCG_OPF_BB_EXIT) {
2707                 la_func_end(s, nb_globals, nb_temps);
2708             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2709                 la_bb_sync(s, nb_globals, nb_temps);
2710             } else if (def->flags & TCG_OPF_BB_END) {
2711                 la_bb_end(s, nb_globals, nb_temps);
2712             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2713                 la_global_sync(s, nb_globals);
2714                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2715                     la_cross_call(s, nb_temps);
2716                 }
2717             }
2718 
2719             /* Record arguments that die in this opcode.  */
2720             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2721                 ts = arg_temp(op->args[i]);
2722                 if (ts->state & TS_DEAD) {
2723                     arg_life |= DEAD_ARG << i;
2724                 }
2725             }
2726 
2727             /* Input arguments are live for preceding opcodes.  */
2728             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2729                 ts = arg_temp(op->args[i]);
2730                 if (ts->state & TS_DEAD) {
2731                     /* For operands that were dead, initially allow
2732                        all regs for the type.  */
2733                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2734                     ts->state &= ~TS_DEAD;
2735                 }
2736             }
2737 
2738             /* Incorporate constraints for this operand.  */
2739             switch (opc) {
2740             case INDEX_op_mov_i32:
2741             case INDEX_op_mov_i64:
2742                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2743                    have proper constraints.  That said, special case
2744                    moves to propagate preferences backward.  */
2745                 if (IS_DEAD_ARG(1)) {
2746                     *la_temp_pref(arg_temp(op->args[0]))
2747                         = *la_temp_pref(arg_temp(op->args[1]));
2748                 }
2749                 break;
2750 
2751             default:
2752                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2753                     const TCGArgConstraint *ct = &def->args_ct[i];
2754                     TCGRegSet set, *pset;
2755 
2756                     ts = arg_temp(op->args[i]);
2757                     pset = la_temp_pref(ts);
2758                     set = *pset;
2759 
2760                     set &= ct->regs;
2761                     if (ct->ialias) {
2762                         set &= op->output_pref[ct->alias_index];
2763                     }
2764                     /* If the combination is not possible, restart.  */
2765                     if (set == 0) {
2766                         set = ct->regs;
2767                     }
2768                     *pset = set;
2769                 }
2770                 break;
2771             }
2772             break;
2773         }
2774         op->life = arg_life;
2775     }
2776 }
2777 
2778 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2779 static bool liveness_pass_2(TCGContext *s)
2780 {
2781     int nb_globals = s->nb_globals;
2782     int nb_temps, i;
2783     bool changes = false;
2784     TCGOp *op, *op_next;
2785 
2786     /* Create a temporary for each indirect global.  */
2787     for (i = 0; i < nb_globals; ++i) {
2788         TCGTemp *its = &s->temps[i];
2789         if (its->indirect_reg) {
2790             TCGTemp *dts = tcg_temp_alloc(s);
2791             dts->type = its->type;
2792             dts->base_type = its->base_type;
2793             its->state_ptr = dts;
2794         } else {
2795             its->state_ptr = NULL;
2796         }
2797         /* All globals begin dead.  */
2798         its->state = TS_DEAD;
2799     }
2800     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2801         TCGTemp *its = &s->temps[i];
2802         its->state_ptr = NULL;
2803         its->state = TS_DEAD;
2804     }
2805 
2806     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2807         TCGOpcode opc = op->opc;
2808         const TCGOpDef *def = &tcg_op_defs[opc];
2809         TCGLifeData arg_life = op->life;
2810         int nb_iargs, nb_oargs, call_flags;
2811         TCGTemp *arg_ts, *dir_ts;
2812 
2813         if (opc == INDEX_op_call) {
2814             nb_oargs = TCGOP_CALLO(op);
2815             nb_iargs = TCGOP_CALLI(op);
2816             call_flags = tcg_call_flags(op);
2817         } else {
2818             nb_iargs = def->nb_iargs;
2819             nb_oargs = def->nb_oargs;
2820 
2821             /* Set flags similar to how calls require.  */
2822             if (def->flags & TCG_OPF_COND_BRANCH) {
2823                 /* Like reading globals: sync_globals */
2824                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2825             } else if (def->flags & TCG_OPF_BB_END) {
2826                 /* Like writing globals: save_globals */
2827                 call_flags = 0;
2828             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2829                 /* Like reading globals: sync_globals */
2830                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2831             } else {
2832                 /* No effect on globals.  */
2833                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2834                               TCG_CALL_NO_WRITE_GLOBALS);
2835             }
2836         }
2837 
2838         /* Make sure that input arguments are available.  */
2839         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2840             arg_ts = arg_temp(op->args[i]);
2841             if (arg_ts) {
2842                 dir_ts = arg_ts->state_ptr;
2843                 if (dir_ts && arg_ts->state == TS_DEAD) {
2844                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2845                                       ? INDEX_op_ld_i32
2846                                       : INDEX_op_ld_i64);
2847                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2848 
2849                     lop->args[0] = temp_arg(dir_ts);
2850                     lop->args[1] = temp_arg(arg_ts->mem_base);
2851                     lop->args[2] = arg_ts->mem_offset;
2852 
2853                     /* Loaded, but synced with memory.  */
2854                     arg_ts->state = TS_MEM;
2855                 }
2856             }
2857         }
2858 
2859         /* Perform input replacement, and mark inputs that became dead.
2860            No action is required except keeping temp_state up to date
2861            so that we reload when needed.  */
2862         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2863             arg_ts = arg_temp(op->args[i]);
2864             if (arg_ts) {
2865                 dir_ts = arg_ts->state_ptr;
2866                 if (dir_ts) {
2867                     op->args[i] = temp_arg(dir_ts);
2868                     changes = true;
2869                     if (IS_DEAD_ARG(i)) {
2870                         arg_ts->state = TS_DEAD;
2871                     }
2872                 }
2873             }
2874         }
2875 
2876         /* Liveness analysis should ensure that the following are
2877            all correct, for call sites and basic block end points.  */
2878         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2879             /* Nothing to do */
2880         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2881             for (i = 0; i < nb_globals; ++i) {
2882                 /* Liveness should see that globals are synced back,
2883                    that is, either TS_DEAD or TS_MEM.  */
2884                 arg_ts = &s->temps[i];
2885                 tcg_debug_assert(arg_ts->state_ptr == 0
2886                                  || arg_ts->state != 0);
2887             }
2888         } else {
2889             for (i = 0; i < nb_globals; ++i) {
2890                 /* Liveness should see that globals are saved back,
2891                    that is, TS_DEAD, waiting to be reloaded.  */
2892                 arg_ts = &s->temps[i];
2893                 tcg_debug_assert(arg_ts->state_ptr == 0
2894                                  || arg_ts->state == TS_DEAD);
2895             }
2896         }
2897 
2898         /* Outputs become available.  */
2899         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2900             arg_ts = arg_temp(op->args[0]);
2901             dir_ts = arg_ts->state_ptr;
2902             if (dir_ts) {
2903                 op->args[0] = temp_arg(dir_ts);
2904                 changes = true;
2905 
2906                 /* The output is now live and modified.  */
2907                 arg_ts->state = 0;
2908 
2909                 if (NEED_SYNC_ARG(0)) {
2910                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2911                                       ? INDEX_op_st_i32
2912                                       : INDEX_op_st_i64);
2913                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2914                     TCGTemp *out_ts = dir_ts;
2915 
2916                     if (IS_DEAD_ARG(0)) {
2917                         out_ts = arg_temp(op->args[1]);
2918                         arg_ts->state = TS_DEAD;
2919                         tcg_op_remove(s, op);
2920                     } else {
2921                         arg_ts->state = TS_MEM;
2922                     }
2923 
2924                     sop->args[0] = temp_arg(out_ts);
2925                     sop->args[1] = temp_arg(arg_ts->mem_base);
2926                     sop->args[2] = arg_ts->mem_offset;
2927                 } else {
2928                     tcg_debug_assert(!IS_DEAD_ARG(0));
2929                 }
2930             }
2931         } else {
2932             for (i = 0; i < nb_oargs; i++) {
2933                 arg_ts = arg_temp(op->args[i]);
2934                 dir_ts = arg_ts->state_ptr;
2935                 if (!dir_ts) {
2936                     continue;
2937                 }
2938                 op->args[i] = temp_arg(dir_ts);
2939                 changes = true;
2940 
2941                 /* The output is now live and modified.  */
2942                 arg_ts->state = 0;
2943 
2944                 /* Sync outputs upon their last write.  */
2945                 if (NEED_SYNC_ARG(i)) {
2946                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2947                                       ? INDEX_op_st_i32
2948                                       : INDEX_op_st_i64);
2949                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2950 
2951                     sop->args[0] = temp_arg(dir_ts);
2952                     sop->args[1] = temp_arg(arg_ts->mem_base);
2953                     sop->args[2] = arg_ts->mem_offset;
2954 
2955                     arg_ts->state = TS_MEM;
2956                 }
2957                 /* Drop outputs that are dead.  */
2958                 if (IS_DEAD_ARG(i)) {
2959                     arg_ts->state = TS_DEAD;
2960                 }
2961             }
2962         }
2963     }
2964 
2965     return changes;
2966 }
2967 
2968 #ifdef CONFIG_DEBUG_TCG
2969 static void dump_regs(TCGContext *s)
2970 {
2971     TCGTemp *ts;
2972     int i;
2973     char buf[64];
2974 
2975     for(i = 0; i < s->nb_temps; i++) {
2976         ts = &s->temps[i];
2977         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2978         switch(ts->val_type) {
2979         case TEMP_VAL_REG:
2980             printf("%s", tcg_target_reg_names[ts->reg]);
2981             break;
2982         case TEMP_VAL_MEM:
2983             printf("%d(%s)", (int)ts->mem_offset,
2984                    tcg_target_reg_names[ts->mem_base->reg]);
2985             break;
2986         case TEMP_VAL_CONST:
2987             printf("$0x%" PRIx64, ts->val);
2988             break;
2989         case TEMP_VAL_DEAD:
2990             printf("D");
2991             break;
2992         default:
2993             printf("???");
2994             break;
2995         }
2996         printf("\n");
2997     }
2998 
2999     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3000         if (s->reg_to_temp[i] != NULL) {
3001             printf("%s: %s\n",
3002                    tcg_target_reg_names[i],
3003                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3004         }
3005     }
3006 }
3007 
3008 static void check_regs(TCGContext *s)
3009 {
3010     int reg;
3011     int k;
3012     TCGTemp *ts;
3013     char buf[64];
3014 
3015     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3016         ts = s->reg_to_temp[reg];
3017         if (ts != NULL) {
3018             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3019                 printf("Inconsistency for register %s:\n",
3020                        tcg_target_reg_names[reg]);
3021                 goto fail;
3022             }
3023         }
3024     }
3025     for (k = 0; k < s->nb_temps; k++) {
3026         ts = &s->temps[k];
3027         if (ts->val_type == TEMP_VAL_REG
3028             && ts->kind != TEMP_FIXED
3029             && s->reg_to_temp[ts->reg] != ts) {
3030             printf("Inconsistency for temp %s:\n",
3031                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3032         fail:
3033             printf("reg state:\n");
3034             dump_regs(s);
3035             tcg_abort();
3036         }
3037     }
3038 }
3039 #endif
3040 
3041 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3042 {
3043     intptr_t off, size, align;
3044 
3045     switch (ts->type) {
3046     case TCG_TYPE_I32:
3047         size = align = 4;
3048         break;
3049     case TCG_TYPE_I64:
3050     case TCG_TYPE_V64:
3051         size = align = 8;
3052         break;
3053     case TCG_TYPE_V128:
3054         size = align = 16;
3055         break;
3056     case TCG_TYPE_V256:
3057         /* Note that we do not require aligned storage for V256. */
3058         size = 32, align = 16;
3059         break;
3060     default:
3061         g_assert_not_reached();
3062     }
3063 
3064     /*
3065      * Assume the stack is sufficiently aligned.
3066      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3067      * and do not require 16 byte vector alignment.  This seems slightly
3068      * easier than fully parameterizing the above switch statement.
3069      */
3070     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3071     off = ROUND_UP(s->current_frame_offset, align);
3072 
3073     /* If we've exhausted the stack frame, restart with a smaller TB. */
3074     if (off + size > s->frame_end) {
3075         tcg_raise_tb_overflow(s);
3076     }
3077     s->current_frame_offset = off + size;
3078 
3079     ts->mem_offset = off;
3080 #if defined(__sparc__)
3081     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3082 #endif
3083     ts->mem_base = s->frame_temp;
3084     ts->mem_allocated = 1;
3085 }
3086 
3087 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3088 
3089 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3090    mark it free; otherwise mark it dead.  */
3091 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3092 {
3093     TCGTempVal new_type;
3094 
3095     switch (ts->kind) {
3096     case TEMP_FIXED:
3097         return;
3098     case TEMP_GLOBAL:
3099     case TEMP_LOCAL:
3100         new_type = TEMP_VAL_MEM;
3101         break;
3102     case TEMP_NORMAL:
3103         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3104         break;
3105     case TEMP_CONST:
3106         new_type = TEMP_VAL_CONST;
3107         break;
3108     default:
3109         g_assert_not_reached();
3110     }
3111     if (ts->val_type == TEMP_VAL_REG) {
3112         s->reg_to_temp[ts->reg] = NULL;
3113     }
3114     ts->val_type = new_type;
3115 }
3116 
3117 /* Mark a temporary as dead.  */
3118 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3119 {
3120     temp_free_or_dead(s, ts, 1);
3121 }
3122 
3123 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3124    registers needs to be allocated to store a constant.  If 'free_or_dead'
3125    is non-zero, subsequently release the temporary; if it is positive, the
3126    temp is dead; if it is negative, the temp is free.  */
3127 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3128                       TCGRegSet preferred_regs, int free_or_dead)
3129 {
3130     if (!temp_readonly(ts) && !ts->mem_coherent) {
3131         if (!ts->mem_allocated) {
3132             temp_allocate_frame(s, ts);
3133         }
3134         switch (ts->val_type) {
3135         case TEMP_VAL_CONST:
3136             /* If we're going to free the temp immediately, then we won't
3137                require it later in a register, so attempt to store the
3138                constant to memory directly.  */
3139             if (free_or_dead
3140                 && tcg_out_sti(s, ts->type, ts->val,
3141                                ts->mem_base->reg, ts->mem_offset)) {
3142                 break;
3143             }
3144             temp_load(s, ts, tcg_target_available_regs[ts->type],
3145                       allocated_regs, preferred_regs);
3146             /* fallthrough */
3147 
3148         case TEMP_VAL_REG:
3149             tcg_out_st(s, ts->type, ts->reg,
3150                        ts->mem_base->reg, ts->mem_offset);
3151             break;
3152 
3153         case TEMP_VAL_MEM:
3154             break;
3155 
3156         case TEMP_VAL_DEAD:
3157         default:
3158             tcg_abort();
3159         }
3160         ts->mem_coherent = 1;
3161     }
3162     if (free_or_dead) {
3163         temp_free_or_dead(s, ts, free_or_dead);
3164     }
3165 }
3166 
3167 /* free register 'reg' by spilling the corresponding temporary if necessary */
3168 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3169 {
3170     TCGTemp *ts = s->reg_to_temp[reg];
3171     if (ts != NULL) {
3172         temp_sync(s, ts, allocated_regs, 0, -1);
3173     }
3174 }
3175 
3176 /**
3177  * tcg_reg_alloc:
3178  * @required_regs: Set of registers in which we must allocate.
3179  * @allocated_regs: Set of registers which must be avoided.
3180  * @preferred_regs: Set of registers we should prefer.
3181  * @rev: True if we search the registers in "indirect" order.
3182  *
3183  * The allocated register must be in @required_regs & ~@allocated_regs,
3184  * but if we can put it in @preferred_regs we may save a move later.
3185  */
3186 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3187                             TCGRegSet allocated_regs,
3188                             TCGRegSet preferred_regs, bool rev)
3189 {
3190     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3191     TCGRegSet reg_ct[2];
3192     const int *order;
3193 
3194     reg_ct[1] = required_regs & ~allocated_regs;
3195     tcg_debug_assert(reg_ct[1] != 0);
3196     reg_ct[0] = reg_ct[1] & preferred_regs;
3197 
3198     /* Skip the preferred_regs option if it cannot be satisfied,
3199        or if the preference made no difference.  */
3200     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3201 
3202     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3203 
3204     /* Try free registers, preferences first.  */
3205     for (j = f; j < 2; j++) {
3206         TCGRegSet set = reg_ct[j];
3207 
3208         if (tcg_regset_single(set)) {
3209             /* One register in the set.  */
3210             TCGReg reg = tcg_regset_first(set);
3211             if (s->reg_to_temp[reg] == NULL) {
3212                 return reg;
3213             }
3214         } else {
3215             for (i = 0; i < n; i++) {
3216                 TCGReg reg = order[i];
3217                 if (s->reg_to_temp[reg] == NULL &&
3218                     tcg_regset_test_reg(set, reg)) {
3219                     return reg;
3220                 }
3221             }
3222         }
3223     }
3224 
3225     /* We must spill something.  */
3226     for (j = f; j < 2; j++) {
3227         TCGRegSet set = reg_ct[j];
3228 
3229         if (tcg_regset_single(set)) {
3230             /* One register in the set.  */
3231             TCGReg reg = tcg_regset_first(set);
3232             tcg_reg_free(s, reg, allocated_regs);
3233             return reg;
3234         } else {
3235             for (i = 0; i < n; i++) {
3236                 TCGReg reg = order[i];
3237                 if (tcg_regset_test_reg(set, reg)) {
3238                     tcg_reg_free(s, reg, allocated_regs);
3239                     return reg;
3240                 }
3241             }
3242         }
3243     }
3244 
3245     tcg_abort();
3246 }
3247 
3248 /* Make sure the temporary is in a register.  If needed, allocate the register
3249    from DESIRED while avoiding ALLOCATED.  */
3250 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3251                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3252 {
3253     TCGReg reg;
3254 
3255     switch (ts->val_type) {
3256     case TEMP_VAL_REG:
3257         return;
3258     case TEMP_VAL_CONST:
3259         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3260                             preferred_regs, ts->indirect_base);
3261         if (ts->type <= TCG_TYPE_I64) {
3262             tcg_out_movi(s, ts->type, reg, ts->val);
3263         } else {
3264             uint64_t val = ts->val;
3265             MemOp vece = MO_64;
3266 
3267             /*
3268              * Find the minimal vector element that matches the constant.
3269              * The targets will, in general, have to do this search anyway,
3270              * do this generically.
3271              */
3272             if (val == dup_const(MO_8, val)) {
3273                 vece = MO_8;
3274             } else if (val == dup_const(MO_16, val)) {
3275                 vece = MO_16;
3276             } else if (val == dup_const(MO_32, val)) {
3277                 vece = MO_32;
3278             }
3279 
3280             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3281         }
3282         ts->mem_coherent = 0;
3283         break;
3284     case TEMP_VAL_MEM:
3285         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3286                             preferred_regs, ts->indirect_base);
3287         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3288         ts->mem_coherent = 1;
3289         break;
3290     case TEMP_VAL_DEAD:
3291     default:
3292         tcg_abort();
3293     }
3294     ts->reg = reg;
3295     ts->val_type = TEMP_VAL_REG;
3296     s->reg_to_temp[reg] = ts;
3297 }
3298 
3299 /* Save a temporary to memory. 'allocated_regs' is used in case a
3300    temporary registers needs to be allocated to store a constant.  */
3301 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3302 {
3303     /* The liveness analysis already ensures that globals are back
3304        in memory. Keep an tcg_debug_assert for safety. */
3305     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3306 }
3307 
3308 /* save globals to their canonical location and assume they can be
3309    modified be the following code. 'allocated_regs' is used in case a
3310    temporary registers needs to be allocated to store a constant. */
3311 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3312 {
3313     int i, n;
3314 
3315     for (i = 0, n = s->nb_globals; i < n; i++) {
3316         temp_save(s, &s->temps[i], allocated_regs);
3317     }
3318 }
3319 
3320 /* sync globals to their canonical location and assume they can be
3321    read by the following code. 'allocated_regs' is used in case a
3322    temporary registers needs to be allocated to store a constant. */
3323 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3324 {
3325     int i, n;
3326 
3327     for (i = 0, n = s->nb_globals; i < n; i++) {
3328         TCGTemp *ts = &s->temps[i];
3329         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3330                          || ts->kind == TEMP_FIXED
3331                          || ts->mem_coherent);
3332     }
3333 }
3334 
3335 /* at the end of a basic block, we assume all temporaries are dead and
3336    all globals are stored at their canonical location. */
3337 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3338 {
3339     int i;
3340 
3341     for (i = s->nb_globals; i < s->nb_temps; i++) {
3342         TCGTemp *ts = &s->temps[i];
3343 
3344         switch (ts->kind) {
3345         case TEMP_LOCAL:
3346             temp_save(s, ts, allocated_regs);
3347             break;
3348         case TEMP_NORMAL:
3349             /* The liveness analysis already ensures that temps are dead.
3350                Keep an tcg_debug_assert for safety. */
3351             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3352             break;
3353         case TEMP_CONST:
3354             /* Similarly, we should have freed any allocated register. */
3355             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3356             break;
3357         default:
3358             g_assert_not_reached();
3359         }
3360     }
3361 
3362     save_globals(s, allocated_regs);
3363 }
3364 
3365 /*
3366  * At a conditional branch, we assume all temporaries are dead and
3367  * all globals and local temps are synced to their location.
3368  */
3369 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3370 {
3371     sync_globals(s, allocated_regs);
3372 
3373     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3374         TCGTemp *ts = &s->temps[i];
3375         /*
3376          * The liveness analysis already ensures that temps are dead.
3377          * Keep tcg_debug_asserts for safety.
3378          */
3379         switch (ts->kind) {
3380         case TEMP_LOCAL:
3381             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3382             break;
3383         case TEMP_NORMAL:
3384             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3385             break;
3386         case TEMP_CONST:
3387             break;
3388         default:
3389             g_assert_not_reached();
3390         }
3391     }
3392 }
3393 
3394 /*
3395  * Specialized code generation for INDEX_op_mov_* with a constant.
3396  */
3397 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3398                                   tcg_target_ulong val, TCGLifeData arg_life,
3399                                   TCGRegSet preferred_regs)
3400 {
3401     /* ENV should not be modified.  */
3402     tcg_debug_assert(!temp_readonly(ots));
3403 
3404     /* The movi is not explicitly generated here.  */
3405     if (ots->val_type == TEMP_VAL_REG) {
3406         s->reg_to_temp[ots->reg] = NULL;
3407     }
3408     ots->val_type = TEMP_VAL_CONST;
3409     ots->val = val;
3410     ots->mem_coherent = 0;
3411     if (NEED_SYNC_ARG(0)) {
3412         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3413     } else if (IS_DEAD_ARG(0)) {
3414         temp_dead(s, ots);
3415     }
3416 }
3417 
3418 /*
3419  * Specialized code generation for INDEX_op_mov_*.
3420  */
3421 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3422 {
3423     const TCGLifeData arg_life = op->life;
3424     TCGRegSet allocated_regs, preferred_regs;
3425     TCGTemp *ts, *ots;
3426     TCGType otype, itype;
3427 
3428     allocated_regs = s->reserved_regs;
3429     preferred_regs = op->output_pref[0];
3430     ots = arg_temp(op->args[0]);
3431     ts = arg_temp(op->args[1]);
3432 
3433     /* ENV should not be modified.  */
3434     tcg_debug_assert(!temp_readonly(ots));
3435 
3436     /* Note that otype != itype for no-op truncation.  */
3437     otype = ots->type;
3438     itype = ts->type;
3439 
3440     if (ts->val_type == TEMP_VAL_CONST) {
3441         /* propagate constant or generate sti */
3442         tcg_target_ulong val = ts->val;
3443         if (IS_DEAD_ARG(1)) {
3444             temp_dead(s, ts);
3445         }
3446         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3447         return;
3448     }
3449 
3450     /* If the source value is in memory we're going to be forced
3451        to have it in a register in order to perform the copy.  Copy
3452        the SOURCE value into its own register first, that way we
3453        don't have to reload SOURCE the next time it is used. */
3454     if (ts->val_type == TEMP_VAL_MEM) {
3455         temp_load(s, ts, tcg_target_available_regs[itype],
3456                   allocated_regs, preferred_regs);
3457     }
3458 
3459     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3460     if (IS_DEAD_ARG(0)) {
3461         /* mov to a non-saved dead register makes no sense (even with
3462            liveness analysis disabled). */
3463         tcg_debug_assert(NEED_SYNC_ARG(0));
3464         if (!ots->mem_allocated) {
3465             temp_allocate_frame(s, ots);
3466         }
3467         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3468         if (IS_DEAD_ARG(1)) {
3469             temp_dead(s, ts);
3470         }
3471         temp_dead(s, ots);
3472     } else {
3473         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3474             /* the mov can be suppressed */
3475             if (ots->val_type == TEMP_VAL_REG) {
3476                 s->reg_to_temp[ots->reg] = NULL;
3477             }
3478             ots->reg = ts->reg;
3479             temp_dead(s, ts);
3480         } else {
3481             if (ots->val_type != TEMP_VAL_REG) {
3482                 /* When allocating a new register, make sure to not spill the
3483                    input one. */
3484                 tcg_regset_set_reg(allocated_regs, ts->reg);
3485                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3486                                          allocated_regs, preferred_regs,
3487                                          ots->indirect_base);
3488             }
3489             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3490                 /*
3491                  * Cross register class move not supported.
3492                  * Store the source register into the destination slot
3493                  * and leave the destination temp as TEMP_VAL_MEM.
3494                  */
3495                 assert(!temp_readonly(ots));
3496                 if (!ts->mem_allocated) {
3497                     temp_allocate_frame(s, ots);
3498                 }
3499                 tcg_out_st(s, ts->type, ts->reg,
3500                            ots->mem_base->reg, ots->mem_offset);
3501                 ots->mem_coherent = 1;
3502                 temp_free_or_dead(s, ots, -1);
3503                 return;
3504             }
3505         }
3506         ots->val_type = TEMP_VAL_REG;
3507         ots->mem_coherent = 0;
3508         s->reg_to_temp[ots->reg] = ots;
3509         if (NEED_SYNC_ARG(0)) {
3510             temp_sync(s, ots, allocated_regs, 0, 0);
3511         }
3512     }
3513 }
3514 
3515 /*
3516  * Specialized code generation for INDEX_op_dup_vec.
3517  */
3518 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3519 {
3520     const TCGLifeData arg_life = op->life;
3521     TCGRegSet dup_out_regs, dup_in_regs;
3522     TCGTemp *its, *ots;
3523     TCGType itype, vtype;
3524     intptr_t endian_fixup;
3525     unsigned vece;
3526     bool ok;
3527 
3528     ots = arg_temp(op->args[0]);
3529     its = arg_temp(op->args[1]);
3530 
3531     /* ENV should not be modified.  */
3532     tcg_debug_assert(!temp_readonly(ots));
3533 
3534     itype = its->type;
3535     vece = TCGOP_VECE(op);
3536     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3537 
3538     if (its->val_type == TEMP_VAL_CONST) {
3539         /* Propagate constant via movi -> dupi.  */
3540         tcg_target_ulong val = its->val;
3541         if (IS_DEAD_ARG(1)) {
3542             temp_dead(s, its);
3543         }
3544         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3545         return;
3546     }
3547 
3548     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3549     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3550 
3551     /* Allocate the output register now.  */
3552     if (ots->val_type != TEMP_VAL_REG) {
3553         TCGRegSet allocated_regs = s->reserved_regs;
3554 
3555         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3556             /* Make sure to not spill the input register. */
3557             tcg_regset_set_reg(allocated_regs, its->reg);
3558         }
3559         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3560                                  op->output_pref[0], ots->indirect_base);
3561         ots->val_type = TEMP_VAL_REG;
3562         ots->mem_coherent = 0;
3563         s->reg_to_temp[ots->reg] = ots;
3564     }
3565 
3566     switch (its->val_type) {
3567     case TEMP_VAL_REG:
3568         /*
3569          * The dup constriaints must be broad, covering all possible VECE.
3570          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3571          * to fail, indicating that extra moves are required for that case.
3572          */
3573         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3574             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3575                 goto done;
3576             }
3577             /* Try again from memory or a vector input register.  */
3578         }
3579         if (!its->mem_coherent) {
3580             /*
3581              * The input register is not synced, and so an extra store
3582              * would be required to use memory.  Attempt an integer-vector
3583              * register move first.  We do not have a TCGRegSet for this.
3584              */
3585             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3586                 break;
3587             }
3588             /* Sync the temp back to its slot and load from there.  */
3589             temp_sync(s, its, s->reserved_regs, 0, 0);
3590         }
3591         /* fall through */
3592 
3593     case TEMP_VAL_MEM:
3594 #ifdef HOST_WORDS_BIGENDIAN
3595         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3596         endian_fixup -= 1 << vece;
3597 #else
3598         endian_fixup = 0;
3599 #endif
3600         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3601                              its->mem_offset + endian_fixup)) {
3602             goto done;
3603         }
3604         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3605         break;
3606 
3607     default:
3608         g_assert_not_reached();
3609     }
3610 
3611     /* We now have a vector input register, so dup must succeed. */
3612     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3613     tcg_debug_assert(ok);
3614 
3615  done:
3616     if (IS_DEAD_ARG(1)) {
3617         temp_dead(s, its);
3618     }
3619     if (NEED_SYNC_ARG(0)) {
3620         temp_sync(s, ots, s->reserved_regs, 0, 0);
3621     }
3622     if (IS_DEAD_ARG(0)) {
3623         temp_dead(s, ots);
3624     }
3625 }
3626 
/*
 * Generic register allocation and host code emission for a single op.
 *
 * Phases, in order:
 *   1. copy the op's constant arguments into new_args[];
 *   2. place each input either as an accepted constant or in a register
 *      that satisfies its constraint;
 *   3. call temp_dead() on inputs flagged dead at this op;
 *   4. act on the op flags (conditional branch, basic-block end,
 *      call-clobber, side effects) and pick the output registers;
 *   5. emit the instruction via tcg_out_vec_op() or tcg_out_op();
 *   6. sync and/or kill the outputs as the liveness data requests.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    /*
     * NOTE(review): arg_life is never named explicitly below; presumably
     * the IS_DEAD_ARG()/NEED_SYNC_ARG() macros read it -- confirm.
     */
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    /* Registers claimed so far by inputs and by outputs, respectively. */
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /*
     * Arguments handed to the backend: new_args[i] is a register number,
     * or the constant value itself when const_args[i] is set.
     */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Both sets start out containing only the reserved registers. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Visit inputs in sort_index order rather than argument order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            /* This input shares its register with an output. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            /* The input register can double as the output register. */
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        /*
         * The temp may already have been in a register outside the
         * constraint; the aliasing checks above also jump in here.
         */
        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /*
     * Conditional branches and basic-block ends take dedicated paths;
     * output registers are only selected in the final else branch.
     */
    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Reuse the register of the aliased (non-constant) input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* Must differ from every input register as well. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* Drop the mapping of whatever register the temp held before. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        /* A sync also carries the dead flag, so both are done in one call. */
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3811 
3812 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3813 {
3814     const TCGLifeData arg_life = op->life;
3815     TCGTemp *ots, *itsl, *itsh;
3816     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3817 
3818     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3819     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3820     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3821 
3822     ots = arg_temp(op->args[0]);
3823     itsl = arg_temp(op->args[1]);
3824     itsh = arg_temp(op->args[2]);
3825 
3826     /* ENV should not be modified.  */
3827     tcg_debug_assert(!temp_readonly(ots));
3828 
3829     /* Allocate the output register now.  */
3830     if (ots->val_type != TEMP_VAL_REG) {
3831         TCGRegSet allocated_regs = s->reserved_regs;
3832         TCGRegSet dup_out_regs =
3833             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3834 
3835         /* Make sure to not spill the input registers. */
3836         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3837             tcg_regset_set_reg(allocated_regs, itsl->reg);
3838         }
3839         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3840             tcg_regset_set_reg(allocated_regs, itsh->reg);
3841         }
3842 
3843         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3844                                  op->output_pref[0], ots->indirect_base);
3845         ots->val_type = TEMP_VAL_REG;
3846         ots->mem_coherent = 0;
3847         s->reg_to_temp[ots->reg] = ots;
3848     }
3849 
3850     /* Promote dup2 of immediates to dupi_vec. */
3851     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3852         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3853         MemOp vece = MO_64;
3854 
3855         if (val == dup_const(MO_8, val)) {
3856             vece = MO_8;
3857         } else if (val == dup_const(MO_16, val)) {
3858             vece = MO_16;
3859         } else if (val == dup_const(MO_32, val)) {
3860             vece = MO_32;
3861         }
3862 
3863         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3864         goto done;
3865     }
3866 
3867     /* If the two inputs form one 64-bit value, try dupm_vec. */
3868     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3869         if (!itsl->mem_coherent) {
3870             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3871         }
3872         if (!itsh->mem_coherent) {
3873             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3874         }
3875 #ifdef HOST_WORDS_BIGENDIAN
3876         TCGTemp *its = itsh;
3877 #else
3878         TCGTemp *its = itsl;
3879 #endif
3880         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3881                              its->mem_base->reg, its->mem_offset)) {
3882             goto done;
3883         }
3884     }
3885 
3886     /* Fall back to generic expansion. */
3887     return false;
3888 
3889  done:
3890     if (IS_DEAD_ARG(1)) {
3891         temp_dead(s, itsl);
3892     }
3893     if (IS_DEAD_ARG(2)) {
3894         temp_dead(s, itsh);
3895     }
3896     if (NEED_SYNC_ARG(0)) {
3897         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3898     } else if (IS_DEAD_ARG(0)) {
3899         temp_dead(s, ots);
3900     }
3901     return true;
3902 }
3903 
/*
 * STACK_DIR(x) negates x when TCG_TARGET_STACK_GROWSUP is defined and
 * yields x unchanged otherwise.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3909 
3910 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3911 {
3912     const int nb_oargs = TCGOP_CALLO(op);
3913     const int nb_iargs = TCGOP_CALLI(op);
3914     const TCGLifeData arg_life = op->life;
3915     const TCGHelperInfo *info;
3916     int flags, nb_regs, i;
3917     TCGReg reg;
3918     TCGArg arg;
3919     TCGTemp *ts;
3920     intptr_t stack_offset;
3921     size_t call_stack_size;
3922     tcg_insn_unit *func_addr;
3923     int allocate_args;
3924     TCGRegSet allocated_regs;
3925 
3926     func_addr = tcg_call_func(op);
3927     info = tcg_call_info(op);
3928     flags = info->flags;
3929 
3930     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3931     if (nb_regs > nb_iargs) {
3932         nb_regs = nb_iargs;
3933     }
3934 
3935     /* assign stack slots first */
3936     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3937     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3938         ~(TCG_TARGET_STACK_ALIGN - 1);
3939     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3940     if (allocate_args) {
3941         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3942            preallocate call stack */
3943         tcg_abort();
3944     }
3945 
3946     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3947     for (i = nb_regs; i < nb_iargs; i++) {
3948         arg = op->args[nb_oargs + i];
3949 #ifdef TCG_TARGET_STACK_GROWSUP
3950         stack_offset -= sizeof(tcg_target_long);
3951 #endif
3952         if (arg != TCG_CALL_DUMMY_ARG) {
3953             ts = arg_temp(arg);
3954             temp_load(s, ts, tcg_target_available_regs[ts->type],
3955                       s->reserved_regs, 0);
3956             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3957         }
3958 #ifndef TCG_TARGET_STACK_GROWSUP
3959         stack_offset += sizeof(tcg_target_long);
3960 #endif
3961     }
3962 
3963     /* assign input registers */
3964     allocated_regs = s->reserved_regs;
3965     for (i = 0; i < nb_regs; i++) {
3966         arg = op->args[nb_oargs + i];
3967         if (arg != TCG_CALL_DUMMY_ARG) {
3968             ts = arg_temp(arg);
3969             reg = tcg_target_call_iarg_regs[i];
3970 
3971             if (ts->val_type == TEMP_VAL_REG) {
3972                 if (ts->reg != reg) {
3973                     tcg_reg_free(s, reg, allocated_regs);
3974                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3975                         /*
3976                          * Cross register class move not supported.  Sync the
3977                          * temp back to its slot and load from there.
3978                          */
3979                         temp_sync(s, ts, allocated_regs, 0, 0);
3980                         tcg_out_ld(s, ts->type, reg,
3981                                    ts->mem_base->reg, ts->mem_offset);
3982                     }
3983                 }
3984             } else {
3985                 TCGRegSet arg_set = 0;
3986 
3987                 tcg_reg_free(s, reg, allocated_regs);
3988                 tcg_regset_set_reg(arg_set, reg);
3989                 temp_load(s, ts, arg_set, allocated_regs, 0);
3990             }
3991 
3992             tcg_regset_set_reg(allocated_regs, reg);
3993         }
3994     }
3995 
3996     /* mark dead temporaries and free the associated registers */
3997     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3998         if (IS_DEAD_ARG(i)) {
3999             temp_dead(s, arg_temp(op->args[i]));
4000         }
4001     }
4002 
4003     /* clobber call registers */
4004     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4005         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4006             tcg_reg_free(s, i, allocated_regs);
4007         }
4008     }
4009 
4010     /* Save globals if they might be written by the helper, sync them if
4011        they might be read. */
4012     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4013         /* Nothing to do */
4014     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4015         sync_globals(s, allocated_regs);
4016     } else {
4017         save_globals(s, allocated_regs);
4018     }
4019 
4020 #ifdef CONFIG_TCG_INTERPRETER
4021     {
4022         gpointer hash = (gpointer)(uintptr_t)info->typemask;
4023         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4024         assert(cif != NULL);
4025         tcg_out_call(s, func_addr, cif);
4026     }
4027 #else
4028     tcg_out_call(s, func_addr);
4029 #endif
4030 
4031     /* assign output registers and emit moves if needed */
4032     for(i = 0; i < nb_oargs; i++) {
4033         arg = op->args[i];
4034         ts = arg_temp(arg);
4035 
4036         /* ENV should not be modified.  */
4037         tcg_debug_assert(!temp_readonly(ts));
4038 
4039         reg = tcg_target_call_oarg_regs[i];
4040         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4041         if (ts->val_type == TEMP_VAL_REG) {
4042             s->reg_to_temp[ts->reg] = NULL;
4043         }
4044         ts->val_type = TEMP_VAL_REG;
4045         ts->reg = reg;
4046         ts->mem_coherent = 0;
4047         s->reg_to_temp[reg] = ts;
4048         if (NEED_SYNC_ARG(i)) {
4049             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4050         } else if (IS_DEAD_ARG(i)) {
4051             temp_dead(s, ts);
4052         }
4053     }
4054 }
4055 
4056 #ifdef CONFIG_PROFILER
4057 
/* avoid copy/paste errors */

/* Add the value read with qatomic_read() from (from)->field to (to)->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/*
 * Raise (to)->field to the value read with qatomic_read() from
 * (from)->field when that value is larger.
 */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4071 
4072 /* Pass in a zero'ed @prof */
4073 static inline
4074 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4075 {
4076     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4077     unsigned int i;
4078 
4079     for (i = 0; i < n_ctxs; i++) {
4080         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4081         const TCGProfile *orig = &s->prof;
4082 
4083         if (counters) {
4084             PROF_ADD(prof, orig, cpu_exec_time);
4085             PROF_ADD(prof, orig, tb_count1);
4086             PROF_ADD(prof, orig, tb_count);
4087             PROF_ADD(prof, orig, op_count);
4088             PROF_MAX(prof, orig, op_count_max);
4089             PROF_ADD(prof, orig, temp_count);
4090             PROF_MAX(prof, orig, temp_count_max);
4091             PROF_ADD(prof, orig, del_op_count);
4092             PROF_ADD(prof, orig, code_in_len);
4093             PROF_ADD(prof, orig, code_out_len);
4094             PROF_ADD(prof, orig, search_out_len);
4095             PROF_ADD(prof, orig, interm_time);
4096             PROF_ADD(prof, orig, code_time);
4097             PROF_ADD(prof, orig, la_time);
4098             PROF_ADD(prof, orig, opt_time);
4099             PROF_ADD(prof, orig, restore_count);
4100             PROF_ADD(prof, orig, restore_time);
4101         }
4102         if (table) {
4103             int i;
4104 
4105             for (i = 0; i < NB_OPS; i++) {
4106                 PROF_ADD(prof, orig, table_op_count[i]);
4107             }
4108         }
4109     }
4110 }
4111 
4112 #undef PROF_ADD
4113 #undef PROF_MAX
4114 
/*
 * Snapshot only the scalar counters into @prof: calls
 * tcg_profile_snapshot() with counters enabled and the table disabled.
 */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4119 
/*
 * Snapshot only the per-opcode table into @prof: calls
 * tcg_profile_snapshot() with counters disabled and the table enabled.
 */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4124 
4125 void tcg_dump_op_count(GString *buf)
4126 {
4127     TCGProfile prof = {};
4128     int i;
4129 
4130     tcg_profile_snapshot_table(&prof);
4131     for (i = 0; i < NB_OPS; i++) {
4132         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4133                                prof.table_op_count[i]);
4134     }
4135 }
4136 
4137 int64_t tcg_cpu_exec_time(void)
4138 {
4139     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4140     unsigned int i;
4141     int64_t ret = 0;
4142 
4143     for (i = 0; i < n_ctxs; i++) {
4144         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4145         const TCGProfile *prof = &s->prof;
4146 
4147         ret += qatomic_read(&prof->cpu_exec_time);
4148     }
4149     return ret;
4150 }
4151 #else
4152 void tcg_dump_op_count(GString *buf)
4153 {
4154     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4155 }
4156 
/*
 * Profiler-less build: there is no execution time to report.  Logs an
 * error naming this function and terminates the process with
 * EXIT_FAILURE; it never returns a value to the caller.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4162 #endif
4163 
4164 
/*
 * Translate the ops queued in @s into host code for @tb.
 *
 * Runs tcg_optimize() (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachability and liveness passes, then walks the op list performing
 * register allocation and emission for each op, and finally resolves
 * the pending labels and relocations.
 *
 * Returns the generated code size on success.  Returns -1 when the code
 * pointer has passed s->code_gen_highwater, -2 when the code size
 * exceeds UINT16_MAX or tcg_resolve_relocs() fails, and otherwise
 * propagates a negative result from tcg_out_ldst_finalize() or
 * tcg_out_pool_finalize().
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        /* Account the op and temp counts of this block, and their maxima. */
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the ops as received, before any pass has run. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /*
     * The profiler times a phase by subtracting the clock at its start
     * and adding it back at its end.
     */
#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /*
     * num_insns indexes the guest instruction currently being emitted;
     * it becomes 0 at the first insn_start op.
     */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest insn by recording where it ended. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            /* Record the start words of the new guest insn. */
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word is split across two args: low half first. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            /* A false return means the generic path below must handle it. */
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start must have been seen. */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4390 
4391 #ifdef CONFIG_PROFILER
4392 void tcg_dump_info(GString *buf)
4393 {
4394     TCGProfile prof = {};
4395     const TCGProfile *s;
4396     int64_t tb_count;
4397     int64_t tb_div_count;
4398     int64_t tot;
4399 
4400     tcg_profile_snapshot_counters(&prof);
4401     s = &prof;
4402     tb_count = s->tb_count;
4403     tb_div_count = tb_count ? tb_count : 1;
4404     tot = s->interm_time + s->code_time;
4405 
4406     g_string_append_printf(buf, "JIT cycles          %" PRId64
4407                            " (%0.3f s at 2.4 GHz)\n",
4408                            tot, tot / 2.4e9);
4409     g_string_append_printf(buf, "translated TBs      %" PRId64
4410                            " (aborted=%" PRId64 " %0.1f%%)\n",
4411                            tb_count, s->tb_count1 - tb_count,
4412                            (double)(s->tb_count1 - s->tb_count)
4413                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4414     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4415                            (double)s->op_count / tb_div_count, s->op_count_max);
4416     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4417                            (double)s->del_op_count / tb_div_count);
4418     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4419                            (double)s->temp_count / tb_div_count,
4420                            s->temp_count_max);
4421     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4422                            (double)s->code_out_len / tb_div_count);
4423     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4424                            (double)s->search_out_len / tb_div_count);
4425 
4426     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4427                            s->op_count ? (double)tot / s->op_count : 0);
4428     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4429                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4430     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4431                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4432     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4433                            s->search_out_len ?
4434                            (double)tot / s->search_out_len : 0);
4435     if (tot == 0) {
4436         tot = 1;
4437     }
4438     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4439                            (double)s->interm_time / tot * 100.0);
4440     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4441                            (double)s->code_time / tot * 100.0);
4442     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4443                            (double)s->opt_time / (s->code_time ?
4444                                                   s->code_time : 1)
4445                            * 100.0);
4446     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4447                            (double)s->la_time / (s->code_time ?
4448                                                  s->code_time : 1) * 100.0);
4449     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4450                            s->restore_count);
4451     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4452                            s->restore_count ?
4453                            (double)s->restore_time / s->restore_count : 0);
4454 }
4455 #else
4456 void tcg_dump_info(GString *buf)
4457 {
4458     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4459 }
4460 #endif
4461 
4462 #ifdef ELF_HOST_MACHINE
4463 /* In order to use this feature, the backend needs to do three things:
4464 
4465    (1) Define ELF_HOST_MACHINE to indicate both what value to
4466        put into the ELF image and to indicate support for the feature.
4467 
4468    (2) Define tcg_register_jit.  This should create a buffer containing
4469        the contents of a .debug_frame section that describes the post-
4470        prologue unwind info for the tcg machine.
4471 
4472    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4473 */
4474 
4475 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes stored in jit_descriptor.action_flag.
 * The enumerators take the consecutive values 0, 1 and 2.
 */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
4481 
/*
 * One registered symbol file: a doubly linked list node that records the
 * address and size in bytes of an in-memory symbol file image.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};
4488 
/*
 * Descriptor shared with the debugger: interface version, the pending
 * action (a jit_actions_t value), the entry that action refers to, and
 * the head of the list of entries.
 */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
4495 
/*
 * Notification hook called after __jit_debug_descriptor has been updated.
 * It is declared noinline and contains only an empty asm statement,
 * presumably so that it remains a real, callable function the debugger
 * can intercept -- see the GDB JIT interface documentation.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4501 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  The remaining fields start out as
   "no action" (0 == JIT_NOACTION) with no relevant or first entry.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4505 
4506 /* End GDB interface.  */
4507 
/*
 * Look up @str in the ELF-style string table @strtab.
 *
 * @strtab: table of NUL-terminated strings laid out back to back.  The
 *          byte at offset 0 is skipped (the search starts at offset 1),
 *          and the table must be closed by an empty string, i.e. the last
 *          name must be followed by at least one extra NUL byte.
 * @str:    string to search for.
 *
 * Returns the byte offset of the first entry equal to @str, or 0 when the
 * closing empty string is reached without a match.  Offset 0 is never a
 * valid match here because the search starts at offset 1.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return (int)(p - strtab);
        }
        if (*p == '\0') {
            /*
             * Hit the empty string that closes the table: stop here
             * instead of walking past the end of the table.
             */
            return 0;
        }
        p += strlen(p) + 1;
    }
}
4519 
4520 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4521                                  const void *debug_frame,
4522                                  size_t debug_frame_size)
4523 {
4524     struct __attribute__((packed)) DebugInfo {
4525         uint32_t  len;
4526         uint16_t  version;
4527         uint32_t  abbrev;
4528         uint8_t   ptr_size;
4529         uint8_t   cu_die;
4530         uint16_t  cu_lang;
4531         uintptr_t cu_low_pc;
4532         uintptr_t cu_high_pc;
4533         uint8_t   fn_die;
4534         char      fn_name[16];
4535         uintptr_t fn_low_pc;
4536         uintptr_t fn_high_pc;
4537         uint8_t   cu_eoc;
4538     };
4539 
4540     struct ElfImage {
4541         ElfW(Ehdr) ehdr;
4542         ElfW(Phdr) phdr;
4543         ElfW(Shdr) shdr[7];
4544         ElfW(Sym)  sym[2];
4545         struct DebugInfo di;
4546         uint8_t    da[24];
4547         char       str[80];
4548     };
4549 
4550     struct ElfImage *img;
4551 
4552     static const struct ElfImage img_template = {
4553         .ehdr = {
4554             .e_ident[EI_MAG0] = ELFMAG0,
4555             .e_ident[EI_MAG1] = ELFMAG1,
4556             .e_ident[EI_MAG2] = ELFMAG2,
4557             .e_ident[EI_MAG3] = ELFMAG3,
4558             .e_ident[EI_CLASS] = ELF_CLASS,
4559             .e_ident[EI_DATA] = ELF_DATA,
4560             .e_ident[EI_VERSION] = EV_CURRENT,
4561             .e_type = ET_EXEC,
4562             .e_machine = ELF_HOST_MACHINE,
4563             .e_version = EV_CURRENT,
4564             .e_phoff = offsetof(struct ElfImage, phdr),
4565             .e_shoff = offsetof(struct ElfImage, shdr),
4566             .e_ehsize = sizeof(ElfW(Shdr)),
4567             .e_phentsize = sizeof(ElfW(Phdr)),
4568             .e_phnum = 1,
4569             .e_shentsize = sizeof(ElfW(Shdr)),
4570             .e_shnum = ARRAY_SIZE(img->shdr),
4571             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4572 #ifdef ELF_HOST_FLAGS
4573             .e_flags = ELF_HOST_FLAGS,
4574 #endif
4575 #ifdef ELF_OSABI
4576             .e_ident[EI_OSABI] = ELF_OSABI,
4577 #endif
4578         },
4579         .phdr = {
4580             .p_type = PT_LOAD,
4581             .p_flags = PF_X,
4582         },
4583         .shdr = {
4584             [0] = { .sh_type = SHT_NULL },
4585             /* Trick: The contents of code_gen_buffer are not present in
4586                this fake ELF file; that got allocated elsewhere.  Therefore
4587                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4588                will not look for contents.  We can record any address.  */
4589             [1] = { /* .text */
4590                 .sh_type = SHT_NOBITS,
4591                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4592             },
4593             [2] = { /* .debug_info */
4594                 .sh_type = SHT_PROGBITS,
4595                 .sh_offset = offsetof(struct ElfImage, di),
4596                 .sh_size = sizeof(struct DebugInfo),
4597             },
4598             [3] = { /* .debug_abbrev */
4599                 .sh_type = SHT_PROGBITS,
4600                 .sh_offset = offsetof(struct ElfImage, da),
4601                 .sh_size = sizeof(img->da),
4602             },
4603             [4] = { /* .debug_frame */
4604                 .sh_type = SHT_PROGBITS,
4605                 .sh_offset = sizeof(struct ElfImage),
4606             },
4607             [5] = { /* .symtab */
4608                 .sh_type = SHT_SYMTAB,
4609                 .sh_offset = offsetof(struct ElfImage, sym),
4610                 .sh_size = sizeof(img->sym),
4611                 .sh_info = 1,
4612                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4613                 .sh_entsize = sizeof(ElfW(Sym)),
4614             },
4615             [6] = { /* .strtab */
4616                 .sh_type = SHT_STRTAB,
4617                 .sh_offset = offsetof(struct ElfImage, str),
4618                 .sh_size = sizeof(img->str),
4619             }
4620         },
4621         .sym = {
4622             [1] = { /* code_gen_buffer */
4623                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4624                 .st_shndx = 1,
4625             }
4626         },
4627         .di = {
4628             .len = sizeof(struct DebugInfo) - 4,
4629             .version = 2,
4630             .ptr_size = sizeof(void *),
4631             .cu_die = 1,
4632             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4633             .fn_die = 2,
4634             .fn_name = "code_gen_buffer"
4635         },
4636         .da = {
4637             1,          /* abbrev number (the cu) */
4638             0x11, 1,    /* DW_TAG_compile_unit, has children */
4639             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4640             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4641             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4642             0, 0,       /* end of abbrev */
4643             2,          /* abbrev number (the fn) */
4644             0x2e, 0,    /* DW_TAG_subprogram, no children */
4645             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4646             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4647             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4648             0, 0,       /* end of abbrev */
4649             0           /* no more abbrev */
4650         },
4651         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4652                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4653     };
4654 
4655     /* We only need a single jit entry; statically allocate it.  */
4656     static struct jit_code_entry one_entry;
4657 
4658     uintptr_t buf = (uintptr_t)buf_ptr;
4659     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4660     DebugFrameHeader *dfh;
4661 
4662     img = g_malloc(img_size);
4663     *img = img_template;
4664 
4665     img->phdr.p_vaddr = buf;
4666     img->phdr.p_paddr = buf;
4667     img->phdr.p_memsz = buf_size;
4668 
4669     img->shdr[1].sh_name = find_string(img->str, ".text");
4670     img->shdr[1].sh_addr = buf;
4671     img->shdr[1].sh_size = buf_size;
4672 
4673     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4674     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4675 
4676     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4677     img->shdr[4].sh_size = debug_frame_size;
4678 
4679     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4680     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4681 
4682     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4683     img->sym[1].st_value = buf;
4684     img->sym[1].st_size = buf_size;
4685 
4686     img->di.cu_low_pc = buf;
4687     img->di.cu_high_pc = buf + buf_size;
4688     img->di.fn_low_pc = buf;
4689     img->di.fn_high_pc = buf + buf_size;
4690 
4691     dfh = (DebugFrameHeader *)(img + 1);
4692     memcpy(dfh, debug_frame, debug_frame_size);
4693     dfh->fde.func_start = buf;
4694     dfh->fde.func_len = buf_size;
4695 
4696 #ifdef DEBUG_JIT
4697     /* Enable this block to be able to debug the ELF image file creation.
4698        One can use readelf, objdump, or other inspection utilities.  */
4699     {
4700         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4701         if (f) {
4702             if (fwrite(img, img_size, 1, f) != img_size) {
4703                 /* Avoid stupid unused return value warning for fwrite.  */
4704             }
4705             fclose(f);
4706         }
4707     }
4708 #endif
4709 
4710     one_entry.symfile_addr = img;
4711     one_entry.symfile_size = img_size;
4712 
4713     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4714     __jit_debug_descriptor.relevant_entry = &one_entry;
4715     __jit_debug_descriptor.first_entry = &one_entry;
4716     __jit_debug_register_code();
4717 }
4718 #else
4719 /* No support for the feature.  Provide the entry point expected by exec.c,
4720    and implement the internal function we declared earlier.  */
4721 
/*
 * Stub used when ELF_HOST_MACHINE is not defined: accepts the same
 * arguments as the real implementation and does nothing with them.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Nothing to register; all arguments are deliberately unused. */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4727 
/*
 * Stub entry point used when ELF_HOST_MACHINE is not defined: the buffer
 * is not registered anywhere and the call has no effect.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Both arguments are deliberately unused. */
    (void)buf;
    (void)buf_size;
}
4731 #endif /* ELF_HOST_MACHINE */
4732 
4733 #if !TCG_TARGET_MAYBE_vec
4734 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4735 {
4736     g_assert_not_reached();
4737 }
4738 #endif
4739