xref: /openbmc/qemu/tcg/tcg.c (revision 8eb806a7)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Record length, not counting this field.  Aligned so that the
       following records can be emitted at pointer alignment.  */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;            /* CIE identifier */
    uint8_t version;        /* CIE format version */
    char augmentation[1];   /* NUL-terminated augmentation string */
    uint8_t code_align;     /* code alignment factor */
    uint8_t data_align;     /* data alignment factor */
    uint8_t return_column;  /* column holding the return address */
} DebugFrameCIE;

/* FDE header referencing the CIE above; packed because func_start and
   func_len are raw host-pointer-sized values.  */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;    /* offset back to the owning CIE */
    uintptr_t func_start;   /* start of the code range described */
    uintptr_t func_len;     /* length of the code range described */
} DebugFrameFDEHeader;

/* A CIE followed immediately by a single FDE header.  */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(const void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
/* This target provides no vector support: the following stubs are
   referenced by common code but must never actually be reached.  */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154                          ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162 
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165 
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172 
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176 
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179 
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183     *s->code_ptr++ = v;
184 }
185 
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187                                                       uint8_t v)
188 {
189     *p = v;
190 }
191 #endif
192 
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197         *s->code_ptr++ = v;
198     } else {
199         tcg_insn_unit *p = s->code_ptr;
200         memcpy(p, &v, sizeof(v));
201         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202     }
203 }
204 
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206                                                        uint16_t v)
207 {
208     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209         *p = v;
210     } else {
211         memcpy(p, &v, sizeof(v));
212     }
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229                                                        uint32_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252                                                        uint64_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 /* label relocation processing */
263 
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265                           TCGLabel *l, intptr_t addend)
266 {
267     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268 
269     r->type = type;
270     r->ptr = code_ptr;
271     r->addend = addend;
272     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274 
/* Bind label l to the current output position; the label must not
   already have a value.  */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    /* Store the execute (rx) view of the current write pointer.  */
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
281 
282 TCGLabel *gen_new_label(void)
283 {
284     TCGContext *s = tcg_ctx;
285     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286 
287     memset(l, 0, sizeof(TCGLabel));
288     l->id = s->nb_labels++;
289     QSIMPLEQ_INIT(&l->relocs);
290 
291     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292 
293     return l;
294 }
295 
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298     TCGLabel *l;
299 
300     QSIMPLEQ_FOREACH(l, &s->labels, next) {
301         TCGRelocation *r;
302         uintptr_t value = l->u.value;
303 
304         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306                 return false;
307             }
308         }
309     }
310     return true;
311 }
312 
/* Record the current code offset as the reset point for goto_tb slot
   'which' of the TB being translated.  */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
321 
/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp in the translation loop; -2 requests a
       retry of this TB with fewer guest instructions.  */
    siglongjmp(s->jmp_trans, -2);
}
327 
328 #define C_PFX1(P, A)                    P##A
329 #define C_PFX2(P, A, B)                 P##A##_##B
330 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
331 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
332 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
333 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
334 
335 /* Define an enumeration for the various combinations. */
336 
337 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
338 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
339 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
340 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
341 
342 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
343 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
344 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
345 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
346 
347 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
348 
349 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
350 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
351 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
352 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
353 
354 typedef enum {
355 #include "tcg-target-con-set.h"
356 } TCGConstraintSetIndex;
357 
358 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
359 
360 #undef C_O0_I1
361 #undef C_O0_I2
362 #undef C_O0_I3
363 #undef C_O0_I4
364 #undef C_O1_I1
365 #undef C_O1_I2
366 #undef C_O1_I3
367 #undef C_O1_I4
368 #undef C_N1_I2
369 #undef C_O2_I1
370 #undef C_O2_I2
371 #undef C_O2_I3
372 #undef C_O2_I4
373 
374 /* Put all of the constraint sets into an array, indexed by the enum. */
375 
376 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
377 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
378 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
379 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
380 
381 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
382 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
383 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
384 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
385 
386 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
387 
388 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
389 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
390 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
391 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
392 
393 static const TCGTargetOpDef constraint_sets[] = {
394 #include "tcg-target-con-set.h"
395 };
396 
397 
398 #undef C_O0_I1
399 #undef C_O0_I2
400 #undef C_O0_I3
401 #undef C_O0_I4
402 #undef C_O1_I1
403 #undef C_O1_I2
404 #undef C_O1_I3
405 #undef C_O1_I4
406 #undef C_N1_I2
407 #undef C_O2_I1
408 #undef C_O2_I2
409 #undef C_O2_I3
410 #undef C_O2_I4
411 
412 /* Expand the enumerator to be returned from tcg_target_op_def(). */
413 
414 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
415 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
416 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
417 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
418 
419 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
420 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
421 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
422 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
423 
424 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
425 
426 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
427 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
428 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
429 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
430 
431 #include "tcg-target.c.inc"
432 
/* Allocate per-context TB state for the plugin subsystem; a no-op
   unless CONFIG_PLUGIN is enabled.  */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
441 
442 /*
443  * All TCG threads except the parent (i.e. the one that called tcg_context_init
444  * and registered the target's TCG globals) must register with this function
445  * before initiating translation.
446  *
447  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
448  * of tcg_region_init() for the reasoning behind this.
449  *
450  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
452  * is not used anymore for translation once this function is called.
453  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
456  */
457 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single init context/region.  */
    tcg_ctx = &tcg_init_ctx;
}
462 #else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a full copy of the initialized parent context.  */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* mem_base points into the parent's temps[]; re-base it
               onto this context's own temps[] array.  */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* Slot 0 is the parent, which already has plugin state and its
       initial region; only secondary contexts set these up here.  */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
491 #endif /* !CONFIG_USER_ONLY */
492 
/* pool based memory allocation */
/* Slow path of tcg_malloc: return 'size' bytes from the context's pool,
   growing it as needed.  Memory is reclaimed en masse by tcg_pool_reset.  */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        /* Oversized requests get a dedicated chunk on the large list,
           freed wholesale by tcg_pool_reset.  */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        /* Move to the next chunk in the chain, allocating a new one
           when the chain is empty or exhausted.  */
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    /* The caller's block sits at the start of the chunk; cur/end bound
       what remains for subsequent fast-path tcg_malloc calls.  */
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
533 
534 void tcg_pool_reset(TCGContext *s)
535 {
536     TCGPool *p, *t;
537     for (p = s->pool_first_large; p; p = t) {
538         t = p->next;
539         g_free(p);
540     }
541     s->pool_first_large = NULL;
542     s->pool_cur = s->pool_end = NULL;
543     s->pool_current = NULL;
544 }
545 
546 #include "exec/helper-proto.h"
547 
548 static const TCGHelperInfo all_helpers[] = {
549 #include "exec/helper-tcg.h"
550 };
551 static GHashTable *helper_table;
552 
553 #ifdef CONFIG_TCG_INTERPRETER
554 static GHashTable *ffi_table;
555 
556 static ffi_type * const typecode_to_ffi[8] = {
557     [dh_typecode_void] = &ffi_type_void,
558     [dh_typecode_i32]  = &ffi_type_uint32,
559     [dh_typecode_s32]  = &ffi_type_sint32,
560     [dh_typecode_i64]  = &ffi_type_uint64,
561     [dh_typecode_s64]  = &ffi_type_sint64,
562     [dh_typecode_ptr]  = &ffi_type_pointer,
563 };
564 #endif
565 
566 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
567 static void process_op_defs(TCGContext *s);
568 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
569                                             TCGReg reg, const char *name);
570 
571 static void tcg_context_init(unsigned max_cpus)
572 {
573     TCGContext *s = &tcg_init_ctx;
574     int op, total_args, n, i;
575     TCGOpDef *def;
576     TCGArgConstraint *args_ct;
577     TCGTemp *ts;
578 
579     memset(s, 0, sizeof(*s));
580     s->nb_globals = 0;
581 
582     /* Count total number of arguments and allocate the corresponding
583        space */
584     total_args = 0;
585     for(op = 0; op < NB_OPS; op++) {
586         def = &tcg_op_defs[op];
587         n = def->nb_iargs + def->nb_oargs;
588         total_args += n;
589     }
590 
591     args_ct = g_new0(TCGArgConstraint, total_args);
592 
593     for(op = 0; op < NB_OPS; op++) {
594         def = &tcg_op_defs[op];
595         def->args_ct = args_ct;
596         n = def->nb_iargs + def->nb_oargs;
597         args_ct += n;
598     }
599 
600     /* Register helpers.  */
601     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
602     helper_table = g_hash_table_new(NULL, NULL);
603 
604     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
605         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
606                             (gpointer)&all_helpers[i]);
607     }
608 
609 #ifdef CONFIG_TCG_INTERPRETER
610     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
611     ffi_table = g_hash_table_new(NULL, NULL);
612     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
613         struct {
614             ffi_cif cif;
615             ffi_type *args[];
616         } *ca;
617         uint32_t typemask = all_helpers[i].typemask;
618         gpointer hash = (gpointer)(uintptr_t)typemask;
619         ffi_status status;
620         int nargs;
621 
622         if (g_hash_table_lookup(ffi_table, hash)) {
623             continue;
624         }
625 
626         /* Ignoring the return type, find the last non-zero field. */
627         nargs = 32 - clz32(typemask >> 3);
628         nargs = DIV_ROUND_UP(nargs, 3);
629 
630         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
631         ca->cif.rtype = typecode_to_ffi[typemask & 7];
632         ca->cif.nargs = nargs;
633 
634         if (nargs != 0) {
635             ca->cif.arg_types = ca->args;
636             for (i = 0; i < nargs; ++i) {
637                 int typecode = extract32(typemask, (i + 1) * 3, 3);
638                 ca->args[i] = typecode_to_ffi[typecode];
639             }
640         }
641 
642         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
643                               ca->cif.rtype, ca->cif.arg_types);
644         assert(status == FFI_OK);
645 
646         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
647     }
648 #endif
649 
650     tcg_target_init(s);
651     process_op_defs(s);
652 
653     /* Reverse the order of the saved registers, assuming they're all at
654        the start of tcg_target_reg_alloc_order.  */
655     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
656         int r = tcg_target_reg_alloc_order[n];
657         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
658             break;
659         }
660     }
661     for (i = 0; i < n; ++i) {
662         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
663     }
664     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
665         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
666     }
667 
668     alloc_tcg_plugin_context(s);
669 
670     tcg_ctx = s;
671     /*
672      * In user-mode we simply share the init context among threads, since we
673      * use a single region. See the documentation tcg_region_init() for the
674      * reasoning behind this.
675      * In softmmu we will have at most max_cpus TCG threads.
676      */
677 #ifdef CONFIG_USER_ONLY
678     tcg_ctxs = &tcg_ctx;
679     tcg_cur_ctxs = 1;
680     tcg_max_ctxs = 1;
681 #else
682     tcg_max_ctxs = max_cpus;
683     tcg_ctxs = g_new0(TCGContext *, max_cpus);
684 #endif
685 
686     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
687     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
688     cpu_env = temp_tcgv_ptr(ts);
689 }
690 
/* One-time TCG startup: initialize the parent context, then carve the
   translation buffer into per-thread regions.  */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
696 
697 /*
698  * Allocate TBs right before their corresponding translated code, making
699  * sure that TBs and code are on different cache lines.
700  */
701 TranslationBlock *tcg_tb_alloc(TCGContext *s)
702 {
703     uintptr_t align = qemu_icache_linesize;
704     TranslationBlock *tb;
705     void *next;
706 
707  retry:
708     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
709     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
710 
711     if (unlikely(next > s->code_gen_highwater)) {
712         if (tcg_region_alloc(s)) {
713             return NULL;
714         }
715         goto retry;
716     }
717     qatomic_set(&s->code_gen_ptr, next);
718     s->data_gen_ptr = NULL;
719     return tb;
720 }
721 
/* Emit the target's prologue/epilogue at the start of the code buffer
   and publish it as the entry point for executing generated code.  */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Execution goes through the rx alias of the write pointer.  */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written code visible to instruction fetch.  */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* A constant pool follows the code: disassemble the
                   code portion, hex-dump the data portion.  */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_flush();
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
803 
/* Reset per-translation state in preparation for a new TB.  */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Drop all non-global temps from the previous translation.  */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}
831 
832 static TCGTemp *tcg_temp_alloc(TCGContext *s)
833 {
834     int n = s->nb_temps++;
835 
836     if (n >= TCG_MAX_TEMPS) {
837         tcg_raise_tb_overflow(s);
838     }
839     return memset(&s->temps[n], 0, sizeof(TCGTemp));
840 }
841 
/* Allocate a new global temp.  All globals must be created before any
   per-translation temps; nb_globals and nb_temps advance in lockstep.  */
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}
854 
/* Create a fixed global temp that lives permanently in host register
   'reg' (e.g. the env pointer).  The register becomes reserved.  */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        /* A 64-bit value cannot live in a single 32-bit host register.  */
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    /* Keep the register allocator away from this register.  */
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}
874 
/* Record the spill frame [start, start + size), addressed relative to
   host register 'reg' through the fixed "_frame" global.  */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
882 
/* Create a global temp backed by memory at base + offset.  On 32-bit
   hosts a 64-bit global is split into two consecutive 32-bit temps
   named "<name>_0" and "<name>_1".  */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#if HOST_BIG_ENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit value on a 32-bit host counts as two indirects.  */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        /* The "_0" half's offset is endian-dependent: the halves swap
           positions on a big-endian host.  */
        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The second half must be the immediately following temp.  */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
944 
/*
 * Allocate a temporary of TYPE; temp_local selects kind TEMP_LOCAL
 * rather than TEMP_NORMAL.  Previously freed temps of the same type
 * and locality are recycled from the free list before new slots are
 * taken from the temp array.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    /* Free lists are binned by type, with local temps in the upper bins. */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* A 64-bit value occupies two consecutive 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
990 
991 TCGv_vec tcg_temp_new_vec(TCGType type)
992 {
993     TCGTemp *t;
994 
995 #ifdef CONFIG_DEBUG_TCG
996     switch (type) {
997     case TCG_TYPE_V64:
998         assert(TCG_TARGET_HAS_v64);
999         break;
1000     case TCG_TYPE_V128:
1001         assert(TCG_TARGET_HAS_v128);
1002         break;
1003     case TCG_TYPE_V256:
1004         assert(TCG_TARGET_HAS_v256);
1005         break;
1006     default:
1007         g_assert_not_reached();
1008     }
1009 #endif
1010 
1011     t = tcg_temp_new_internal(type, 0);
1012     return temp_tcgv_vec(t);
1013 }
1014 
1015 /* Create a new temp of the same type as an existing temp.  */
1016 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1017 {
1018     TCGTemp *t = tcgv_vec_temp(match);
1019 
1020     tcg_debug_assert(t->temp_allocated != 0);
1021 
1022     t = tcg_temp_new_internal(t->base_type, 0);
1023     return temp_tcgv_vec(t);
1024 }
1025 
1026 void tcg_temp_free_internal(TCGTemp *ts)
1027 {
1028     TCGContext *s = tcg_ctx;
1029     int k, idx;
1030 
1031     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1032     if (ts->kind == TEMP_CONST) {
1033         return;
1034     }
1035 
1036 #if defined(CONFIG_DEBUG_TCG)
1037     s->temps_in_use--;
1038     if (s->temps_in_use < 0) {
1039         fprintf(stderr, "More temporaries freed than allocated!\n");
1040     }
1041 #endif
1042 
1043     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1044     tcg_debug_assert(ts->temp_allocated != 0);
1045     ts->temp_allocated = 0;
1046 
1047     idx = temp_idx(ts);
1048     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1049     set_bit(idx, s->free_temps[k].l);
1050 }
1051 
/*
 * Return the interned TEMP_CONST temp holding VAL of TYPE, creating it
 * (and the per-type hash table) on first use.  Constants are never
 * freed: tcg_temp_free_internal() silently ignores TEMP_CONST temps.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Keys are pointers to 64-bit values (g_int64_hash/equal). */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* A 64-bit constant occupies two consecutive 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        /* The key must outlive the entry; &ts->val lives in s->temps. */
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
1099 
1100 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1101 {
1102     val = dup_const(vece, val);
1103     return temp_tcgv_vec(tcg_constant_internal(type, val));
1104 }
1105 
1106 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1107 {
1108     TCGTemp *t = tcgv_vec_temp(match);
1109 
1110     tcg_debug_assert(t->temp_allocated != 0);
1111     return tcg_constant_vec(t->base_type, vece, val);
1112 }
1113 
1114 TCGv_i32 tcg_const_i32(int32_t val)
1115 {
1116     TCGv_i32 t0;
1117     t0 = tcg_temp_new_i32();
1118     tcg_gen_movi_i32(t0, val);
1119     return t0;
1120 }
1121 
1122 TCGv_i64 tcg_const_i64(int64_t val)
1123 {
1124     TCGv_i64 t0;
1125     t0 = tcg_temp_new_i64();
1126     tcg_gen_movi_i64(t0, val);
1127     return t0;
1128 }
1129 
1130 TCGv_i32 tcg_const_local_i32(int32_t val)
1131 {
1132     TCGv_i32 t0;
1133     t0 = tcg_temp_local_new_i32();
1134     tcg_gen_movi_i32(t0, val);
1135     return t0;
1136 }
1137 
1138 TCGv_i64 tcg_const_local_i64(int64_t val)
1139 {
1140     TCGv_i64 t0;
1141     t0 = tcg_temp_local_new_i64();
1142     tcg_gen_movi_i64(t0, val);
1143     return t0;
1144 }
1145 
1146 #if defined(CONFIG_DEBUG_TCG)
1147 void tcg_clear_temp_count(void)
1148 {
1149     TCGContext *s = tcg_ctx;
1150     s->temps_in_use = 0;
1151 }
1152 
1153 int tcg_check_temp_count(void)
1154 {
1155     TCGContext *s = tcg_ctx;
1156     if (s->temps_in_use) {
1157         /* Clear the count so that we don't give another
1158          * warning immediately next time around.
1159          */
1160         s->temps_in_use = 0;
1161         return 1;
1162     }
1163     return 0;
1164 }
1165 #endif
1166 
1167 /* Return true if OP may appear in the opcode stream.
1168    Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* True if the backend implements any vector width at all. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control-flow and guest memory ops every backend must provide. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-backend capability macros. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops require a 64-bit host register size. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops (capability macros are 0 on 32-bit hosts). */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops additionally require some vector width support. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Backend-specific opcodes are assumed always available. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1461 
1462 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1463    and endian swap. Maybe it would be better to do the alignment
1464    and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op invoking helper FUNC with NARGS arguments,
 * optionally storing the result in RET.  FUNC must have been registered
 * in helper_table.  Each argument's 3-bit type code sits at typemask
 * bits (i+1)*3; the return type occupies the low bits.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned typemask;
    const TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    typemask = info->typemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_typemask = typemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    typemask = 0;
    for (i = real_args = 0; i < nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            /* Each 64-bit arg becomes a high/low pair of i32 temps. */
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            TCGv_i32 h = tcg_temp_new_i32();
            TCGv_i32 l = tcg_temp_new_i32();
            tcg_gen_extr_i64_i32(l, h, orig);
            split_args[real_args++] = tcgv_i32_temp(h);
            typemask |= dh_typecode_i32 << (real_args * 3);
            split_args[real_args++] = tcgv_i32_temp(l);
            typemask |= dh_typecode_i32 << (real_args * 3);
        } else {
            split_args[real_args++] = args[i];
            typemask |= argtype << (real_args * 3);
        }
    }
    nargs = real_args;
    args = split_args;
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Widen 32-bit arguments to 64 bits for targets that require it. */
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
        bool is_signed = argtype & 1;

        if (is_32bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i32 orig = temp_tcgv_i32(args[i]);
            if (is_signed) {
                tcg_gen_ext_i32_i64(temp, orig);
            } else {
                tcg_gen_extu_i32_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi indexes op->args: outputs first, then inputs, then func/info. */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if ((typemask & 6) == dh_typecode_i64) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
            /* A 64-bit result on a 32-bit host uses two consecutive
               temps, ordered to match host endianness.  */
#if HOST_BIG_ENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
        bool want_align = false;

#if defined(CONFIG_TCG_INTERPRETER)
        /*
         * Align all arguments, so that they land in predictable places
         * for passing off to ffi_call.
         */
        want_align = true;
#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
        /* Some targets want aligned 64 bit args */
        want_align = is_64bit;
#endif

        /* Pad with a dummy so the argument starts on an even slot. */
        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }

        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /*
             * If stack grows up, then we will be placing successive
             * arguments at lower addresses, which means we need to
             * reverse the order compared to how we would normally
             * treat either big or little-endian.  For those arguments
             * that will wind up in registers, this still works for
             * HPPA (the only current STACK_GROWSUP target) since the
             * argument registers are *also* allocated in decreasing
             * order.  If another such target is added, this logic may
             * have to get more complicated to differentiate between
             * stack arguments and register arguments.
             */
#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function pointer and helper info trail the real arguments. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if ((orig_typemask & 6) == dh_typecode_i64) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the widened copies created before the call was emitted. */
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;

        if (is_32bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1662 
1663 static void tcg_reg_alloc_start(TCGContext *s)
1664 {
1665     int i, n;
1666 
1667     for (i = 0, n = s->nb_temps; i < n; i++) {
1668         TCGTemp *ts = &s->temps[i];
1669         TCGTempVal val = TEMP_VAL_MEM;
1670 
1671         switch (ts->kind) {
1672         case TEMP_CONST:
1673             val = TEMP_VAL_CONST;
1674             break;
1675         case TEMP_FIXED:
1676             val = TEMP_VAL_REG;
1677             break;
1678         case TEMP_GLOBAL:
1679             break;
1680         case TEMP_NORMAL:
1681             val = TEMP_VAL_DEAD;
1682             /* fall through */
1683         case TEMP_LOCAL:
1684             ts->mem_allocated = 0;
1685             break;
1686         default:
1687             g_assert_not_reached();
1688         }
1689         ts->val_type = val;
1690     }
1691 
1692     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1693 }
1694 
/*
 * Format a printable description of TS into BUF for op dumping:
 * globals and fixed temps by name, locals as "locN", normals as
 * "tmpN", constants as their value.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        /* Non-globals are numbered relative to the first non-global. */
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants are prefixed with their width in bits. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
1734 
1735 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1736                              int buf_size, TCGArg arg)
1737 {
1738     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1739 }
1740 
/* Printable names for TCGCond condition codes, used when dumping ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1756 
/* Printable names for MemOp size/sign/endianness combinations. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
1772 
/*
 * Printable prefixes for the MemOp alignment bits.  The spelling of the
 * default case depends on whether the target requires alignment: the
 * non-default behavior gets the explicit marker.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1788 
/* Printable names for valid combinations of TCG_BSWAP_* flags. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1796 
1797 static inline bool tcg_regset_single(TCGRegSet d)
1798 {
1799     return (d & (d - 1)) == 0;
1800 }
1801 
1802 static inline TCGReg tcg_regset_first(TCGRegSet d)
1803 {
1804     if (TCG_TARGET_NB_REGS <= 32) {
1805         return ctz32(d);
1806     } else {
1807         return ctz64(d);
1808     }
1809 }
1810 
/*
 * fprintf wrapper that returns only the number of characters output:
 * a negative (error) result is reported as 0, so callers can add the
 * result to a running column count unconditionally.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
1814 
/*
 * Dump the opcode stream of context @s to stream @f, one op per line.
 * When @have_prefs is true, also print the per-output register
 * preferences recorded by the liveness pass.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;  /* characters printed so far, for aligning life info */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest insn boundary: print the saved insn-start words.  */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each target word is stored split across two host args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Ordinary op: fixed output/input/constant arg counts.  */
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                /* Vector length in bits and element size in bits.  */
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /* k indexes op->args across outputs, then inputs, then consts. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Decode the first constant arg symbolically where possible;
               i counts how many constant args have been consumed.  */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                /* First constant arg is a TCGCond; print its name.  */
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    /* First constant arg packs MemOp flags and mmu index. */
                    MemOpIdx oi = op->args[k++];
                    /* NOTE(review): this MemOp "op" shadows the outer
                       TCGOp *op for the rest of this scope.  */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unrecognized bits present: dump raw.  */
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    /* First constant arg is the TCG_BSWAP_* flags.  */
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch-family ops carry a label as a constant arg.  */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Any remaining constant args are printed raw.  */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before printing life/pref annotations.  */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Output args that must be synced back to memory.  */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Args whose value dies at this op.  */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = op->output_pref[i];

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2042 
2043 /* we give more priority to constraints with less registers */
2044 static int get_constraint_priority(const TCGOpDef *def, int k)
2045 {
2046     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2047     int n;
2048 
2049     if (arg_ct->oalias) {
2050         /* an alias is equivalent to a single register */
2051         n = 1;
2052     } else {
2053         n = ctpop64(arg_ct->regs);
2054     }
2055     return TCG_TARGET_NB_REGS - n + 1;
2056 }
2057 
2058 /* sort from highest priority to lowest */
2059 static void sort_constraints(TCGOpDef *def, int start, int n)
2060 {
2061     int i, j;
2062     TCGArgConstraint *a = def->args_ct;
2063 
2064     for (i = 0; i < n; i++) {
2065         a[start + i].sort_index = start + i;
2066     }
2067     if (n <= 1) {
2068         return;
2069     }
2070     for (i = 0; i < n - 1; i++) {
2071         for (j = i + 1; j < n; j++) {
2072             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2073             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2074             if (p1 < p2) {
2075                 int tmp = a[start + i].sort_index;
2076                 a[start + i].sort_index = a[start + j].sort_index;
2077                 a[start + j].sort_index = tmp;
2078             }
2079         }
2080     }
2081 }
2082 
/*
 * Parse the backend's constraint strings for every opcode into the
 * args_ct array of each TCGOpDef, then sort outputs and inputs by
 * constraint priority for the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* Each character of the constraint string adds a property. */
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* A digit makes this input an alias of output
                           number <digit>; it must be the first (and by
                           the asserts, only leading) character.  */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Output must be a new register, distinct from inputs. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Argument may be an immediate constant. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2169 
2170 void tcg_op_remove(TCGContext *s, TCGOp *op)
2171 {
2172     TCGLabel *label;
2173 
2174     switch (op->opc) {
2175     case INDEX_op_br:
2176         label = arg_label(op->args[0]);
2177         label->refs--;
2178         break;
2179     case INDEX_op_brcond_i32:
2180     case INDEX_op_brcond_i64:
2181         label = arg_label(op->args[3]);
2182         label->refs--;
2183         break;
2184     case INDEX_op_brcond2_i32:
2185         label = arg_label(op->args[5]);
2186         label->refs--;
2187         break;
2188     default:
2189         break;
2190     }
2191 
2192     QTAILQ_REMOVE(&s->ops, op, link);
2193     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2194     s->nb_ops--;
2195 
2196 #ifdef CONFIG_PROFILER
2197     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2198 #endif
2199 }
2200 
2201 void tcg_remove_ops_after(TCGOp *op)
2202 {
2203     TCGContext *s = tcg_ctx;
2204 
2205     while (true) {
2206         TCGOp *last = tcg_last_op();
2207         if (last == op) {
2208             return;
2209         }
2210         tcg_op_remove(s, last);
2211     }
2212 }
2213 
2214 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2215 {
2216     TCGContext *s = tcg_ctx;
2217     TCGOp *op;
2218 
2219     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2220         op = tcg_malloc(sizeof(TCGOp));
2221     } else {
2222         op = QTAILQ_FIRST(&s->free_ops);
2223         QTAILQ_REMOVE(&s->free_ops, op, link);
2224     }
2225     memset(op, 0, offsetof(TCGOp, link));
2226     op->opc = opc;
2227     s->nb_ops++;
2228 
2229     return op;
2230 }
2231 
2232 TCGOp *tcg_emit_op(TCGOpcode opc)
2233 {
2234     TCGOp *op = tcg_op_alloc(opc);
2235     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2236     return op;
2237 }
2238 
2239 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2240 {
2241     TCGOp *new_op = tcg_op_alloc(opc);
2242     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2243     return new_op;
2244 }
2245 
2246 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2247 {
2248     TCGOp *new_op = tcg_op_alloc(opc);
2249     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2250     return new_op;
2251 }
2252 
/*
 * Reachable analysis : remove unreachable code.
 * Walk the op stream forward; after an unconditional control transfer,
 * ops are dead until the next set_label.  Also removes labels with no
 * remaining references, and a br-to-immediately-following-label pair.
 */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;  /* true while scanning past an unconditional exit */

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        /* Removal happens after classification so that the br-label
           peephole above sees the ops still linked.  */
        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2325 
/* Liveness state bits held in TCGTemp.state during liveness analysis. */
#define TS_DEAD  1   /* the temp's value is dead (no later use) */
#define TS_MEM   2   /* the temp's value must be synced to memory */

/* Test the dead/sync bit recorded for argument N in an op's life data. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2331 
/* For liveness_pass_1, the register preferences for a given temp.
   The preference set is stored through the temp's state_ptr field. */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}
2337 
2338 /* For liveness_pass_1, reset the preferences for a given temp to the
2339  * maximal regset for its type.
2340  */
2341 static inline void la_reset_pref(TCGTemp *ts)
2342 {
2343     *la_temp_pref(ts)
2344         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2345 }
2346 
2347 /* liveness analysis: end of function: all temps are dead, and globals
2348    should be in memory. */
2349 static void la_func_end(TCGContext *s, int ng, int nt)
2350 {
2351     int i;
2352 
2353     for (i = 0; i < ng; ++i) {
2354         s->temps[i].state = TS_DEAD | TS_MEM;
2355         la_reset_pref(&s->temps[i]);
2356     }
2357     for (i = ng; i < nt; ++i) {
2358         s->temps[i].state = TS_DEAD;
2359         la_reset_pref(&s->temps[i]);
2360     }
2361 }
2362 
2363 /* liveness analysis: end of basic block: all temps are dead, globals
2364    and local temps should be in memory. */
2365 static void la_bb_end(TCGContext *s, int ng, int nt)
2366 {
2367     int i;
2368 
2369     for (i = 0; i < nt; ++i) {
2370         TCGTemp *ts = &s->temps[i];
2371         int state;
2372 
2373         switch (ts->kind) {
2374         case TEMP_FIXED:
2375         case TEMP_GLOBAL:
2376         case TEMP_LOCAL:
2377             state = TS_DEAD | TS_MEM;
2378             break;
2379         case TEMP_NORMAL:
2380         case TEMP_CONST:
2381             state = TS_DEAD;
2382             break;
2383         default:
2384             g_assert_not_reached();
2385         }
2386         ts->state = state;
2387         la_reset_pref(ts);
2388     }
2389 }
2390 
2391 /* liveness analysis: sync globals back to memory.  */
2392 static void la_global_sync(TCGContext *s, int ng)
2393 {
2394     int i;
2395 
2396     for (i = 0; i < ng; ++i) {
2397         int state = s->temps[i].state;
2398         s->temps[i].state = state | TS_MEM;
2399         if (state == TS_DEAD) {
2400             /* If the global was previously dead, reset prefs.  */
2401             la_reset_pref(&s->temps[i]);
2402         }
2403     }
2404 }
2405 
2406 /*
2407  * liveness analysis: conditional branch: all temps are dead,
2408  * globals and local temps should be synced.
2409  */
2410 static void la_bb_sync(TCGContext *s, int ng, int nt)
2411 {
2412     la_global_sync(s, ng);
2413 
2414     for (int i = ng; i < nt; ++i) {
2415         TCGTemp *ts = &s->temps[i];
2416         int state;
2417 
2418         switch (ts->kind) {
2419         case TEMP_LOCAL:
2420             state = ts->state;
2421             ts->state = state | TS_MEM;
2422             if (state != TS_DEAD) {
2423                 continue;
2424             }
2425             break;
2426         case TEMP_NORMAL:
2427             s->temps[i].state = TS_DEAD;
2428             break;
2429         case TEMP_CONST:
2430             continue;
2431         default:
2432             g_assert_not_reached();
2433         }
2434         la_reset_pref(&s->temps[i]);
2435     }
2436 }
2437 
2438 /* liveness analysis: sync globals back to memory and kill.  */
2439 static void la_global_kill(TCGContext *s, int ng)
2440 {
2441     int i;
2442 
2443     for (i = 0; i < ng; i++) {
2444         s->temps[i].state = TS_DEAD | TS_MEM;
2445         la_reset_pref(&s->temps[i]);
2446     }
2447 }
2448 
2449 /* liveness analysis: note live globals crossing calls.  */
2450 static void la_cross_call(TCGContext *s, int nt)
2451 {
2452     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2453     int i;
2454 
2455     for (i = 0; i < nt; i++) {
2456         TCGTemp *ts = &s->temps[i];
2457         if (!(ts->state & TS_DEAD)) {
2458             TCGRegSet *pset = la_temp_pref(ts);
2459             TCGRegSet set = *pset;
2460 
2461             set &= mask;
2462             /* If the combination is not possible, restart.  */
2463             if (set == 0) {
2464                 set = tcg_target_available_regs[ts->type] & mask;
2465             }
2466             *pset = set;
2467         }
2468     }
2469 }
2470 
2471 /* Liveness analysis : update the opc_arg_life array to tell if a
2472    given input arguments is dead. Instructions updating dead
2473    temporaries are removed. */
2474 static void liveness_pass_1(TCGContext *s)
2475 {
2476     int nb_globals = s->nb_globals;
2477     int nb_temps = s->nb_temps;
2478     TCGOp *op, *op_prev;
2479     TCGRegSet *prefs;
2480     int i;
2481 
2482     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2483     for (i = 0; i < nb_temps; ++i) {
2484         s->temps[i].state_ptr = prefs + i;
2485     }
2486 
2487     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2488     la_func_end(s, nb_globals, nb_temps);
2489 
2490     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2491         int nb_iargs, nb_oargs;
2492         TCGOpcode opc_new, opc_new2;
2493         bool have_opc_new2;
2494         TCGLifeData arg_life = 0;
2495         TCGTemp *ts;
2496         TCGOpcode opc = op->opc;
2497         const TCGOpDef *def = &tcg_op_defs[opc];
2498 
2499         switch (opc) {
2500         case INDEX_op_call:
2501             {
2502                 int call_flags;
2503                 int nb_call_regs;
2504 
2505                 nb_oargs = TCGOP_CALLO(op);
2506                 nb_iargs = TCGOP_CALLI(op);
2507                 call_flags = tcg_call_flags(op);
2508 
2509                 /* pure functions can be removed if their result is unused */
2510                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2511                     for (i = 0; i < nb_oargs; i++) {
2512                         ts = arg_temp(op->args[i]);
2513                         if (ts->state != TS_DEAD) {
2514                             goto do_not_remove_call;
2515                         }
2516                     }
2517                     goto do_remove;
2518                 }
2519             do_not_remove_call:
2520 
2521                 /* Output args are dead.  */
2522                 for (i = 0; i < nb_oargs; i++) {
2523                     ts = arg_temp(op->args[i]);
2524                     if (ts->state & TS_DEAD) {
2525                         arg_life |= DEAD_ARG << i;
2526                     }
2527                     if (ts->state & TS_MEM) {
2528                         arg_life |= SYNC_ARG << i;
2529                     }
2530                     ts->state = TS_DEAD;
2531                     la_reset_pref(ts);
2532 
2533                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2534                     op->output_pref[i] = 0;
2535                 }
2536 
2537                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2538                                     TCG_CALL_NO_READ_GLOBALS))) {
2539                     la_global_kill(s, nb_globals);
2540                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2541                     la_global_sync(s, nb_globals);
2542                 }
2543 
2544                 /* Record arguments that die in this helper.  */
2545                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2546                     ts = arg_temp(op->args[i]);
2547                     if (ts && ts->state & TS_DEAD) {
2548                         arg_life |= DEAD_ARG << i;
2549                     }
2550                 }
2551 
2552                 /* For all live registers, remove call-clobbered prefs.  */
2553                 la_cross_call(s, nb_temps);
2554 
2555                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2556 
2557                 /* Input arguments are live for preceding opcodes.  */
2558                 for (i = 0; i < nb_iargs; i++) {
2559                     ts = arg_temp(op->args[i + nb_oargs]);
2560                     if (ts && ts->state & TS_DEAD) {
2561                         /* For those arguments that die, and will be allocated
2562                          * in registers, clear the register set for that arg,
2563                          * to be filled in below.  For args that will be on
2564                          * the stack, reset to any available reg.
2565                          */
2566                         *la_temp_pref(ts)
2567                             = (i < nb_call_regs ? 0 :
2568                                tcg_target_available_regs[ts->type]);
2569                         ts->state &= ~TS_DEAD;
2570                     }
2571                 }
2572 
2573                 /* For each input argument, add its input register to prefs.
2574                    If a temp is used once, this produces a single set bit.  */
2575                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2576                     ts = arg_temp(op->args[i + nb_oargs]);
2577                     if (ts) {
2578                         tcg_regset_set_reg(*la_temp_pref(ts),
2579                                            tcg_target_call_iarg_regs[i]);
2580                     }
2581                 }
2582             }
2583             break;
2584         case INDEX_op_insn_start:
2585             break;
2586         case INDEX_op_discard:
2587             /* mark the temporary as dead */
2588             ts = arg_temp(op->args[0]);
2589             ts->state = TS_DEAD;
2590             la_reset_pref(ts);
2591             break;
2592 
2593         case INDEX_op_add2_i32:
2594             opc_new = INDEX_op_add_i32;
2595             goto do_addsub2;
2596         case INDEX_op_sub2_i32:
2597             opc_new = INDEX_op_sub_i32;
2598             goto do_addsub2;
2599         case INDEX_op_add2_i64:
2600             opc_new = INDEX_op_add_i64;
2601             goto do_addsub2;
2602         case INDEX_op_sub2_i64:
2603             opc_new = INDEX_op_sub_i64;
2604         do_addsub2:
2605             nb_iargs = 4;
2606             nb_oargs = 2;
2607             /* Test if the high part of the operation is dead, but not
2608                the low part.  The result can be optimized to a simple
2609                add or sub.  This happens often for x86_64 guest when the
2610                cpu mode is set to 32 bit.  */
2611             if (arg_temp(op->args[1])->state == TS_DEAD) {
2612                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2613                     goto do_remove;
2614                 }
2615                 /* Replace the opcode and adjust the args in place,
2616                    leaving 3 unused args at the end.  */
2617                 op->opc = opc = opc_new;
2618                 op->args[1] = op->args[2];
2619                 op->args[2] = op->args[4];
2620                 /* Fall through and mark the single-word operation live.  */
2621                 nb_iargs = 2;
2622                 nb_oargs = 1;
2623             }
2624             goto do_not_remove;
2625 
2626         case INDEX_op_mulu2_i32:
2627             opc_new = INDEX_op_mul_i32;
2628             opc_new2 = INDEX_op_muluh_i32;
2629             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2630             goto do_mul2;
2631         case INDEX_op_muls2_i32:
2632             opc_new = INDEX_op_mul_i32;
2633             opc_new2 = INDEX_op_mulsh_i32;
2634             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2635             goto do_mul2;
2636         case INDEX_op_mulu2_i64:
2637             opc_new = INDEX_op_mul_i64;
2638             opc_new2 = INDEX_op_muluh_i64;
2639             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2640             goto do_mul2;
2641         case INDEX_op_muls2_i64:
2642             opc_new = INDEX_op_mul_i64;
2643             opc_new2 = INDEX_op_mulsh_i64;
2644             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2645             goto do_mul2;
2646         do_mul2:
2647             nb_iargs = 2;
2648             nb_oargs = 2;
2649             if (arg_temp(op->args[1])->state == TS_DEAD) {
2650                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2651                     /* Both parts of the operation are dead.  */
2652                     goto do_remove;
2653                 }
2654                 /* The high part of the operation is dead; generate the low. */
2655                 op->opc = opc = opc_new;
2656                 op->args[1] = op->args[2];
2657                 op->args[2] = op->args[3];
2658             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2659                 /* The low part of the operation is dead; generate the high. */
2660                 op->opc = opc = opc_new2;
2661                 op->args[0] = op->args[1];
2662                 op->args[1] = op->args[2];
2663                 op->args[2] = op->args[3];
2664             } else {
2665                 goto do_not_remove;
2666             }
2667             /* Mark the single-word operation live.  */
2668             nb_oargs = 1;
2669             goto do_not_remove;
2670 
2671         default:
2672             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2673             nb_iargs = def->nb_iargs;
2674             nb_oargs = def->nb_oargs;
2675 
2676             /* Test if the operation can be removed because all
2677                its outputs are dead. We assume that nb_oargs == 0
2678                implies side effects */
2679             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2680                 for (i = 0; i < nb_oargs; i++) {
2681                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2682                         goto do_not_remove;
2683                     }
2684                 }
2685                 goto do_remove;
2686             }
2687             goto do_not_remove;
2688 
2689         do_remove:
2690             tcg_op_remove(s, op);
2691             break;
2692 
2693         do_not_remove:
2694             for (i = 0; i < nb_oargs; i++) {
2695                 ts = arg_temp(op->args[i]);
2696 
2697                 /* Remember the preference of the uses that followed.  */
2698                 op->output_pref[i] = *la_temp_pref(ts);
2699 
2700                 /* Output args are dead.  */
2701                 if (ts->state & TS_DEAD) {
2702                     arg_life |= DEAD_ARG << i;
2703                 }
2704                 if (ts->state & TS_MEM) {
2705                     arg_life |= SYNC_ARG << i;
2706                 }
2707                 ts->state = TS_DEAD;
2708                 la_reset_pref(ts);
2709             }
2710 
2711             /* If end of basic block, update.  */
2712             if (def->flags & TCG_OPF_BB_EXIT) {
2713                 la_func_end(s, nb_globals, nb_temps);
2714             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2715                 la_bb_sync(s, nb_globals, nb_temps);
2716             } else if (def->flags & TCG_OPF_BB_END) {
2717                 la_bb_end(s, nb_globals, nb_temps);
2718             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2719                 la_global_sync(s, nb_globals);
2720                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2721                     la_cross_call(s, nb_temps);
2722                 }
2723             }
2724 
2725             /* Record arguments that die in this opcode.  */
2726             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2727                 ts = arg_temp(op->args[i]);
2728                 if (ts->state & TS_DEAD) {
2729                     arg_life |= DEAD_ARG << i;
2730                 }
2731             }
2732 
2733             /* Input arguments are live for preceding opcodes.  */
2734             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2735                 ts = arg_temp(op->args[i]);
2736                 if (ts->state & TS_DEAD) {
2737                     /* For operands that were dead, initially allow
2738                        all regs for the type.  */
2739                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2740                     ts->state &= ~TS_DEAD;
2741                 }
2742             }
2743 
2744             /* Incorporate constraints for this operand.  */
2745             switch (opc) {
2746             case INDEX_op_mov_i32:
2747             case INDEX_op_mov_i64:
2748                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2749                    have proper constraints.  That said, special case
2750                    moves to propagate preferences backward.  */
2751                 if (IS_DEAD_ARG(1)) {
2752                     *la_temp_pref(arg_temp(op->args[0]))
2753                         = *la_temp_pref(arg_temp(op->args[1]));
2754                 }
2755                 break;
2756 
2757             default:
2758                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2759                     const TCGArgConstraint *ct = &def->args_ct[i];
2760                     TCGRegSet set, *pset;
2761 
2762                     ts = arg_temp(op->args[i]);
2763                     pset = la_temp_pref(ts);
2764                     set = *pset;
2765 
2766                     set &= ct->regs;
2767                     if (ct->ialias) {
2768                         set &= op->output_pref[ct->alias_index];
2769                     }
2770                     /* If the combination is not possible, restart.  */
2771                     if (set == 0) {
2772                         set = ct->regs;
2773                     }
2774                     *pset = set;
2775                 }
2776                 break;
2777             }
2778             break;
2779         }
2780         op->life = arg_life;
2781     }
2782 }
2783 
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
/*
 * For each global whose canonical home is memory reached via another
 * register (indirect_reg), allocate a shadow "direct" temporary and
 * rewrite every use of the global to the shadow, inserting explicit
 * ld/st ops at the points the shadow must be (re)loaded or written
 * back.  Returns true if any op was rewritten, inserted or removed,
 * so the caller knows the op list changed.
 */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            /* state_ptr links the indirect global to its direct shadow.  */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* _SAFE iteration: ld/st ops are inserted, and dead movs removed,
       while walking the list.  */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        /* Insert a load from the global's memory slot before this op for
           any shadowed input whose shadow is currently dead.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            /* Special case for mov: if the (shadowed) destination must be
               synced and is otherwise dead, fold the mov into the store
               and delete the mov itself.  */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Store directly from the mov's source instead.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
2973 
2974 #ifdef CONFIG_DEBUG_TCG
2975 static void dump_regs(TCGContext *s)
2976 {
2977     TCGTemp *ts;
2978     int i;
2979     char buf[64];
2980 
2981     for(i = 0; i < s->nb_temps; i++) {
2982         ts = &s->temps[i];
2983         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2984         switch(ts->val_type) {
2985         case TEMP_VAL_REG:
2986             printf("%s", tcg_target_reg_names[ts->reg]);
2987             break;
2988         case TEMP_VAL_MEM:
2989             printf("%d(%s)", (int)ts->mem_offset,
2990                    tcg_target_reg_names[ts->mem_base->reg]);
2991             break;
2992         case TEMP_VAL_CONST:
2993             printf("$0x%" PRIx64, ts->val);
2994             break;
2995         case TEMP_VAL_DEAD:
2996             printf("D");
2997             break;
2998         default:
2999             printf("???");
3000             break;
3001         }
3002         printf("\n");
3003     }
3004 
3005     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3006         if (s->reg_to_temp[i] != NULL) {
3007             printf("%s: %s\n",
3008                    tcg_target_reg_names[i],
3009                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3010         }
3011     }
3012 }
3013 
/* Debug helper: verify that the reg_to_temp[] map and the per-temp
   val_type/reg fields are mutually consistent; dump state and abort
   on any mismatch.  */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    /* Forward check: every occupied register must point at a temp that
       claims to live in that same register.  */
    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    /* Reverse check: every non-fixed temp claiming a register must be
       recorded in reg_to_temp[].  */
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG
            && ts->kind != TEMP_FIXED
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        /* NOTE: the first loop jumps here, into this loop's body, to
           share the dump-and-abort tail.  Unusual, but well-defined.  */
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
3045 #endif
3046 
3047 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3048 {
3049     intptr_t off, size, align;
3050 
3051     switch (ts->type) {
3052     case TCG_TYPE_I32:
3053         size = align = 4;
3054         break;
3055     case TCG_TYPE_I64:
3056     case TCG_TYPE_V64:
3057         size = align = 8;
3058         break;
3059     case TCG_TYPE_V128:
3060         size = align = 16;
3061         break;
3062     case TCG_TYPE_V256:
3063         /* Note that we do not require aligned storage for V256. */
3064         size = 32, align = 16;
3065         break;
3066     default:
3067         g_assert_not_reached();
3068     }
3069 
3070     /*
3071      * Assume the stack is sufficiently aligned.
3072      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3073      * and do not require 16 byte vector alignment.  This seems slightly
3074      * easier than fully parameterizing the above switch statement.
3075      */
3076     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3077     off = ROUND_UP(s->current_frame_offset, align);
3078 
3079     /* If we've exhausted the stack frame, restart with a smaller TB. */
3080     if (off + size > s->frame_end) {
3081         tcg_raise_tb_overflow(s);
3082     }
3083     s->current_frame_offset = off + size;
3084 
3085     ts->mem_offset = off;
3086 #if defined(__sparc__)
3087     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3088 #endif
3089     ts->mem_base = s->frame_temp;
3090     ts->mem_allocated = 1;
3091 }
3092 
3093 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3094 
3095 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3096    mark it free; otherwise mark it dead.  */
3097 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3098 {
3099     TCGTempVal new_type;
3100 
3101     switch (ts->kind) {
3102     case TEMP_FIXED:
3103         return;
3104     case TEMP_GLOBAL:
3105     case TEMP_LOCAL:
3106         new_type = TEMP_VAL_MEM;
3107         break;
3108     case TEMP_NORMAL:
3109         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3110         break;
3111     case TEMP_CONST:
3112         new_type = TEMP_VAL_CONST;
3113         break;
3114     default:
3115         g_assert_not_reached();
3116     }
3117     if (ts->val_type == TEMP_VAL_REG) {
3118         s->reg_to_temp[ts->reg] = NULL;
3119     }
3120     ts->val_type = new_type;
3121 }
3122 
3123 /* Mark a temporary as dead.  */
3124 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3125 {
3126     temp_free_or_dead(s, ts, 1);
3127 }
3128 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; a coherent temp already
       matches its memory slot.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Direct store unsupported: materialize the constant in a
               register, then store via the REG case below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            /* Syncing a dead temp indicates internal corruption.  */
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3172 
3173 /* free register 'reg' by spilling the corresponding temporary if necessary */
3174 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3175 {
3176     TCGTemp *ts = s->reg_to_temp[reg];
3177     if (ts != NULL) {
3178         temp_sync(s, ts, allocated_regs, 0, -1);
3179     }
3180 }
3181 
3182 /**
3183  * tcg_reg_alloc:
3184  * @required_regs: Set of registers in which we must allocate.
3185  * @allocated_regs: Set of registers which must be avoided.
3186  * @preferred_regs: Set of registers we should prefer.
3187  * @rev: True if we search the registers in "indirect" order.
3188  *
3189  * The allocated register must be in @required_regs & ~@allocated_regs,
3190  * but if we can put it in @preferred_regs we may save a move later.
3191  */
3192 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3193                             TCGRegSet allocated_regs,
3194                             TCGRegSet preferred_regs, bool rev)
3195 {
3196     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3197     TCGRegSet reg_ct[2];
3198     const int *order;
3199 
3200     reg_ct[1] = required_regs & ~allocated_regs;
3201     tcg_debug_assert(reg_ct[1] != 0);
3202     reg_ct[0] = reg_ct[1] & preferred_regs;
3203 
3204     /* Skip the preferred_regs option if it cannot be satisfied,
3205        or if the preference made no difference.  */
3206     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3207 
3208     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3209 
3210     /* Try free registers, preferences first.  */
3211     for (j = f; j < 2; j++) {
3212         TCGRegSet set = reg_ct[j];
3213 
3214         if (tcg_regset_single(set)) {
3215             /* One register in the set.  */
3216             TCGReg reg = tcg_regset_first(set);
3217             if (s->reg_to_temp[reg] == NULL) {
3218                 return reg;
3219             }
3220         } else {
3221             for (i = 0; i < n; i++) {
3222                 TCGReg reg = order[i];
3223                 if (s->reg_to_temp[reg] == NULL &&
3224                     tcg_regset_test_reg(set, reg)) {
3225                     return reg;
3226                 }
3227             }
3228         }
3229     }
3230 
3231     /* We must spill something.  */
3232     for (j = f; j < 2; j++) {
3233         TCGRegSet set = reg_ct[j];
3234 
3235         if (tcg_regset_single(set)) {
3236             /* One register in the set.  */
3237             TCGReg reg = tcg_regset_first(set);
3238             tcg_reg_free(s, reg, allocated_regs);
3239             return reg;
3240         } else {
3241             for (i = 0; i < n; i++) {
3242                 TCGReg reg = order[i];
3243                 if (tcg_regset_test_reg(set, reg)) {
3244                     tcg_reg_free(s, reg, allocated_regs);
3245                     return reg;
3246                 }
3247             }
3248         }
3249     }
3250 
3251     tcg_abort();
3252 }
3253 
3254 /* Make sure the temporary is in a register.  If needed, allocate the register
3255    from DESIRED while avoiding ALLOCATED.  */
3256 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3257                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3258 {
3259     TCGReg reg;
3260 
3261     switch (ts->val_type) {
3262     case TEMP_VAL_REG:
3263         return;
3264     case TEMP_VAL_CONST:
3265         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3266                             preferred_regs, ts->indirect_base);
3267         if (ts->type <= TCG_TYPE_I64) {
3268             tcg_out_movi(s, ts->type, reg, ts->val);
3269         } else {
3270             uint64_t val = ts->val;
3271             MemOp vece = MO_64;
3272 
3273             /*
3274              * Find the minimal vector element that matches the constant.
3275              * The targets will, in general, have to do this search anyway,
3276              * do this generically.
3277              */
3278             if (val == dup_const(MO_8, val)) {
3279                 vece = MO_8;
3280             } else if (val == dup_const(MO_16, val)) {
3281                 vece = MO_16;
3282             } else if (val == dup_const(MO_32, val)) {
3283                 vece = MO_32;
3284             }
3285 
3286             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3287         }
3288         ts->mem_coherent = 0;
3289         break;
3290     case TEMP_VAL_MEM:
3291         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3292                             preferred_regs, ts->indirect_base);
3293         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3294         ts->mem_coherent = 1;
3295         break;
3296     case TEMP_VAL_DEAD:
3297     default:
3298         tcg_abort();
3299     }
3300     ts->reg = reg;
3301     ts->val_type = TEMP_VAL_REG;
3302     s->reg_to_temp[reg] = ts;
3303 }
3304 
3305 /* Save a temporary to memory. 'allocated_regs' is used in case a
3306    temporary registers needs to be allocated to store a constant.  */
3307 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3308 {
3309     /* The liveness analysis already ensures that globals are back
3310        in memory. Keep an tcg_debug_assert for safety. */
3311     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3312 }
3313 
3314 /* save globals to their canonical location and assume they can be
3315    modified be the following code. 'allocated_regs' is used in case a
3316    temporary registers needs to be allocated to store a constant. */
3317 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3318 {
3319     int i, n;
3320 
3321     for (i = 0, n = s->nb_globals; i < n; i++) {
3322         temp_save(s, &s->temps[i], allocated_regs);
3323     }
3324 }
3325 
3326 /* sync globals to their canonical location and assume they can be
3327    read by the following code. 'allocated_regs' is used in case a
3328    temporary registers needs to be allocated to store a constant. */
3329 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3330 {
3331     int i, n;
3332 
3333     for (i = 0, n = s->nb_globals; i < n; i++) {
3334         TCGTemp *ts = &s->temps[i];
3335         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3336                          || ts->kind == TEMP_FIXED
3337                          || ts->mem_coherent);
3338     }
3339 }
3340 
3341 /* at the end of a basic block, we assume all temporaries are dead and
3342    all globals are stored at their canonical location. */
3343 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3344 {
3345     int i;
3346 
3347     for (i = s->nb_globals; i < s->nb_temps; i++) {
3348         TCGTemp *ts = &s->temps[i];
3349 
3350         switch (ts->kind) {
3351         case TEMP_LOCAL:
3352             temp_save(s, ts, allocated_regs);
3353             break;
3354         case TEMP_NORMAL:
3355             /* The liveness analysis already ensures that temps are dead.
3356                Keep an tcg_debug_assert for safety. */
3357             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3358             break;
3359         case TEMP_CONST:
3360             /* Similarly, we should have freed any allocated register. */
3361             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3362             break;
3363         default:
3364             g_assert_not_reached();
3365         }
3366     }
3367 
3368     save_globals(s, allocated_regs);
3369 }
3370 
3371 /*
3372  * At a conditional branch, we assume all temporaries are dead and
3373  * all globals and local temps are synced to their location.
3374  */
3375 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3376 {
3377     sync_globals(s, allocated_regs);
3378 
3379     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3380         TCGTemp *ts = &s->temps[i];
3381         /*
3382          * The liveness analysis already ensures that temps are dead.
3383          * Keep tcg_debug_asserts for safety.
3384          */
3385         switch (ts->kind) {
3386         case TEMP_LOCAL:
3387             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3388             break;
3389         case TEMP_NORMAL:
3390             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3391             break;
3392         case TEMP_CONST:
3393             break;
3394         default:
3395             g_assert_not_reached();
3396         }
3397     }
3398 }
3399 
3400 /*
3401  * Specialized code generation for INDEX_op_mov_* with a constant.
3402  */
3403 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3404                                   tcg_target_ulong val, TCGLifeData arg_life,
3405                                   TCGRegSet preferred_regs)
3406 {
3407     /* ENV should not be modified.  */
3408     tcg_debug_assert(!temp_readonly(ots));
3409 
3410     /* The movi is not explicitly generated here.  */
3411     if (ots->val_type == TEMP_VAL_REG) {
3412         s->reg_to_temp[ots->reg] = NULL;
3413     }
3414     ots->val_type = TEMP_VAL_CONST;
3415     ots->val = val;
3416     ots->mem_coherent = 0;
3417     if (NEED_SYNC_ARG(0)) {
3418         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3419     } else if (IS_DEAD_ARG(0)) {
3420         temp_dead(s, ots);
3421     }
3422 }
3423 
3424 /*
3425  * Specialized code generation for INDEX_op_mov_*.
3426  */
3427 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3428 {
3429     const TCGLifeData arg_life = op->life;
3430     TCGRegSet allocated_regs, preferred_regs;
3431     TCGTemp *ts, *ots;
3432     TCGType otype, itype;
3433 
3434     allocated_regs = s->reserved_regs;
3435     preferred_regs = op->output_pref[0];
3436     ots = arg_temp(op->args[0]);
3437     ts = arg_temp(op->args[1]);
3438 
3439     /* ENV should not be modified.  */
3440     tcg_debug_assert(!temp_readonly(ots));
3441 
3442     /* Note that otype != itype for no-op truncation.  */
3443     otype = ots->type;
3444     itype = ts->type;
3445 
3446     if (ts->val_type == TEMP_VAL_CONST) {
3447         /* propagate constant or generate sti */
3448         tcg_target_ulong val = ts->val;
3449         if (IS_DEAD_ARG(1)) {
3450             temp_dead(s, ts);
3451         }
3452         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3453         return;
3454     }
3455 
3456     /* If the source value is in memory we're going to be forced
3457        to have it in a register in order to perform the copy.  Copy
3458        the SOURCE value into its own register first, that way we
3459        don't have to reload SOURCE the next time it is used. */
3460     if (ts->val_type == TEMP_VAL_MEM) {
3461         temp_load(s, ts, tcg_target_available_regs[itype],
3462                   allocated_regs, preferred_regs);
3463     }
3464 
3465     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3466     if (IS_DEAD_ARG(0)) {
3467         /* mov to a non-saved dead register makes no sense (even with
3468            liveness analysis disabled). */
3469         tcg_debug_assert(NEED_SYNC_ARG(0));
3470         if (!ots->mem_allocated) {
3471             temp_allocate_frame(s, ots);
3472         }
3473         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3474         if (IS_DEAD_ARG(1)) {
3475             temp_dead(s, ts);
3476         }
3477         temp_dead(s, ots);
3478     } else {
3479         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3480             /* the mov can be suppressed */
3481             if (ots->val_type == TEMP_VAL_REG) {
3482                 s->reg_to_temp[ots->reg] = NULL;
3483             }
3484             ots->reg = ts->reg;
3485             temp_dead(s, ts);
3486         } else {
3487             if (ots->val_type != TEMP_VAL_REG) {
3488                 /* When allocating a new register, make sure to not spill the
3489                    input one. */
3490                 tcg_regset_set_reg(allocated_regs, ts->reg);
3491                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3492                                          allocated_regs, preferred_regs,
3493                                          ots->indirect_base);
3494             }
3495             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3496                 /*
3497                  * Cross register class move not supported.
3498                  * Store the source register into the destination slot
3499                  * and leave the destination temp as TEMP_VAL_MEM.
3500                  */
3501                 assert(!temp_readonly(ots));
3502                 if (!ts->mem_allocated) {
3503                     temp_allocate_frame(s, ots);
3504                 }
3505                 tcg_out_st(s, ts->type, ts->reg,
3506                            ots->mem_base->reg, ots->mem_offset);
3507                 ots->mem_coherent = 1;
3508                 temp_free_or_dead(s, ots, -1);
3509                 return;
3510             }
3511         }
3512         ots->val_type = TEMP_VAL_REG;
3513         ots->mem_coherent = 0;
3514         s->reg_to_temp[ots->reg] = ots;
3515         if (NEED_SYNC_ARG(0)) {
3516             temp_sync(s, ots, allocated_regs, 0, 0);
3517         }
3518     }
3519 }
3520 
3521 /*
3522  * Specialized code generation for INDEX_op_dup_vec.
3523  */
3524 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3525 {
3526     const TCGLifeData arg_life = op->life;
3527     TCGRegSet dup_out_regs, dup_in_regs;
3528     TCGTemp *its, *ots;
3529     TCGType itype, vtype;
3530     intptr_t endian_fixup;
3531     unsigned vece;
3532     bool ok;
3533 
3534     ots = arg_temp(op->args[0]);
3535     its = arg_temp(op->args[1]);
3536 
3537     /* ENV should not be modified.  */
3538     tcg_debug_assert(!temp_readonly(ots));
3539 
3540     itype = its->type;
3541     vece = TCGOP_VECE(op);
3542     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3543 
3544     if (its->val_type == TEMP_VAL_CONST) {
3545         /* Propagate constant via movi -> dupi.  */
3546         tcg_target_ulong val = its->val;
3547         if (IS_DEAD_ARG(1)) {
3548             temp_dead(s, its);
3549         }
3550         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3551         return;
3552     }
3553 
3554     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3555     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3556 
3557     /* Allocate the output register now.  */
3558     if (ots->val_type != TEMP_VAL_REG) {
3559         TCGRegSet allocated_regs = s->reserved_regs;
3560 
3561         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3562             /* Make sure to not spill the input register. */
3563             tcg_regset_set_reg(allocated_regs, its->reg);
3564         }
3565         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3566                                  op->output_pref[0], ots->indirect_base);
3567         ots->val_type = TEMP_VAL_REG;
3568         ots->mem_coherent = 0;
3569         s->reg_to_temp[ots->reg] = ots;
3570     }
3571 
3572     switch (its->val_type) {
3573     case TEMP_VAL_REG:
3574         /*
3575          * The dup constriaints must be broad, covering all possible VECE.
3576          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3577          * to fail, indicating that extra moves are required for that case.
3578          */
3579         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3580             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3581                 goto done;
3582             }
3583             /* Try again from memory or a vector input register.  */
3584         }
3585         if (!its->mem_coherent) {
3586             /*
3587              * The input register is not synced, and so an extra store
3588              * would be required to use memory.  Attempt an integer-vector
3589              * register move first.  We do not have a TCGRegSet for this.
3590              */
3591             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3592                 break;
3593             }
3594             /* Sync the temp back to its slot and load from there.  */
3595             temp_sync(s, its, s->reserved_regs, 0, 0);
3596         }
3597         /* fall through */
3598 
3599     case TEMP_VAL_MEM:
3600 #if HOST_BIG_ENDIAN
3601         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3602         endian_fixup -= 1 << vece;
3603 #else
3604         endian_fixup = 0;
3605 #endif
3606         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3607                              its->mem_offset + endian_fixup)) {
3608             goto done;
3609         }
3610         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3611         break;
3612 
3613     default:
3614         g_assert_not_reached();
3615     }
3616 
3617     /* We now have a vector input register, so dup must succeed. */
3618     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3619     tcg_debug_assert(ok);
3620 
3621  done:
3622     if (IS_DEAD_ARG(1)) {
3623         temp_dead(s, its);
3624     }
3625     if (NEED_SYNC_ARG(0)) {
3626         temp_sync(s, ots, s->reserved_regs, 0, 0);
3627     }
3628     if (IS_DEAD_ARG(0)) {
3629         temp_dead(s, ots);
3630     }
3631 }
3632 
/*
 * Generic register allocation for one TCGOp: satisfy the input and output
 * register constraints from the opcode definition, then emit the target
 * instruction via tcg_out_op() / tcg_out_vec_op().
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints, in the order given by sort_index */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            /* Input aliased to an output: prefer the output's register. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output shares the register of its aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* Output must not overlap any input register. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3817 
/*
 * Specialized code generation for INDEX_op_dup2_vec: duplicate one
 * 64-bit element, supplied as two 32-bit inputs, across the vector
 * output.  Only used on 32-bit hosts.  Returns false if no specialized
 * form applies and the generic expansion must be used instead.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;         /* output, input low, input high */
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        /* Both halves must be in memory before loading from there. */
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        }
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
        }
        /* The 64-bit value begins at whichever half is lower in memory. */
#if HOST_BIG_ENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
3909 
/* Adjust a stack offset for the direction of host stack growth. */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3915 
/*
 * Register allocation and code emission for INDEX_op_call: place the
 * call arguments into stack slots and the target's argument registers,
 * spill/sync globals as required by the helper's flags, emit the call,
 * and bind the return-value registers to the output temps.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = tcg_call_func(op);
    info = tcg_call_info(op);
    flags = info->flags;

    /* Number of arguments passed in registers; the rest go on the stack. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever currently occupies the ABI register. */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Load directly into the required ABI register. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

#ifdef CONFIG_TCG_INTERPRETER
    {
        /* The interpreter calls helpers through libffi; look up the
           pre-built call interface keyed by the helper's type mask. */
        gpointer hash = (gpointer)(uintptr_t)info->typemask;
        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
        assert(cif != NULL);
        tcg_out_call(s, func_addr, cif);
    }
#else
    tcg_out_call(s, func_addr);
#endif

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4061 
4062 #ifdef CONFIG_PROFILER
4063 
/* avoid copy/paste errors */
/* Accumulate @field of @from into @to, reading it atomically. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Keep the maximum of @field across contexts rather than the sum. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4077 
4078 /* Pass in a zero'ed @prof */
4079 static inline
4080 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4081 {
4082     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4083     unsigned int i;
4084 
4085     for (i = 0; i < n_ctxs; i++) {
4086         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4087         const TCGProfile *orig = &s->prof;
4088 
4089         if (counters) {
4090             PROF_ADD(prof, orig, cpu_exec_time);
4091             PROF_ADD(prof, orig, tb_count1);
4092             PROF_ADD(prof, orig, tb_count);
4093             PROF_ADD(prof, orig, op_count);
4094             PROF_MAX(prof, orig, op_count_max);
4095             PROF_ADD(prof, orig, temp_count);
4096             PROF_MAX(prof, orig, temp_count_max);
4097             PROF_ADD(prof, orig, del_op_count);
4098             PROF_ADD(prof, orig, code_in_len);
4099             PROF_ADD(prof, orig, code_out_len);
4100             PROF_ADD(prof, orig, search_out_len);
4101             PROF_ADD(prof, orig, interm_time);
4102             PROF_ADD(prof, orig, code_time);
4103             PROF_ADD(prof, orig, la_time);
4104             PROF_ADD(prof, orig, opt_time);
4105             PROF_ADD(prof, orig, restore_count);
4106             PROF_ADD(prof, orig, restore_time);
4107         }
4108         if (table) {
4109             int i;
4110 
4111             for (i = 0; i < NB_OPS; i++) {
4112                 PROF_ADD(prof, orig, table_op_count[i]);
4113             }
4114         }
4115     }
4116 }
4117 
4118 #undef PROF_ADD
4119 #undef PROF_MAX
4120 
/* Snapshot only the scalar counters, not the per-opcode table. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4125 
/* Snapshot only the per-opcode table, not the scalar counters. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4130 
4131 void tcg_dump_op_count(GString *buf)
4132 {
4133     TCGProfile prof = {};
4134     int i;
4135 
4136     tcg_profile_snapshot_table(&prof);
4137     for (i = 0; i < NB_OPS; i++) {
4138         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4139                                prof.table_op_count[i]);
4140     }
4141 }
4142 
4143 int64_t tcg_cpu_exec_time(void)
4144 {
4145     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4146     unsigned int i;
4147     int64_t ret = 0;
4148 
4149     for (i = 0; i < n_ctxs; i++) {
4150         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4151         const TCGProfile *prof = &s->prof;
4152 
4153         ret += qatomic_read(&prof->cpu_exec_time);
4154     }
4155     return ret;
4156 }
4157 #else
/* Stub used when CONFIG_PROFILER is disabled: report that no data exists. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4162 
/* Stub used when CONFIG_PROFILER is disabled: does not return. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4168 #endif
4169 
4170 
/*
 * Translate the op list of @s into host code at tb->tc.ptr.
 * Returns the number of bytes of code generated on success.  Returns
 * -1 if the code buffer high-water mark was exceeded, and -2 if the
 * TB grew beyond what gen_insn_end_off can record or relocations
 * could not be resolved; in either case the caller must restart
 * translation (see the buffer-reset comment below).
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Record op and temp counts for this TB. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op list before any transformation. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Main loop: allocate registers and emit host code for each op. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record per-guest-insn metadata for later restore lookup. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each start word is split across two 32-bit op args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4402 
4403 #ifdef CONFIG_PROFILER
4404 void tcg_dump_info(GString *buf)
4405 {
4406     TCGProfile prof = {};
4407     const TCGProfile *s;
4408     int64_t tb_count;
4409     int64_t tb_div_count;
4410     int64_t tot;
4411 
4412     tcg_profile_snapshot_counters(&prof);
4413     s = &prof;
4414     tb_count = s->tb_count;
4415     tb_div_count = tb_count ? tb_count : 1;
4416     tot = s->interm_time + s->code_time;
4417 
4418     g_string_append_printf(buf, "JIT cycles          %" PRId64
4419                            " (%0.3f s at 2.4 GHz)\n",
4420                            tot, tot / 2.4e9);
4421     g_string_append_printf(buf, "translated TBs      %" PRId64
4422                            " (aborted=%" PRId64 " %0.1f%%)\n",
4423                            tb_count, s->tb_count1 - tb_count,
4424                            (double)(s->tb_count1 - s->tb_count)
4425                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4426     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4427                            (double)s->op_count / tb_div_count, s->op_count_max);
4428     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4429                            (double)s->del_op_count / tb_div_count);
4430     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4431                            (double)s->temp_count / tb_div_count,
4432                            s->temp_count_max);
4433     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4434                            (double)s->code_out_len / tb_div_count);
4435     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4436                            (double)s->search_out_len / tb_div_count);
4437 
4438     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4439                            s->op_count ? (double)tot / s->op_count : 0);
4440     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4441                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4442     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4443                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4444     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4445                            s->search_out_len ?
4446                            (double)tot / s->search_out_len : 0);
4447     if (tot == 0) {
4448         tot = 1;
4449     }
4450     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4451                            (double)s->interm_time / tot * 100.0);
4452     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4453                            (double)s->code_time / tot * 100.0);
4454     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4455                            (double)s->opt_time / (s->code_time ?
4456                                                   s->code_time : 1)
4457                            * 100.0);
4458     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4459                            (double)s->la_time / (s->code_time ?
4460                                                  s->code_time : 1) * 100.0);
4461     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4462                            s->restore_count);
4463     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4464                            s->restore_count ?
4465                            (double)s->restore_time / s->restore_count : 0);
4466 }
4467 #else
4468 void tcg_dump_info(GString *buf)
4469 {
4470     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4471 }
4472 #endif
4473 
4474 #ifdef ELF_HOST_MACHINE
4475 /* In order to use this feature, the backend needs to do three things:
4476 
4477    (1) Define ELF_HOST_MACHINE to indicate both what value to
4478        put into the ELF image and to indicate support for the feature.
4479 
4480    (2) Define tcg_register_jit.  This should create a buffer containing
4481        the contents of a .debug_frame section that describes the post-
4482        prologue unwind info for the tcg machine.
4483 
4484    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4485 */
4486 
4487 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action codes published in jit_descriptor.action_flag; the numeric
   values are fixed by the GDB JIT interface (see comment above).  */
typedef enum {
    JIT_NOACTION = 0,   /* nothing pending for the debugger */
    JIT_REGISTER_FN,    /* relevant_entry has just been added */
    JIT_UNREGISTER_FN   /* relevant_entry is about to be removed */
} jit_actions_t;
4493 
/* One registered symbol file; layout fixed by the GDB JIT interface.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;  /* doubly-linked list of entries */
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;           /* in-memory ELF image for GDB */
    uint64_t symfile_size;              /* size of that image in bytes */
};
4500 
/* Rendezvous structure GDB reads after the registration breakpoint;
   layout fixed by the GDB JIT interface.  */
struct jit_descriptor {
    uint32_t version;                       /* protocol version (1) */
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action refers to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
4507 
/* GDB plants a breakpoint on this function to learn about new entries;
   noinline plus the empty asm keep the compiler from eliding the call
   or the (observably empty) body.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4513 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
/* Fields: version = 1, no pending action, no entries yet.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4517 
4518 /* End GDB interface.  */
4519 
/* Return the byte offset of STR within the section string table STRTAB.
   Entry 0 is the mandatory empty string, so the scan starts just past it.
   The caller guarantees STR is present; the loop exits only via return.  */
static int find_string(const char *strtab, const char *str)
{
    for (const char *entry = strtab + 1; ; entry += strlen(entry) + 1) {
        if (!strcmp(entry, str)) {
            return entry - strtab;
        }
    }
}
4531 
4532 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4533                                  const void *debug_frame,
4534                                  size_t debug_frame_size)
4535 {
4536     struct __attribute__((packed)) DebugInfo {
4537         uint32_t  len;
4538         uint16_t  version;
4539         uint32_t  abbrev;
4540         uint8_t   ptr_size;
4541         uint8_t   cu_die;
4542         uint16_t  cu_lang;
4543         uintptr_t cu_low_pc;
4544         uintptr_t cu_high_pc;
4545         uint8_t   fn_die;
4546         char      fn_name[16];
4547         uintptr_t fn_low_pc;
4548         uintptr_t fn_high_pc;
4549         uint8_t   cu_eoc;
4550     };
4551 
4552     struct ElfImage {
4553         ElfW(Ehdr) ehdr;
4554         ElfW(Phdr) phdr;
4555         ElfW(Shdr) shdr[7];
4556         ElfW(Sym)  sym[2];
4557         struct DebugInfo di;
4558         uint8_t    da[24];
4559         char       str[80];
4560     };
4561 
4562     struct ElfImage *img;
4563 
4564     static const struct ElfImage img_template = {
4565         .ehdr = {
4566             .e_ident[EI_MAG0] = ELFMAG0,
4567             .e_ident[EI_MAG1] = ELFMAG1,
4568             .e_ident[EI_MAG2] = ELFMAG2,
4569             .e_ident[EI_MAG3] = ELFMAG3,
4570             .e_ident[EI_CLASS] = ELF_CLASS,
4571             .e_ident[EI_DATA] = ELF_DATA,
4572             .e_ident[EI_VERSION] = EV_CURRENT,
4573             .e_type = ET_EXEC,
4574             .e_machine = ELF_HOST_MACHINE,
4575             .e_version = EV_CURRENT,
4576             .e_phoff = offsetof(struct ElfImage, phdr),
4577             .e_shoff = offsetof(struct ElfImage, shdr),
4578             .e_ehsize = sizeof(ElfW(Shdr)),
4579             .e_phentsize = sizeof(ElfW(Phdr)),
4580             .e_phnum = 1,
4581             .e_shentsize = sizeof(ElfW(Shdr)),
4582             .e_shnum = ARRAY_SIZE(img->shdr),
4583             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4584 #ifdef ELF_HOST_FLAGS
4585             .e_flags = ELF_HOST_FLAGS,
4586 #endif
4587 #ifdef ELF_OSABI
4588             .e_ident[EI_OSABI] = ELF_OSABI,
4589 #endif
4590         },
4591         .phdr = {
4592             .p_type = PT_LOAD,
4593             .p_flags = PF_X,
4594         },
4595         .shdr = {
4596             [0] = { .sh_type = SHT_NULL },
4597             /* Trick: The contents of code_gen_buffer are not present in
4598                this fake ELF file; that got allocated elsewhere.  Therefore
4599                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4600                will not look for contents.  We can record any address.  */
4601             [1] = { /* .text */
4602                 .sh_type = SHT_NOBITS,
4603                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4604             },
4605             [2] = { /* .debug_info */
4606                 .sh_type = SHT_PROGBITS,
4607                 .sh_offset = offsetof(struct ElfImage, di),
4608                 .sh_size = sizeof(struct DebugInfo),
4609             },
4610             [3] = { /* .debug_abbrev */
4611                 .sh_type = SHT_PROGBITS,
4612                 .sh_offset = offsetof(struct ElfImage, da),
4613                 .sh_size = sizeof(img->da),
4614             },
4615             [4] = { /* .debug_frame */
4616                 .sh_type = SHT_PROGBITS,
4617                 .sh_offset = sizeof(struct ElfImage),
4618             },
4619             [5] = { /* .symtab */
4620                 .sh_type = SHT_SYMTAB,
4621                 .sh_offset = offsetof(struct ElfImage, sym),
4622                 .sh_size = sizeof(img->sym),
4623                 .sh_info = 1,
4624                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4625                 .sh_entsize = sizeof(ElfW(Sym)),
4626             },
4627             [6] = { /* .strtab */
4628                 .sh_type = SHT_STRTAB,
4629                 .sh_offset = offsetof(struct ElfImage, str),
4630                 .sh_size = sizeof(img->str),
4631             }
4632         },
4633         .sym = {
4634             [1] = { /* code_gen_buffer */
4635                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4636                 .st_shndx = 1,
4637             }
4638         },
4639         .di = {
4640             .len = sizeof(struct DebugInfo) - 4,
4641             .version = 2,
4642             .ptr_size = sizeof(void *),
4643             .cu_die = 1,
4644             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4645             .fn_die = 2,
4646             .fn_name = "code_gen_buffer"
4647         },
4648         .da = {
4649             1,          /* abbrev number (the cu) */
4650             0x11, 1,    /* DW_TAG_compile_unit, has children */
4651             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4652             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4653             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4654             0, 0,       /* end of abbrev */
4655             2,          /* abbrev number (the fn) */
4656             0x2e, 0,    /* DW_TAG_subprogram, no children */
4657             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4658             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4659             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4660             0, 0,       /* end of abbrev */
4661             0           /* no more abbrev */
4662         },
4663         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4664                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4665     };
4666 
4667     /* We only need a single jit entry; statically allocate it.  */
4668     static struct jit_code_entry one_entry;
4669 
4670     uintptr_t buf = (uintptr_t)buf_ptr;
4671     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4672     DebugFrameHeader *dfh;
4673 
4674     img = g_malloc(img_size);
4675     *img = img_template;
4676 
4677     img->phdr.p_vaddr = buf;
4678     img->phdr.p_paddr = buf;
4679     img->phdr.p_memsz = buf_size;
4680 
4681     img->shdr[1].sh_name = find_string(img->str, ".text");
4682     img->shdr[1].sh_addr = buf;
4683     img->shdr[1].sh_size = buf_size;
4684 
4685     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4686     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4687 
4688     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4689     img->shdr[4].sh_size = debug_frame_size;
4690 
4691     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4692     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4693 
4694     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4695     img->sym[1].st_value = buf;
4696     img->sym[1].st_size = buf_size;
4697 
4698     img->di.cu_low_pc = buf;
4699     img->di.cu_high_pc = buf + buf_size;
4700     img->di.fn_low_pc = buf;
4701     img->di.fn_high_pc = buf + buf_size;
4702 
4703     dfh = (DebugFrameHeader *)(img + 1);
4704     memcpy(dfh, debug_frame, debug_frame_size);
4705     dfh->fde.func_start = buf;
4706     dfh->fde.func_len = buf_size;
4707 
4708 #ifdef DEBUG_JIT
4709     /* Enable this block to be able to debug the ELF image file creation.
4710        One can use readelf, objdump, or other inspection utilities.  */
4711     {
4712         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4713         if (f) {
4714             if (fwrite(img, img_size, 1, f) != img_size) {
4715                 /* Avoid stupid unused return value warning for fwrite.  */
4716             }
4717             fclose(f);
4718         }
4719     }
4720 #endif
4721 
4722     one_entry.symfile_addr = img;
4723     one_entry.symfile_size = img_size;
4724 
4725     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4726     __jit_debug_descriptor.relevant_entry = &one_entry;
4727     __jit_debug_descriptor.first_entry = &one_entry;
4728     __jit_debug_register_code();
4729 }
4730 #else
4731 /* No support for the feature.  Provide the entry point expected by exec.c,
4732    and implement the internal function we declared earlier.  */
4733 
/* Without ELF_HOST_MACHINE there is no GDB JIT registration to do.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
4739 
/* Public no-op stub: this host backend provides no unwind info.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
4743 #endif /* ELF_HOST_MACHINE */
4744 
#if !TCG_TARGET_MAYBE_vec
/* Stub for hosts without vector support.  Generic code only emits vector
   opcodes the backend advertises, so this must never be reached.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
4751