xref: /openbmc/qemu/tcg/tcg.c (revision b14df228)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE portion of the debug frame.  Note that, unlike the two structures
 * below, this one is not declared QEMU_PACKED.
 */
typedef struct {
    /* Forced to pointer-size alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* FDE header: start address and length of the function being described. */
typedef struct QEMU_PACKED {
    /* Forced to pointer-size alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(const void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
/*
 * Stubs compiled when TCG_TARGET_MAYBE_vec is false.  They exist only so
 * that references to the vector emitters still compile; each one asserts
 * that it is never reached.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154                          ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162 
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165 
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172 
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176 
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179 
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183     *s->code_ptr++ = v;
184 }
185 
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187                                                       uint8_t v)
188 {
189     *p = v;
190 }
191 #endif
192 
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197         *s->code_ptr++ = v;
198     } else {
199         tcg_insn_unit *p = s->code_ptr;
200         memcpy(p, &v, sizeof(v));
201         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202     }
203 }
204 
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206                                                        uint16_t v)
207 {
208     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209         *p = v;
210     } else {
211         memcpy(p, &v, sizeof(v));
212     }
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229                                                        uint32_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252                                                        uint64_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 /* label relocation processing */
263 
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265                           TCGLabel *l, intptr_t addend)
266 {
267     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268 
269     r->type = type;
270     r->ptr = code_ptr;
271     r->addend = addend;
272     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274 
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277     tcg_debug_assert(!l->has_value);
278     l->has_value = 1;
279     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281 
282 TCGLabel *gen_new_label(void)
283 {
284     TCGContext *s = tcg_ctx;
285     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286 
287     memset(l, 0, sizeof(TCGLabel));
288     l->id = s->nb_labels++;
289     QSIMPLEQ_INIT(&l->relocs);
290 
291     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292 
293     return l;
294 }
295 
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298     TCGLabel *l;
299 
300     QSIMPLEQ_FOREACH(l, &s->labels, next) {
301         TCGRelocation *r;
302         uintptr_t value = l->u.value;
303 
304         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306                 return false;
307             }
308         }
309     }
310     return true;
311 }
312 
/* Record the current code size as the reset offset for jump slot @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
321 
/*
 * Signal overflow, starting over with fewer guest insns.
 * Does not return: control is transferred to s->jmp_trans with value -2.
 */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
328 
/* Token-pasting helpers: join a prefix P and 1..6 names with underscores. */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

/*
 * First expansion of the constraint-set macros: each use yields one
 * enumerator name followed by a comma.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

/* The enumerators are whatever tcg-target-con-set.h lists, in that order. */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Declared here; the definition is not in this part of the file. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
360 
361 #undef C_O0_I1
362 #undef C_O0_I2
363 #undef C_O0_I3
364 #undef C_O0_I4
365 #undef C_O1_I1
366 #undef C_O1_I2
367 #undef C_O1_I3
368 #undef C_O1_I4
369 #undef C_N1_I2
370 #undef C_O2_I1
371 #undef C_O2_I2
372 #undef C_O2_I3
373 #undef C_O2_I4
374 
375 /* Put all of the constraint sets into an array, indexed by the enum. */
376 
/*
 * Second expansion of the constraint-set macros: each use yields one
 * TCGTargetOpDef initializer whose args_ct_str holds the stringified
 * constraint names.  C_N1_I2 additionally prefixes its output with "&".
 */
#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

/*
 * Built from the same header as TCGConstraintSetIndex, so entry N of this
 * array corresponds to enumerator N.
 */
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
397 
398 
399 #undef C_O0_I1
400 #undef C_O0_I2
401 #undef C_O0_I3
402 #undef C_O0_I4
403 #undef C_O1_I1
404 #undef C_O1_I2
405 #undef C_O1_I3
406 #undef C_O1_I4
407 #undef C_N1_I2
408 #undef C_O2_I1
409 #undef C_O2_I2
410 #undef C_O2_I3
411 #undef C_O2_I4
412 
413 /* Expand the enumerator to be returned from tcg_target_op_def(). */
414 
/*
 * Third expansion of the constraint-set macros: same enumerator names as
 * in the enum definition, but without the trailing comma, so that they can
 * be used as expressions by the code included from tcg-target.c.inc.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
431 
432 #include "tcg-target.c.inc"
433 
434 static void alloc_tcg_plugin_context(TCGContext *s)
435 {
436 #ifdef CONFIG_PLUGIN
437     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
438     s->plugin_tb->insns =
439         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
440 #endif
441 }
442 
443 /*
444  * All TCG threads except the parent (i.e. the one that called tcg_context_init
445  * and registered the target's TCG globals) must register with this function
446  * before initiating translation.
447  *
448  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
449  * of tcg_region_init() for the reasoning behind this.
450  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
457  */
458 #ifdef CONFIG_USER_ONLY
/* User-mode variant: the calling thread simply uses the shared init context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
463 #else
464 void tcg_register_thread(void)
465 {
466     TCGContext *s = g_malloc(sizeof(*s));
467     unsigned int i, n;
468 
469     *s = tcg_init_ctx;
470 
471     /* Relink mem_base.  */
472     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
473         if (tcg_init_ctx.temps[i].mem_base) {
474             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
475             tcg_debug_assert(b >= 0 && b < n);
476             s->temps[i].mem_base = &s->temps[b];
477         }
478     }
479 
480     /* Claim an entry in tcg_ctxs */
481     n = qatomic_fetch_inc(&tcg_cur_ctxs);
482     g_assert(n < tcg_max_ctxs);
483     qatomic_set(&tcg_ctxs[n], s);
484 
485     if (n > 0) {
486         alloc_tcg_plugin_context(s);
487         tcg_region_initial_alloc(s);
488     }
489 
490     tcg_ctx = s;
491 }
492 #endif /* !CONFIG_USER_ONLY */
493 
494 /* pool based memory allocation */
495 void *tcg_malloc_internal(TCGContext *s, int size)
496 {
497     TCGPool *p;
498     int pool_size;
499 
500     if (size > TCG_POOL_CHUNK_SIZE) {
501         /* big malloc: insert a new pool (XXX: could optimize) */
502         p = g_malloc(sizeof(TCGPool) + size);
503         p->size = size;
504         p->next = s->pool_first_large;
505         s->pool_first_large = p;
506         return p->data;
507     } else {
508         p = s->pool_current;
509         if (!p) {
510             p = s->pool_first;
511             if (!p)
512                 goto new_pool;
513         } else {
514             if (!p->next) {
515             new_pool:
516                 pool_size = TCG_POOL_CHUNK_SIZE;
517                 p = g_malloc(sizeof(TCGPool) + pool_size);
518                 p->size = pool_size;
519                 p->next = NULL;
520                 if (s->pool_current)
521                     s->pool_current->next = p;
522                 else
523                     s->pool_first = p;
524             } else {
525                 p = p->next;
526             }
527         }
528     }
529     s->pool_current = p;
530     s->pool_cur = p->data + size;
531     s->pool_end = p->data + p->size;
532     return p->data;
533 }
534 
535 void tcg_pool_reset(TCGContext *s)
536 {
537     TCGPool *p, *t;
538     for (p = s->pool_first_large; p; p = t) {
539         t = p->next;
540         g_free(p);
541     }
542     s->pool_first_large = NULL;
543     s->pool_cur = s->pool_end = NULL;
544     s->pool_current = NULL;
545 }
546 
547 #include "exec/helper-proto.h"
548 
/* One TCGHelperInfo per helper, generated by including exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;
553 
554 #ifdef CONFIG_TCG_INTERPRETER
555 static GHashTable *ffi_table;
556 
/*
 * libffi type descriptor for each helper typecode.  Typecodes are taken
 * from 3-bit fields of a typemask, hence 8 slots; slots without an
 * initializer are NULL.
 */
static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
565 #endif
566 
567 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
568 static void process_op_defs(TCGContext *s);
569 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
570                                             TCGReg reg, const char *name);
571 
572 static void tcg_context_init(unsigned max_cpus)
573 {
574     TCGContext *s = &tcg_init_ctx;
575     int op, total_args, n, i;
576     TCGOpDef *def;
577     TCGArgConstraint *args_ct;
578     TCGTemp *ts;
579 
580     memset(s, 0, sizeof(*s));
581     s->nb_globals = 0;
582 
583     /* Count total number of arguments and allocate the corresponding
584        space */
585     total_args = 0;
586     for(op = 0; op < NB_OPS; op++) {
587         def = &tcg_op_defs[op];
588         n = def->nb_iargs + def->nb_oargs;
589         total_args += n;
590     }
591 
592     args_ct = g_new0(TCGArgConstraint, total_args);
593 
594     for(op = 0; op < NB_OPS; op++) {
595         def = &tcg_op_defs[op];
596         def->args_ct = args_ct;
597         n = def->nb_iargs + def->nb_oargs;
598         args_ct += n;
599     }
600 
601     /* Register helpers.  */
602     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
603     helper_table = g_hash_table_new(NULL, NULL);
604 
605     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
606         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
607                             (gpointer)&all_helpers[i]);
608     }
609 
610 #ifdef CONFIG_TCG_INTERPRETER
611     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
612     ffi_table = g_hash_table_new(NULL, NULL);
613     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
614         struct {
615             ffi_cif cif;
616             ffi_type *args[];
617         } *ca;
618         uint32_t typemask = all_helpers[i].typemask;
619         gpointer hash = (gpointer)(uintptr_t)typemask;
620         ffi_status status;
621         int nargs;
622 
623         if (g_hash_table_lookup(ffi_table, hash)) {
624             continue;
625         }
626 
627         /* Ignoring the return type, find the last non-zero field. */
628         nargs = 32 - clz32(typemask >> 3);
629         nargs = DIV_ROUND_UP(nargs, 3);
630 
631         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
632         ca->cif.rtype = typecode_to_ffi[typemask & 7];
633         ca->cif.nargs = nargs;
634 
635         if (nargs != 0) {
636             ca->cif.arg_types = ca->args;
637             for (i = 0; i < nargs; ++i) {
638                 int typecode = extract32(typemask, (i + 1) * 3, 3);
639                 ca->args[i] = typecode_to_ffi[typecode];
640             }
641         }
642 
643         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
644                               ca->cif.rtype, ca->cif.arg_types);
645         assert(status == FFI_OK);
646 
647         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
648     }
649 #endif
650 
651     tcg_target_init(s);
652     process_op_defs(s);
653 
654     /* Reverse the order of the saved registers, assuming they're all at
655        the start of tcg_target_reg_alloc_order.  */
656     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
657         int r = tcg_target_reg_alloc_order[n];
658         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
659             break;
660         }
661     }
662     for (i = 0; i < n; ++i) {
663         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
664     }
665     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
666         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
667     }
668 
669     alloc_tcg_plugin_context(s);
670 
671     tcg_ctx = s;
672     /*
673      * In user-mode we simply share the init context among threads, since we
674      * use a single region. See the documentation tcg_region_init() for the
675      * reasoning behind this.
676      * In softmmu we will have at most max_cpus TCG threads.
677      */
678 #ifdef CONFIG_USER_ONLY
679     tcg_ctxs = &tcg_ctx;
680     tcg_cur_ctxs = 1;
681     tcg_max_ctxs = 1;
682 #else
683     tcg_max_ctxs = max_cpus;
684     tcg_ctxs = g_new0(TCGContext *, max_cpus);
685 #endif
686 
687     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
688     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
689     cpu_env = temp_tcgv_ptr(ts);
690 }
691 
/*
 * Public entry point: initialise the parent context, then the code
 * regions.  All three arguments are forwarded to tcg_region_init();
 * tcg_context_init() only needs @max_cpus.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
697 
698 /*
699  * Allocate TBs right before their corresponding translated code, making
700  * sure that TBs and code are on different cache lines.
701  */
702 TranslationBlock *tcg_tb_alloc(TCGContext *s)
703 {
704     uintptr_t align = qemu_icache_linesize;
705     TranslationBlock *tb;
706     void *next;
707 
708  retry:
709     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
710     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
711 
712     if (unlikely(next > s->code_gen_highwater)) {
713         if (tcg_region_alloc(s)) {
714             return NULL;
715         }
716         goto retry;
717     }
718     qatomic_set(&s->code_gen_ptr, next);
719     s->data_gen_ptr = NULL;
720     return tb;
721 }
722 
723 void tcg_prologue_init(TCGContext *s)
724 {
725     size_t prologue_size;
726 
727     s->code_ptr = s->code_gen_ptr;
728     s->code_buf = s->code_gen_ptr;
729     s->data_gen_ptr = NULL;
730 
731 #ifndef CONFIG_TCG_INTERPRETER
732     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
733 #endif
734 
735 #ifdef TCG_TARGET_NEED_POOL_LABELS
736     s->pool_labels = NULL;
737 #endif
738 
739     qemu_thread_jit_write();
740     /* Generate the prologue.  */
741     tcg_target_qemu_prologue(s);
742 
743 #ifdef TCG_TARGET_NEED_POOL_LABELS
744     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
745     {
746         int result = tcg_out_pool_finalize(s);
747         tcg_debug_assert(result == 0);
748     }
749 #endif
750 
751     prologue_size = tcg_current_code_size(s);
752 
753 #ifndef CONFIG_TCG_INTERPRETER
754     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
755                         (uintptr_t)s->code_buf, prologue_size);
756 #endif
757 
758 #ifdef DEBUG_DISAS
759     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
760         FILE *logfile = qemu_log_trylock();
761         if (logfile) {
762             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
763             if (s->data_gen_ptr) {
764                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
765                 size_t data_size = prologue_size - code_size;
766                 size_t i;
767 
768                 disas(logfile, s->code_gen_ptr, code_size);
769 
770                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
771                     if (sizeof(tcg_target_ulong) == 8) {
772                         fprintf(logfile,
773                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
774                                 (uintptr_t)s->data_gen_ptr + i,
775                                 *(uint64_t *)(s->data_gen_ptr + i));
776                     } else {
777                         fprintf(logfile,
778                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
779                                 (uintptr_t)s->data_gen_ptr + i,
780                                 *(uint32_t *)(s->data_gen_ptr + i));
781                     }
782                 }
783             } else {
784                 disas(logfile, s->code_gen_ptr, prologue_size);
785             }
786             fprintf(logfile, "\n");
787             qemu_log_unlock(logfile);
788         }
789     }
790 #endif
791 
792 #ifndef CONFIG_TCG_INTERPRETER
793     /*
794      * Assert that goto_ptr is implemented completely, setting an epilogue.
795      * For tci, we use NULL as the signal to return from the interpreter,
796      * so skip this check.
797      */
798     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
799 #endif
800 
801     tcg_region_prologue_set(s);
802 }
803 
804 void tcg_func_start(TCGContext *s)
805 {
806     tcg_pool_reset(s);
807     s->nb_temps = s->nb_globals;
808 
809     /* No temps have been previously allocated for size or locality.  */
810     memset(s->free_temps, 0, sizeof(s->free_temps));
811 
812     /* No constant temps have been previously allocated. */
813     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
814         if (s->const_table[i]) {
815             g_hash_table_remove_all(s->const_table[i]);
816         }
817     }
818 
819     s->nb_ops = 0;
820     s->nb_labels = 0;
821     s->current_frame_offset = s->frame_start;
822 
823 #ifdef CONFIG_DEBUG_TCG
824     s->goto_tb_issue_mask = 0;
825 #endif
826 
827     QTAILQ_INIT(&s->ops);
828     QTAILQ_INIT(&s->free_ops);
829     QSIMPLEQ_INIT(&s->labels);
830 }
831 
832 static TCGTemp *tcg_temp_alloc(TCGContext *s)
833 {
834     int n = s->nb_temps++;
835 
836     if (n >= TCG_MAX_TEMPS) {
837         tcg_raise_tb_overflow(s);
838     }
839     return memset(&s->temps[n], 0, sizeof(TCGTemp));
840 }
841 
842 static TCGTemp *tcg_global_alloc(TCGContext *s)
843 {
844     TCGTemp *ts;
845 
846     tcg_debug_assert(s->nb_globals == s->nb_temps);
847     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
848     s->nb_globals++;
849     ts = tcg_temp_alloc(s);
850     ts->kind = TEMP_GLOBAL;
851 
852     return ts;
853 }
854 
855 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
856                                             TCGReg reg, const char *name)
857 {
858     TCGTemp *ts;
859 
860     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
861         tcg_abort();
862     }
863 
864     ts = tcg_global_alloc(s);
865     ts->base_type = type;
866     ts->type = type;
867     ts->kind = TEMP_FIXED;
868     ts->reg = reg;
869     ts->name = name;
870     tcg_regset_set_reg(s->reserved_regs, reg);
871 
872     return ts;
873 }
874 
875 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
876 {
877     s->frame_start = start;
878     s->frame_end = start + size;
879     s->frame_temp
880         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
881 }
882 
883 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
884                                      intptr_t offset, const char *name)
885 {
886     TCGContext *s = tcg_ctx;
887     TCGTemp *base_ts = tcgv_ptr_temp(base);
888     TCGTemp *ts = tcg_global_alloc(s);
889     int indirect_reg = 0, bigendian = 0;
890 #if HOST_BIG_ENDIAN
891     bigendian = 1;
892 #endif
893 
894     switch (base_ts->kind) {
895     case TEMP_FIXED:
896         break;
897     case TEMP_GLOBAL:
898         /* We do not support double-indirect registers.  */
899         tcg_debug_assert(!base_ts->indirect_reg);
900         base_ts->indirect_base = 1;
901         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
902                             ? 2 : 1);
903         indirect_reg = 1;
904         break;
905     default:
906         g_assert_not_reached();
907     }
908 
909     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
910         TCGTemp *ts2 = tcg_global_alloc(s);
911         char buf[64];
912 
913         ts->base_type = TCG_TYPE_I64;
914         ts->type = TCG_TYPE_I32;
915         ts->indirect_reg = indirect_reg;
916         ts->mem_allocated = 1;
917         ts->mem_base = base_ts;
918         ts->mem_offset = offset + bigendian * 4;
919         pstrcpy(buf, sizeof(buf), name);
920         pstrcat(buf, sizeof(buf), "_0");
921         ts->name = strdup(buf);
922 
923         tcg_debug_assert(ts2 == ts + 1);
924         ts2->base_type = TCG_TYPE_I64;
925         ts2->type = TCG_TYPE_I32;
926         ts2->indirect_reg = indirect_reg;
927         ts2->mem_allocated = 1;
928         ts2->mem_base = base_ts;
929         ts2->mem_offset = offset + (1 - bigendian) * 4;
930         pstrcpy(buf, sizeof(buf), name);
931         pstrcat(buf, sizeof(buf), "_1");
932         ts2->name = strdup(buf);
933     } else {
934         ts->base_type = type;
935         ts->type = type;
936         ts->indirect_reg = indirect_reg;
937         ts->mem_allocated = 1;
938         ts->mem_base = base_ts;
939         ts->mem_offset = offset;
940         ts->name = name;
941     }
942     return ts;
943 }
944 
945 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
946 {
947     TCGContext *s = tcg_ctx;
948     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
949     TCGTemp *ts;
950     int idx, k;
951 
952     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
953     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
954     if (idx < TCG_MAX_TEMPS) {
955         /* There is already an available temp with the right type.  */
956         clear_bit(idx, s->free_temps[k].l);
957 
958         ts = &s->temps[idx];
959         ts->temp_allocated = 1;
960         tcg_debug_assert(ts->base_type == type);
961         tcg_debug_assert(ts->kind == kind);
962     } else {
963         ts = tcg_temp_alloc(s);
964         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
965             TCGTemp *ts2 = tcg_temp_alloc(s);
966 
967             ts->base_type = type;
968             ts->type = TCG_TYPE_I32;
969             ts->temp_allocated = 1;
970             ts->kind = kind;
971 
972             tcg_debug_assert(ts2 == ts + 1);
973             ts2->base_type = TCG_TYPE_I64;
974             ts2->type = TCG_TYPE_I32;
975             ts2->temp_allocated = 1;
976             ts2->kind = kind;
977         } else {
978             ts->base_type = type;
979             ts->type = type;
980             ts->temp_allocated = 1;
981             ts->kind = kind;
982         }
983     }
984 
985 #if defined(CONFIG_DEBUG_TCG)
986     s->temps_in_use++;
987 #endif
988     return ts;
989 }
990 
991 TCGv_vec tcg_temp_new_vec(TCGType type)
992 {
993     TCGTemp *t;
994 
995 #ifdef CONFIG_DEBUG_TCG
996     switch (type) {
997     case TCG_TYPE_V64:
998         assert(TCG_TARGET_HAS_v64);
999         break;
1000     case TCG_TYPE_V128:
1001         assert(TCG_TARGET_HAS_v128);
1002         break;
1003     case TCG_TYPE_V256:
1004         assert(TCG_TARGET_HAS_v256);
1005         break;
1006     default:
1007         g_assert_not_reached();
1008     }
1009 #endif
1010 
1011     t = tcg_temp_new_internal(type, 0);
1012     return temp_tcgv_vec(t);
1013 }
1014 
1015 /* Create a new temp of the same type as an existing temp.  */
1016 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1017 {
1018     TCGTemp *t = tcgv_vec_temp(match);
1019 
1020     tcg_debug_assert(t->temp_allocated != 0);
1021 
1022     t = tcg_temp_new_internal(t->base_type, 0);
1023     return temp_tcgv_vec(t);
1024 }
1025 
1026 void tcg_temp_free_internal(TCGTemp *ts)
1027 {
1028     TCGContext *s = tcg_ctx;
1029     int k, idx;
1030 
1031     switch (ts->kind) {
1032     case TEMP_CONST:
1033         /*
1034          * In order to simplify users of tcg_constant_*,
1035          * silently ignore free.
1036          */
1037         return;
1038     case TEMP_NORMAL:
1039     case TEMP_LOCAL:
1040         break;
1041     default:
1042         g_assert_not_reached();
1043     }
1044 
1045 #if defined(CONFIG_DEBUG_TCG)
1046     s->temps_in_use--;
1047     if (s->temps_in_use < 0) {
1048         fprintf(stderr, "More temporaries freed than allocated!\n");
1049     }
1050 #endif
1051 
1052     tcg_debug_assert(ts->temp_allocated != 0);
1053     ts->temp_allocated = 0;
1054 
1055     idx = temp_idx(ts);
1056     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1057     set_bit(idx, s->free_temps[k].l);
1058 }
1059 
1060 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1061 {
1062     TCGContext *s = tcg_ctx;
1063     GHashTable *h = s->const_table[type];
1064     TCGTemp *ts;
1065 
1066     if (h == NULL) {
1067         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1068         s->const_table[type] = h;
1069     }
1070 
1071     ts = g_hash_table_lookup(h, &val);
1072     if (ts == NULL) {
1073         ts = tcg_temp_alloc(s);
1074 
1075         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1076             TCGTemp *ts2 = tcg_temp_alloc(s);
1077 
1078             ts->base_type = TCG_TYPE_I64;
1079             ts->type = TCG_TYPE_I32;
1080             ts->kind = TEMP_CONST;
1081             ts->temp_allocated = 1;
1082             /*
1083              * Retain the full value of the 64-bit constant in the low
1084              * part, so that the hash table works.  Actual uses will
1085              * truncate the value to the low part.
1086              */
1087             ts->val = val;
1088 
1089             tcg_debug_assert(ts2 == ts + 1);
1090             ts2->base_type = TCG_TYPE_I64;
1091             ts2->type = TCG_TYPE_I32;
1092             ts2->kind = TEMP_CONST;
1093             ts2->temp_allocated = 1;
1094             ts2->val = val >> 32;
1095         } else {
1096             ts->base_type = type;
1097             ts->type = type;
1098             ts->kind = TEMP_CONST;
1099             ts->temp_allocated = 1;
1100             ts->val = val;
1101         }
1102         g_hash_table_insert(h, &ts->val, ts);
1103     }
1104 
1105     return ts;
1106 }
1107 
1108 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1109 {
1110     val = dup_const(vece, val);
1111     return temp_tcgv_vec(tcg_constant_internal(type, val));
1112 }
1113 
1114 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1115 {
1116     TCGTemp *t = tcgv_vec_temp(match);
1117 
1118     tcg_debug_assert(t->temp_allocated != 0);
1119     return tcg_constant_vec(t->base_type, vece, val);
1120 }
1121 
1122 TCGv_i32 tcg_const_i32(int32_t val)
1123 {
1124     TCGv_i32 t0;
1125     t0 = tcg_temp_new_i32();
1126     tcg_gen_movi_i32(t0, val);
1127     return t0;
1128 }
1129 
1130 TCGv_i64 tcg_const_i64(int64_t val)
1131 {
1132     TCGv_i64 t0;
1133     t0 = tcg_temp_new_i64();
1134     tcg_gen_movi_i64(t0, val);
1135     return t0;
1136 }
1137 
1138 TCGv_i32 tcg_const_local_i32(int32_t val)
1139 {
1140     TCGv_i32 t0;
1141     t0 = tcg_temp_local_new_i32();
1142     tcg_gen_movi_i32(t0, val);
1143     return t0;
1144 }
1145 
1146 TCGv_i64 tcg_const_local_i64(int64_t val)
1147 {
1148     TCGv_i64 t0;
1149     t0 = tcg_temp_local_new_i64();
1150     tcg_gen_movi_i64(t0, val);
1151     return t0;
1152 }
1153 
1154 #if defined(CONFIG_DEBUG_TCG)
1155 void tcg_clear_temp_count(void)
1156 {
1157     TCGContext *s = tcg_ctx;
1158     s->temps_in_use = 0;
1159 }
1160 
1161 int tcg_check_temp_count(void)
1162 {
1163     TCGContext *s = tcg_ctx;
1164     if (s->temps_in_use) {
1165         /* Clear the count so that we don't give another
1166          * warning immediately next time around.
1167          */
1168         s->temps_in_use = 0;
1169         return 1;
1170     }
1171     return 0;
1172 }
1173 #endif
1174 
1175 /* Return true if OP may appear in the opcode stream.
1176    Test the runtime variable that controls each opcode.  */
1177 bool tcg_op_supported(TCGOpcode op)
1178 {
1179     const bool have_vec
1180         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1181 
1182     switch (op) {
1183     case INDEX_op_discard:
1184     case INDEX_op_set_label:
1185     case INDEX_op_call:
1186     case INDEX_op_br:
1187     case INDEX_op_mb:
1188     case INDEX_op_insn_start:
1189     case INDEX_op_exit_tb:
1190     case INDEX_op_goto_tb:
1191     case INDEX_op_goto_ptr:
1192     case INDEX_op_qemu_ld_i32:
1193     case INDEX_op_qemu_st_i32:
1194     case INDEX_op_qemu_ld_i64:
1195     case INDEX_op_qemu_st_i64:
1196         return true;
1197 
1198     case INDEX_op_qemu_st8_i32:
1199         return TCG_TARGET_HAS_qemu_st8_i32;
1200 
1201     case INDEX_op_mov_i32:
1202     case INDEX_op_setcond_i32:
1203     case INDEX_op_brcond_i32:
1204     case INDEX_op_ld8u_i32:
1205     case INDEX_op_ld8s_i32:
1206     case INDEX_op_ld16u_i32:
1207     case INDEX_op_ld16s_i32:
1208     case INDEX_op_ld_i32:
1209     case INDEX_op_st8_i32:
1210     case INDEX_op_st16_i32:
1211     case INDEX_op_st_i32:
1212     case INDEX_op_add_i32:
1213     case INDEX_op_sub_i32:
1214     case INDEX_op_mul_i32:
1215     case INDEX_op_and_i32:
1216     case INDEX_op_or_i32:
1217     case INDEX_op_xor_i32:
1218     case INDEX_op_shl_i32:
1219     case INDEX_op_shr_i32:
1220     case INDEX_op_sar_i32:
1221         return true;
1222 
1223     case INDEX_op_movcond_i32:
1224         return TCG_TARGET_HAS_movcond_i32;
1225     case INDEX_op_div_i32:
1226     case INDEX_op_divu_i32:
1227         return TCG_TARGET_HAS_div_i32;
1228     case INDEX_op_rem_i32:
1229     case INDEX_op_remu_i32:
1230         return TCG_TARGET_HAS_rem_i32;
1231     case INDEX_op_div2_i32:
1232     case INDEX_op_divu2_i32:
1233         return TCG_TARGET_HAS_div2_i32;
1234     case INDEX_op_rotl_i32:
1235     case INDEX_op_rotr_i32:
1236         return TCG_TARGET_HAS_rot_i32;
1237     case INDEX_op_deposit_i32:
1238         return TCG_TARGET_HAS_deposit_i32;
1239     case INDEX_op_extract_i32:
1240         return TCG_TARGET_HAS_extract_i32;
1241     case INDEX_op_sextract_i32:
1242         return TCG_TARGET_HAS_sextract_i32;
1243     case INDEX_op_extract2_i32:
1244         return TCG_TARGET_HAS_extract2_i32;
1245     case INDEX_op_add2_i32:
1246         return TCG_TARGET_HAS_add2_i32;
1247     case INDEX_op_sub2_i32:
1248         return TCG_TARGET_HAS_sub2_i32;
1249     case INDEX_op_mulu2_i32:
1250         return TCG_TARGET_HAS_mulu2_i32;
1251     case INDEX_op_muls2_i32:
1252         return TCG_TARGET_HAS_muls2_i32;
1253     case INDEX_op_muluh_i32:
1254         return TCG_TARGET_HAS_muluh_i32;
1255     case INDEX_op_mulsh_i32:
1256         return TCG_TARGET_HAS_mulsh_i32;
1257     case INDEX_op_ext8s_i32:
1258         return TCG_TARGET_HAS_ext8s_i32;
1259     case INDEX_op_ext16s_i32:
1260         return TCG_TARGET_HAS_ext16s_i32;
1261     case INDEX_op_ext8u_i32:
1262         return TCG_TARGET_HAS_ext8u_i32;
1263     case INDEX_op_ext16u_i32:
1264         return TCG_TARGET_HAS_ext16u_i32;
1265     case INDEX_op_bswap16_i32:
1266         return TCG_TARGET_HAS_bswap16_i32;
1267     case INDEX_op_bswap32_i32:
1268         return TCG_TARGET_HAS_bswap32_i32;
1269     case INDEX_op_not_i32:
1270         return TCG_TARGET_HAS_not_i32;
1271     case INDEX_op_neg_i32:
1272         return TCG_TARGET_HAS_neg_i32;
1273     case INDEX_op_andc_i32:
1274         return TCG_TARGET_HAS_andc_i32;
1275     case INDEX_op_orc_i32:
1276         return TCG_TARGET_HAS_orc_i32;
1277     case INDEX_op_eqv_i32:
1278         return TCG_TARGET_HAS_eqv_i32;
1279     case INDEX_op_nand_i32:
1280         return TCG_TARGET_HAS_nand_i32;
1281     case INDEX_op_nor_i32:
1282         return TCG_TARGET_HAS_nor_i32;
1283     case INDEX_op_clz_i32:
1284         return TCG_TARGET_HAS_clz_i32;
1285     case INDEX_op_ctz_i32:
1286         return TCG_TARGET_HAS_ctz_i32;
1287     case INDEX_op_ctpop_i32:
1288         return TCG_TARGET_HAS_ctpop_i32;
1289 
1290     case INDEX_op_brcond2_i32:
1291     case INDEX_op_setcond2_i32:
1292         return TCG_TARGET_REG_BITS == 32;
1293 
1294     case INDEX_op_mov_i64:
1295     case INDEX_op_setcond_i64:
1296     case INDEX_op_brcond_i64:
1297     case INDEX_op_ld8u_i64:
1298     case INDEX_op_ld8s_i64:
1299     case INDEX_op_ld16u_i64:
1300     case INDEX_op_ld16s_i64:
1301     case INDEX_op_ld32u_i64:
1302     case INDEX_op_ld32s_i64:
1303     case INDEX_op_ld_i64:
1304     case INDEX_op_st8_i64:
1305     case INDEX_op_st16_i64:
1306     case INDEX_op_st32_i64:
1307     case INDEX_op_st_i64:
1308     case INDEX_op_add_i64:
1309     case INDEX_op_sub_i64:
1310     case INDEX_op_mul_i64:
1311     case INDEX_op_and_i64:
1312     case INDEX_op_or_i64:
1313     case INDEX_op_xor_i64:
1314     case INDEX_op_shl_i64:
1315     case INDEX_op_shr_i64:
1316     case INDEX_op_sar_i64:
1317     case INDEX_op_ext_i32_i64:
1318     case INDEX_op_extu_i32_i64:
1319         return TCG_TARGET_REG_BITS == 64;
1320 
1321     case INDEX_op_movcond_i64:
1322         return TCG_TARGET_HAS_movcond_i64;
1323     case INDEX_op_div_i64:
1324     case INDEX_op_divu_i64:
1325         return TCG_TARGET_HAS_div_i64;
1326     case INDEX_op_rem_i64:
1327     case INDEX_op_remu_i64:
1328         return TCG_TARGET_HAS_rem_i64;
1329     case INDEX_op_div2_i64:
1330     case INDEX_op_divu2_i64:
1331         return TCG_TARGET_HAS_div2_i64;
1332     case INDEX_op_rotl_i64:
1333     case INDEX_op_rotr_i64:
1334         return TCG_TARGET_HAS_rot_i64;
1335     case INDEX_op_deposit_i64:
1336         return TCG_TARGET_HAS_deposit_i64;
1337     case INDEX_op_extract_i64:
1338         return TCG_TARGET_HAS_extract_i64;
1339     case INDEX_op_sextract_i64:
1340         return TCG_TARGET_HAS_sextract_i64;
1341     case INDEX_op_extract2_i64:
1342         return TCG_TARGET_HAS_extract2_i64;
1343     case INDEX_op_extrl_i64_i32:
1344         return TCG_TARGET_HAS_extrl_i64_i32;
1345     case INDEX_op_extrh_i64_i32:
1346         return TCG_TARGET_HAS_extrh_i64_i32;
1347     case INDEX_op_ext8s_i64:
1348         return TCG_TARGET_HAS_ext8s_i64;
1349     case INDEX_op_ext16s_i64:
1350         return TCG_TARGET_HAS_ext16s_i64;
1351     case INDEX_op_ext32s_i64:
1352         return TCG_TARGET_HAS_ext32s_i64;
1353     case INDEX_op_ext8u_i64:
1354         return TCG_TARGET_HAS_ext8u_i64;
1355     case INDEX_op_ext16u_i64:
1356         return TCG_TARGET_HAS_ext16u_i64;
1357     case INDEX_op_ext32u_i64:
1358         return TCG_TARGET_HAS_ext32u_i64;
1359     case INDEX_op_bswap16_i64:
1360         return TCG_TARGET_HAS_bswap16_i64;
1361     case INDEX_op_bswap32_i64:
1362         return TCG_TARGET_HAS_bswap32_i64;
1363     case INDEX_op_bswap64_i64:
1364         return TCG_TARGET_HAS_bswap64_i64;
1365     case INDEX_op_not_i64:
1366         return TCG_TARGET_HAS_not_i64;
1367     case INDEX_op_neg_i64:
1368         return TCG_TARGET_HAS_neg_i64;
1369     case INDEX_op_andc_i64:
1370         return TCG_TARGET_HAS_andc_i64;
1371     case INDEX_op_orc_i64:
1372         return TCG_TARGET_HAS_orc_i64;
1373     case INDEX_op_eqv_i64:
1374         return TCG_TARGET_HAS_eqv_i64;
1375     case INDEX_op_nand_i64:
1376         return TCG_TARGET_HAS_nand_i64;
1377     case INDEX_op_nor_i64:
1378         return TCG_TARGET_HAS_nor_i64;
1379     case INDEX_op_clz_i64:
1380         return TCG_TARGET_HAS_clz_i64;
1381     case INDEX_op_ctz_i64:
1382         return TCG_TARGET_HAS_ctz_i64;
1383     case INDEX_op_ctpop_i64:
1384         return TCG_TARGET_HAS_ctpop_i64;
1385     case INDEX_op_add2_i64:
1386         return TCG_TARGET_HAS_add2_i64;
1387     case INDEX_op_sub2_i64:
1388         return TCG_TARGET_HAS_sub2_i64;
1389     case INDEX_op_mulu2_i64:
1390         return TCG_TARGET_HAS_mulu2_i64;
1391     case INDEX_op_muls2_i64:
1392         return TCG_TARGET_HAS_muls2_i64;
1393     case INDEX_op_muluh_i64:
1394         return TCG_TARGET_HAS_muluh_i64;
1395     case INDEX_op_mulsh_i64:
1396         return TCG_TARGET_HAS_mulsh_i64;
1397 
1398     case INDEX_op_mov_vec:
1399     case INDEX_op_dup_vec:
1400     case INDEX_op_dupm_vec:
1401     case INDEX_op_ld_vec:
1402     case INDEX_op_st_vec:
1403     case INDEX_op_add_vec:
1404     case INDEX_op_sub_vec:
1405     case INDEX_op_and_vec:
1406     case INDEX_op_or_vec:
1407     case INDEX_op_xor_vec:
1408     case INDEX_op_cmp_vec:
1409         return have_vec;
1410     case INDEX_op_dup2_vec:
1411         return have_vec && TCG_TARGET_REG_BITS == 32;
1412     case INDEX_op_not_vec:
1413         return have_vec && TCG_TARGET_HAS_not_vec;
1414     case INDEX_op_neg_vec:
1415         return have_vec && TCG_TARGET_HAS_neg_vec;
1416     case INDEX_op_abs_vec:
1417         return have_vec && TCG_TARGET_HAS_abs_vec;
1418     case INDEX_op_andc_vec:
1419         return have_vec && TCG_TARGET_HAS_andc_vec;
1420     case INDEX_op_orc_vec:
1421         return have_vec && TCG_TARGET_HAS_orc_vec;
1422     case INDEX_op_nand_vec:
1423         return have_vec && TCG_TARGET_HAS_nand_vec;
1424     case INDEX_op_nor_vec:
1425         return have_vec && TCG_TARGET_HAS_nor_vec;
1426     case INDEX_op_eqv_vec:
1427         return have_vec && TCG_TARGET_HAS_eqv_vec;
1428     case INDEX_op_mul_vec:
1429         return have_vec && TCG_TARGET_HAS_mul_vec;
1430     case INDEX_op_shli_vec:
1431     case INDEX_op_shri_vec:
1432     case INDEX_op_sari_vec:
1433         return have_vec && TCG_TARGET_HAS_shi_vec;
1434     case INDEX_op_shls_vec:
1435     case INDEX_op_shrs_vec:
1436     case INDEX_op_sars_vec:
1437         return have_vec && TCG_TARGET_HAS_shs_vec;
1438     case INDEX_op_shlv_vec:
1439     case INDEX_op_shrv_vec:
1440     case INDEX_op_sarv_vec:
1441         return have_vec && TCG_TARGET_HAS_shv_vec;
1442     case INDEX_op_rotli_vec:
1443         return have_vec && TCG_TARGET_HAS_roti_vec;
1444     case INDEX_op_rotls_vec:
1445         return have_vec && TCG_TARGET_HAS_rots_vec;
1446     case INDEX_op_rotlv_vec:
1447     case INDEX_op_rotrv_vec:
1448         return have_vec && TCG_TARGET_HAS_rotv_vec;
1449     case INDEX_op_ssadd_vec:
1450     case INDEX_op_usadd_vec:
1451     case INDEX_op_sssub_vec:
1452     case INDEX_op_ussub_vec:
1453         return have_vec && TCG_TARGET_HAS_sat_vec;
1454     case INDEX_op_smin_vec:
1455     case INDEX_op_umin_vec:
1456     case INDEX_op_smax_vec:
1457     case INDEX_op_umax_vec:
1458         return have_vec && TCG_TARGET_HAS_minmax_vec;
1459     case INDEX_op_bitsel_vec:
1460         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1461     case INDEX_op_cmpsel_vec:
1462         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1463 
1464     default:
1465         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1466         return true;
1467     }
1468 }
1469 
1470 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1471    and endian swap. Maybe it would be better to do the alignment
1472    and endian swap in tcg_reg_alloc_call(). */
1473 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1474 {
1475     int i, real_args, nb_rets, pi;
1476     unsigned typemask;
1477     const TCGHelperInfo *info;
1478     TCGOp *op;
1479 
1480     info = g_hash_table_lookup(helper_table, (gpointer)func);
1481     typemask = info->typemask;
1482 
1483 #ifdef CONFIG_PLUGIN
1484     /* detect non-plugin helpers */
1485     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1486         tcg_ctx->plugin_insn->calls_helpers = true;
1487     }
1488 #endif
1489 
1490 #if defined(__sparc__) && !defined(__arch64__) \
1491     && !defined(CONFIG_TCG_INTERPRETER)
1492     /* We have 64-bit values in one register, but need to pass as two
1493        separate parameters.  Split them.  */
1494     int orig_typemask = typemask;
1495     int orig_nargs = nargs;
1496     TCGv_i64 retl, reth;
1497     TCGTemp *split_args[MAX_OPC_PARAM];
1498 
1499     retl = NULL;
1500     reth = NULL;
1501     typemask = 0;
1502     for (i = real_args = 0; i < nargs; ++i) {
1503         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1504         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1505 
1506         if (is_64bit) {
1507             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1508             TCGv_i32 h = tcg_temp_new_i32();
1509             TCGv_i32 l = tcg_temp_new_i32();
1510             tcg_gen_extr_i64_i32(l, h, orig);
1511             split_args[real_args++] = tcgv_i32_temp(h);
1512             typemask |= dh_typecode_i32 << (real_args * 3);
1513             split_args[real_args++] = tcgv_i32_temp(l);
1514             typemask |= dh_typecode_i32 << (real_args * 3);
1515         } else {
1516             split_args[real_args++] = args[i];
1517             typemask |= argtype << (real_args * 3);
1518         }
1519     }
1520     nargs = real_args;
1521     args = split_args;
1522 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1523     for (i = 0; i < nargs; ++i) {
1524         int argtype = extract32(typemask, (i + 1) * 3, 3);
1525         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1526         bool is_signed = argtype & 1;
1527 
1528         if (is_32bit) {
1529             TCGv_i64 temp = tcg_temp_new_i64();
1530             TCGv_i32 orig = temp_tcgv_i32(args[i]);
1531             if (is_signed) {
1532                 tcg_gen_ext_i32_i64(temp, orig);
1533             } else {
1534                 tcg_gen_extu_i32_i64(temp, orig);
1535             }
1536             args[i] = tcgv_i64_temp(temp);
1537         }
1538     }
1539 #endif /* TCG_TARGET_EXTEND_ARGS */
1540 
1541     op = tcg_emit_op(INDEX_op_call);
1542 
1543     pi = 0;
1544     if (ret != NULL) {
1545 #if defined(__sparc__) && !defined(__arch64__) \
1546     && !defined(CONFIG_TCG_INTERPRETER)
1547         if ((typemask & 6) == dh_typecode_i64) {
1548             /* The 32-bit ABI is going to return the 64-bit value in
1549                the %o0/%o1 register pair.  Prepare for this by using
1550                two return temporaries, and reassemble below.  */
1551             retl = tcg_temp_new_i64();
1552             reth = tcg_temp_new_i64();
1553             op->args[pi++] = tcgv_i64_arg(reth);
1554             op->args[pi++] = tcgv_i64_arg(retl);
1555             nb_rets = 2;
1556         } else {
1557             op->args[pi++] = temp_arg(ret);
1558             nb_rets = 1;
1559         }
1560 #else
1561         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1562 #if HOST_BIG_ENDIAN
1563             op->args[pi++] = temp_arg(ret + 1);
1564             op->args[pi++] = temp_arg(ret);
1565 #else
1566             op->args[pi++] = temp_arg(ret);
1567             op->args[pi++] = temp_arg(ret + 1);
1568 #endif
1569             nb_rets = 2;
1570         } else {
1571             op->args[pi++] = temp_arg(ret);
1572             nb_rets = 1;
1573         }
1574 #endif
1575     } else {
1576         nb_rets = 0;
1577     }
1578     TCGOP_CALLO(op) = nb_rets;
1579 
1580     real_args = 0;
1581     for (i = 0; i < nargs; i++) {
1582         int argtype = extract32(typemask, (i + 1) * 3, 3);
1583         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1584         bool want_align = false;
1585 
1586 #if defined(CONFIG_TCG_INTERPRETER)
1587         /*
1588          * Align all arguments, so that they land in predictable places
1589          * for passing off to ffi_call.
1590          */
1591         want_align = true;
1592 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1593         /* Some targets want aligned 64 bit args */
1594         want_align = is_64bit;
1595 #endif
1596 
1597         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1598             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1599             real_args++;
1600         }
1601 
1602         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1603             /*
1604              * If stack grows up, then we will be placing successive
1605              * arguments at lower addresses, which means we need to
1606              * reverse the order compared to how we would normally
1607              * treat either big or little-endian.  For those arguments
1608              * that will wind up in registers, this still works for
1609              * HPPA (the only current STACK_GROWSUP target) since the
1610              * argument registers are *also* allocated in decreasing
1611              * order.  If another such target is added, this logic may
1612              * have to get more complicated to differentiate between
1613              * stack arguments and register arguments.
1614              */
1615 #if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
1616             op->args[pi++] = temp_arg(args[i] + 1);
1617             op->args[pi++] = temp_arg(args[i]);
1618 #else
1619             op->args[pi++] = temp_arg(args[i]);
1620             op->args[pi++] = temp_arg(args[i] + 1);
1621 #endif
1622             real_args += 2;
1623             continue;
1624         }
1625 
1626         op->args[pi++] = temp_arg(args[i]);
1627         real_args++;
1628     }
1629     op->args[pi++] = (uintptr_t)func;
1630     op->args[pi++] = (uintptr_t)info;
1631     TCGOP_CALLI(op) = real_args;
1632 
1633     /* Make sure the fields didn't overflow.  */
1634     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1635     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1636 
1637 #if defined(__sparc__) && !defined(__arch64__) \
1638     && !defined(CONFIG_TCG_INTERPRETER)
1639     /* Free all of the parts we allocated above.  */
1640     for (i = real_args = 0; i < orig_nargs; ++i) {
1641         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1642         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1643 
1644         if (is_64bit) {
1645             tcg_temp_free_internal(args[real_args++]);
1646             tcg_temp_free_internal(args[real_args++]);
1647         } else {
1648             real_args++;
1649         }
1650     }
1651     if ((orig_typemask & 6) == dh_typecode_i64) {
1652         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1653            Note that describing these as TCGv_i64 eliminates an unnecessary
1654            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1655         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1656         tcg_temp_free_i64(retl);
1657         tcg_temp_free_i64(reth);
1658     }
1659 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1660     for (i = 0; i < nargs; ++i) {
1661         int argtype = extract32(typemask, (i + 1) * 3, 3);
1662         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1663 
1664         if (is_32bit) {
1665             tcg_temp_free_internal(args[i]);
1666         }
1667     }
1668 #endif /* TCG_TARGET_EXTEND_ARGS */
1669 }
1670 
1671 static void tcg_reg_alloc_start(TCGContext *s)
1672 {
1673     int i, n;
1674 
1675     for (i = 0, n = s->nb_temps; i < n; i++) {
1676         TCGTemp *ts = &s->temps[i];
1677         TCGTempVal val = TEMP_VAL_MEM;
1678 
1679         switch (ts->kind) {
1680         case TEMP_CONST:
1681             val = TEMP_VAL_CONST;
1682             break;
1683         case TEMP_FIXED:
1684             val = TEMP_VAL_REG;
1685             break;
1686         case TEMP_GLOBAL:
1687             break;
1688         case TEMP_NORMAL:
1689         case TEMP_EBB:
1690             val = TEMP_VAL_DEAD;
1691             /* fall through */
1692         case TEMP_LOCAL:
1693             ts->mem_allocated = 0;
1694             break;
1695         default:
1696             g_assert_not_reached();
1697         }
1698         ts->val_type = val;
1699     }
1700 
1701     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1702 }
1703 
1704 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1705                                  TCGTemp *ts)
1706 {
1707     int idx = temp_idx(ts);
1708 
1709     switch (ts->kind) {
1710     case TEMP_FIXED:
1711     case TEMP_GLOBAL:
1712         pstrcpy(buf, buf_size, ts->name);
1713         break;
1714     case TEMP_LOCAL:
1715         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1716         break;
1717     case TEMP_EBB:
1718         snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1719         break;
1720     case TEMP_NORMAL:
1721         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1722         break;
1723     case TEMP_CONST:
1724         switch (ts->type) {
1725         case TCG_TYPE_I32:
1726             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1727             break;
1728 #if TCG_TARGET_REG_BITS > 32
1729         case TCG_TYPE_I64:
1730             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1731             break;
1732 #endif
1733         case TCG_TYPE_V64:
1734         case TCG_TYPE_V128:
1735         case TCG_TYPE_V256:
1736             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1737                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1738             break;
1739         default:
1740             g_assert_not_reached();
1741         }
1742         break;
1743     }
1744     return buf;
1745 }
1746 
1747 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1748                              int buf_size, TCGArg arg)
1749 {
1750     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1751 }
1752 
1753 static const char * const cond_name[] =
1754 {
1755     [TCG_COND_NEVER] = "never",
1756     [TCG_COND_ALWAYS] = "always",
1757     [TCG_COND_EQ] = "eq",
1758     [TCG_COND_NE] = "ne",
1759     [TCG_COND_LT] = "lt",
1760     [TCG_COND_GE] = "ge",
1761     [TCG_COND_LE] = "le",
1762     [TCG_COND_GT] = "gt",
1763     [TCG_COND_LTU] = "ltu",
1764     [TCG_COND_GEU] = "geu",
1765     [TCG_COND_LEU] = "leu",
1766     [TCG_COND_GTU] = "gtu"
1767 };
1768 
1769 static const char * const ldst_name[] =
1770 {
1771     [MO_UB]   = "ub",
1772     [MO_SB]   = "sb",
1773     [MO_LEUW] = "leuw",
1774     [MO_LESW] = "lesw",
1775     [MO_LEUL] = "leul",
1776     [MO_LESL] = "lesl",
1777     [MO_LEUQ] = "leq",
1778     [MO_BEUW] = "beuw",
1779     [MO_BESW] = "besw",
1780     [MO_BEUL] = "beul",
1781     [MO_BESL] = "besl",
1782     [MO_BEUQ] = "beq",
1783 };
1784 
1785 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1786 #ifdef TARGET_ALIGNED_ONLY
1787     [MO_UNALN >> MO_ASHIFT]    = "un+",
1788     [MO_ALIGN >> MO_ASHIFT]    = "",
1789 #else
1790     [MO_UNALN >> MO_ASHIFT]    = "",
1791     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1792 #endif
1793     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1794     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1795     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1796     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1797     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1798     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1799 };
1800 
1801 static const char bswap_flag_name[][6] = {
1802     [TCG_BSWAP_IZ] = "iz",
1803     [TCG_BSWAP_OZ] = "oz",
1804     [TCG_BSWAP_OS] = "os",
1805     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1806     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1807 };
1808 
1809 static inline bool tcg_regset_single(TCGRegSet d)
1810 {
1811     return (d & (d - 1)) == 0;
1812 }
1813 
1814 static inline TCGReg tcg_regset_first(TCGRegSet d)
1815 {
1816     if (TCG_TARGET_NB_REGS <= 32) {
1817         return ctz32(d);
1818     } else {
1819         return ctz64(d);
1820     }
1821 }
1822 
1823 /* Return only the number of characters output -- no error return. */
1824 #define ne_fprintf(...) \
1825     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
1826 
1827 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1828 {
1829     char buf[128];
1830     TCGOp *op;
1831 
1832     QTAILQ_FOREACH(op, &s->ops, link) {
1833         int i, k, nb_oargs, nb_iargs, nb_cargs;
1834         const TCGOpDef *def;
1835         TCGOpcode c;
1836         int col = 0;
1837 
1838         c = op->opc;
1839         def = &tcg_op_defs[c];
1840 
1841         if (c == INDEX_op_insn_start) {
1842             nb_oargs = 0;
1843             col += ne_fprintf(f, "\n ----");
1844 
1845             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1846                 target_ulong a;
1847 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1848                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1849 #else
1850                 a = op->args[i];
1851 #endif
1852                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1853             }
1854         } else if (c == INDEX_op_call) {
1855             const TCGHelperInfo *info = tcg_call_info(op);
1856             void *func = tcg_call_func(op);
1857 
1858             /* variable number of arguments */
1859             nb_oargs = TCGOP_CALLO(op);
1860             nb_iargs = TCGOP_CALLI(op);
1861             nb_cargs = def->nb_cargs;
1862 
1863             col += ne_fprintf(f, " %s ", def->name);
1864 
1865             /*
1866              * Print the function name from TCGHelperInfo, if available.
1867              * Note that plugins have a template function for the info,
1868              * but the actual function pointer comes from the plugin.
1869              */
1870             if (func == info->func) {
1871                 col += ne_fprintf(f, "%s", info->name);
1872             } else {
1873                 col += ne_fprintf(f, "plugin(%p)", func);
1874             }
1875 
1876             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1877             for (i = 0; i < nb_oargs; i++) {
1878                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1879                                                             op->args[i]));
1880             }
1881             for (i = 0; i < nb_iargs; i++) {
1882                 TCGArg arg = op->args[nb_oargs + i];
1883                 const char *t = "<dummy>";
1884                 if (arg != TCG_CALL_DUMMY_ARG) {
1885                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1886                 }
1887                 col += ne_fprintf(f, ",%s", t);
1888             }
1889         } else {
1890             col += ne_fprintf(f, " %s ", def->name);
1891 
1892             nb_oargs = def->nb_oargs;
1893             nb_iargs = def->nb_iargs;
1894             nb_cargs = def->nb_cargs;
1895 
1896             if (def->flags & TCG_OPF_VECTOR) {
1897                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1898                                   8 << TCGOP_VECE(op));
1899             }
1900 
1901             k = 0;
1902             for (i = 0; i < nb_oargs; i++) {
1903                 const char *sep =  k ? "," : "";
1904                 col += ne_fprintf(f, "%s%s", sep,
1905                                   tcg_get_arg_str(s, buf, sizeof(buf),
1906                                                   op->args[k++]));
1907             }
1908             for (i = 0; i < nb_iargs; i++) {
1909                 const char *sep =  k ? "," : "";
1910                 col += ne_fprintf(f, "%s%s", sep,
1911                                   tcg_get_arg_str(s, buf, sizeof(buf),
1912                                                   op->args[k++]));
1913             }
1914             switch (c) {
1915             case INDEX_op_brcond_i32:
1916             case INDEX_op_setcond_i32:
1917             case INDEX_op_movcond_i32:
1918             case INDEX_op_brcond2_i32:
1919             case INDEX_op_setcond2_i32:
1920             case INDEX_op_brcond_i64:
1921             case INDEX_op_setcond_i64:
1922             case INDEX_op_movcond_i64:
1923             case INDEX_op_cmp_vec:
1924             case INDEX_op_cmpsel_vec:
1925                 if (op->args[k] < ARRAY_SIZE(cond_name)
1926                     && cond_name[op->args[k]]) {
1927                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1928                 } else {
1929                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1930                 }
1931                 i = 1;
1932                 break;
1933             case INDEX_op_qemu_ld_i32:
1934             case INDEX_op_qemu_st_i32:
1935             case INDEX_op_qemu_st8_i32:
1936             case INDEX_op_qemu_ld_i64:
1937             case INDEX_op_qemu_st_i64:
1938                 {
1939                     MemOpIdx oi = op->args[k++];
1940                     MemOp op = get_memop(oi);
1941                     unsigned ix = get_mmuidx(oi);
1942 
1943                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1944                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1945                     } else {
1946                         const char *s_al, *s_op;
1947                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1948                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1949                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
1950                     }
1951                     i = 1;
1952                 }
1953                 break;
1954             case INDEX_op_bswap16_i32:
1955             case INDEX_op_bswap16_i64:
1956             case INDEX_op_bswap32_i32:
1957             case INDEX_op_bswap32_i64:
1958             case INDEX_op_bswap64_i64:
1959                 {
1960                     TCGArg flags = op->args[k];
1961                     const char *name = NULL;
1962 
1963                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1964                         name = bswap_flag_name[flags];
1965                     }
1966                     if (name) {
1967                         col += ne_fprintf(f, ",%s", name);
1968                     } else {
1969                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
1970                     }
1971                     i = k = 1;
1972                 }
1973                 break;
1974             default:
1975                 i = 0;
1976                 break;
1977             }
1978             switch (c) {
1979             case INDEX_op_set_label:
1980             case INDEX_op_br:
1981             case INDEX_op_brcond_i32:
1982             case INDEX_op_brcond_i64:
1983             case INDEX_op_brcond2_i32:
1984                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
1985                                   arg_label(op->args[k])->id);
1986                 i++, k++;
1987                 break;
1988             default:
1989                 break;
1990             }
1991             for (; i < nb_cargs; i++, k++) {
1992                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
1993                                   op->args[k]);
1994             }
1995         }
1996 
1997         if (have_prefs || op->life) {
1998             for (; col < 40; ++col) {
1999                 putc(' ', f);
2000             }
2001         }
2002 
2003         if (op->life) {
2004             unsigned life = op->life;
2005 
2006             if (life & (SYNC_ARG * 3)) {
2007                 ne_fprintf(f, "  sync:");
2008                 for (i = 0; i < 2; ++i) {
2009                     if (life & (SYNC_ARG << i)) {
2010                         ne_fprintf(f, " %d", i);
2011                     }
2012                 }
2013             }
2014             life /= DEAD_ARG;
2015             if (life) {
2016                 ne_fprintf(f, "  dead:");
2017                 for (i = 0; life; ++i, life >>= 1) {
2018                     if (life & 1) {
2019                         ne_fprintf(f, " %d", i);
2020                     }
2021                 }
2022             }
2023         }
2024 
2025         if (have_prefs) {
2026             for (i = 0; i < nb_oargs; ++i) {
2027                 TCGRegSet set = op->output_pref[i];
2028 
2029                 if (i == 0) {
2030                     ne_fprintf(f, "  pref=");
2031                 } else {
2032                     ne_fprintf(f, ",");
2033                 }
2034                 if (set == 0) {
2035                     ne_fprintf(f, "none");
2036                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2037                     ne_fprintf(f, "all");
2038 #ifdef CONFIG_DEBUG_TCG
2039                 } else if (tcg_regset_single(set)) {
2040                     TCGReg reg = tcg_regset_first(set);
2041                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2042 #endif
2043                 } else if (TCG_TARGET_NB_REGS <= 32) {
2044                     ne_fprintf(f, "0x%x", (uint32_t)set);
2045                 } else {
2046                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2047                 }
2048             }
2049         }
2050 
2051         putc('\n', f);
2052     }
2053 }
2054 
2055 /* we give more priority to constraints with less registers */
2056 static int get_constraint_priority(const TCGOpDef *def, int k)
2057 {
2058     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2059     int n;
2060 
2061     if (arg_ct->oalias) {
2062         /* an alias is equivalent to a single register */
2063         n = 1;
2064     } else {
2065         n = ctpop64(arg_ct->regs);
2066     }
2067     return TCG_TARGET_NB_REGS - n + 1;
2068 }
2069 
2070 /* sort from highest priority to lowest */
2071 static void sort_constraints(TCGOpDef *def, int start, int n)
2072 {
2073     int i, j;
2074     TCGArgConstraint *a = def->args_ct;
2075 
2076     for (i = 0; i < n; i++) {
2077         a[start + i].sort_index = start + i;
2078     }
2079     if (n <= 1) {
2080         return;
2081     }
2082     for (i = 0; i < n - 1; i++) {
2083         for (j = i + 1; j < n; j++) {
2084             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2085             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2086             if (p1 < p2) {
2087                 int tmp = a[start + i].sort_index;
2088                 a[start + i].sort_index = a[start + j].sort_index;
2089                 a[start + j].sort_index = tmp;
2090             }
2091         }
2092     }
2093 }
2094 
2095 static void process_op_defs(TCGContext *s)
2096 {
2097     TCGOpcode op;
2098 
2099     for (op = 0; op < NB_OPS; op++) {
2100         TCGOpDef *def = &tcg_op_defs[op];
2101         const TCGTargetOpDef *tdefs;
2102         int i, nb_args;
2103 
2104         if (def->flags & TCG_OPF_NOT_PRESENT) {
2105             continue;
2106         }
2107 
2108         nb_args = def->nb_iargs + def->nb_oargs;
2109         if (nb_args == 0) {
2110             continue;
2111         }
2112 
2113         /*
2114          * Macro magic should make it impossible, but double-check that
2115          * the array index is in range.  Since the signness of an enum
2116          * is implementation defined, force the result to unsigned.
2117          */
2118         unsigned con_set = tcg_target_op_def(op);
2119         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2120         tdefs = &constraint_sets[con_set];
2121 
2122         for (i = 0; i < nb_args; i++) {
2123             const char *ct_str = tdefs->args_ct_str[i];
2124             /* Incomplete TCGTargetOpDef entry. */
2125             tcg_debug_assert(ct_str != NULL);
2126 
2127             while (*ct_str != '\0') {
2128                 switch(*ct_str) {
2129                 case '0' ... '9':
2130                     {
2131                         int oarg = *ct_str - '0';
2132                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2133                         tcg_debug_assert(oarg < def->nb_oargs);
2134                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2135                         def->args_ct[i] = def->args_ct[oarg];
2136                         /* The output sets oalias.  */
2137                         def->args_ct[oarg].oalias = true;
2138                         def->args_ct[oarg].alias_index = i;
2139                         /* The input sets ialias. */
2140                         def->args_ct[i].ialias = true;
2141                         def->args_ct[i].alias_index = oarg;
2142                     }
2143                     ct_str++;
2144                     break;
2145                 case '&':
2146                     def->args_ct[i].newreg = true;
2147                     ct_str++;
2148                     break;
2149                 case 'i':
2150                     def->args_ct[i].ct |= TCG_CT_CONST;
2151                     ct_str++;
2152                     break;
2153 
2154                 /* Include all of the target-specific constraints. */
2155 
2156 #undef CONST
2157 #define CONST(CASE, MASK) \
2158     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2159 #define REGS(CASE, MASK) \
2160     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2161 
2162 #include "tcg-target-con-str.h"
2163 
2164 #undef REGS
2165 #undef CONST
2166                 default:
2167                     /* Typo in TCGTargetOpDef constraint. */
2168                     g_assert_not_reached();
2169                 }
2170             }
2171         }
2172 
2173         /* TCGTargetOpDef entry with too much information? */
2174         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2175 
2176         /* sort the constraints (XXX: this is just an heuristic) */
2177         sort_constraints(def, 0, def->nb_oargs);
2178         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2179     }
2180 }
2181 
2182 void tcg_op_remove(TCGContext *s, TCGOp *op)
2183 {
2184     TCGLabel *label;
2185 
2186     switch (op->opc) {
2187     case INDEX_op_br:
2188         label = arg_label(op->args[0]);
2189         label->refs--;
2190         break;
2191     case INDEX_op_brcond_i32:
2192     case INDEX_op_brcond_i64:
2193         label = arg_label(op->args[3]);
2194         label->refs--;
2195         break;
2196     case INDEX_op_brcond2_i32:
2197         label = arg_label(op->args[5]);
2198         label->refs--;
2199         break;
2200     default:
2201         break;
2202     }
2203 
2204     QTAILQ_REMOVE(&s->ops, op, link);
2205     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2206     s->nb_ops--;
2207 
2208 #ifdef CONFIG_PROFILER
2209     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2210 #endif
2211 }
2212 
2213 void tcg_remove_ops_after(TCGOp *op)
2214 {
2215     TCGContext *s = tcg_ctx;
2216 
2217     while (true) {
2218         TCGOp *last = tcg_last_op();
2219         if (last == op) {
2220             return;
2221         }
2222         tcg_op_remove(s, last);
2223     }
2224 }
2225 
2226 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2227 {
2228     TCGContext *s = tcg_ctx;
2229     TCGOp *op;
2230 
2231     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2232         op = tcg_malloc(sizeof(TCGOp));
2233     } else {
2234         op = QTAILQ_FIRST(&s->free_ops);
2235         QTAILQ_REMOVE(&s->free_ops, op, link);
2236     }
2237     memset(op, 0, offsetof(TCGOp, link));
2238     op->opc = opc;
2239     s->nb_ops++;
2240 
2241     return op;
2242 }
2243 
2244 TCGOp *tcg_emit_op(TCGOpcode opc)
2245 {
2246     TCGOp *op = tcg_op_alloc(opc);
2247     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2248     return op;
2249 }
2250 
2251 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2252 {
2253     TCGOp *new_op = tcg_op_alloc(opc);
2254     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2255     return new_op;
2256 }
2257 
2258 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2259 {
2260     TCGOp *new_op = tcg_op_alloc(opc);
2261     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2262     return new_op;
2263 }
2264 
2265 /* Reachable analysis : remove unreachable code.  */
2266 static void reachable_code_pass(TCGContext *s)
2267 {
2268     TCGOp *op, *op_next;
2269     bool dead = false;
2270 
2271     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2272         bool remove = dead;
2273         TCGLabel *label;
2274 
2275         switch (op->opc) {
2276         case INDEX_op_set_label:
2277             label = arg_label(op->args[0]);
2278             if (label->refs == 0) {
2279                 /*
2280                  * While there is an occasional backward branch, virtually
2281                  * all branches generated by the translators are forward.
2282                  * Which means that generally we will have already removed
2283                  * all references to the label that will be, and there is
2284                  * little to be gained by iterating.
2285                  */
2286                 remove = true;
2287             } else {
2288                 /* Once we see a label, insns become live again.  */
2289                 dead = false;
2290                 remove = false;
2291 
2292                 /*
2293                  * Optimization can fold conditional branches to unconditional.
2294                  * If we find a label with one reference which is preceded by
2295                  * an unconditional branch to it, remove both.  This needed to
2296                  * wait until the dead code in between them was removed.
2297                  */
2298                 if (label->refs == 1) {
2299                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2300                     if (op_prev->opc == INDEX_op_br &&
2301                         label == arg_label(op_prev->args[0])) {
2302                         tcg_op_remove(s, op_prev);
2303                         remove = true;
2304                     }
2305                 }
2306             }
2307             break;
2308 
2309         case INDEX_op_br:
2310         case INDEX_op_exit_tb:
2311         case INDEX_op_goto_ptr:
2312             /* Unconditional branches; everything following is dead.  */
2313             dead = true;
2314             break;
2315 
2316         case INDEX_op_call:
2317             /* Notice noreturn helper calls, raising exceptions.  */
2318             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2319                 dead = true;
2320             }
2321             break;
2322 
2323         case INDEX_op_insn_start:
2324             /* Never remove -- we need to keep these for unwind.  */
2325             remove = false;
2326             break;
2327 
2328         default:
2329             break;
2330         }
2331 
2332         if (remove) {
2333             tcg_op_remove(s, op);
2334         }
2335     }
2336 }
2337 
/* Bits of a temp's liveness state; they are OR-ed together as needed. */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / sync bit for argument @n.  Both expand to a masked
 * value (zero or non-zero) and expect a variable named arg_life to be
 * in scope where they are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2343 
2344 /* For liveness_pass_1, the register preferences for a given temp.  */
2345 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2346 {
2347     return ts->state_ptr;
2348 }
2349 
2350 /* For liveness_pass_1, reset the preferences for a given temp to the
2351  * maximal regset for its type.
2352  */
2353 static inline void la_reset_pref(TCGTemp *ts)
2354 {
2355     *la_temp_pref(ts)
2356         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2357 }
2358 
2359 /* liveness analysis: end of function: all temps are dead, and globals
2360    should be in memory. */
2361 static void la_func_end(TCGContext *s, int ng, int nt)
2362 {
2363     int i;
2364 
2365     for (i = 0; i < ng; ++i) {
2366         s->temps[i].state = TS_DEAD | TS_MEM;
2367         la_reset_pref(&s->temps[i]);
2368     }
2369     for (i = ng; i < nt; ++i) {
2370         s->temps[i].state = TS_DEAD;
2371         la_reset_pref(&s->temps[i]);
2372     }
2373 }
2374 
2375 /* liveness analysis: end of basic block: all temps are dead, globals
2376    and local temps should be in memory. */
2377 static void la_bb_end(TCGContext *s, int ng, int nt)
2378 {
2379     int i;
2380 
2381     for (i = 0; i < nt; ++i) {
2382         TCGTemp *ts = &s->temps[i];
2383         int state;
2384 
2385         switch (ts->kind) {
2386         case TEMP_FIXED:
2387         case TEMP_GLOBAL:
2388         case TEMP_LOCAL:
2389             state = TS_DEAD | TS_MEM;
2390             break;
2391         case TEMP_NORMAL:
2392         case TEMP_EBB:
2393         case TEMP_CONST:
2394             state = TS_DEAD;
2395             break;
2396         default:
2397             g_assert_not_reached();
2398         }
2399         ts->state = state;
2400         la_reset_pref(ts);
2401     }
2402 }
2403 
2404 /* liveness analysis: sync globals back to memory.  */
2405 static void la_global_sync(TCGContext *s, int ng)
2406 {
2407     int i;
2408 
2409     for (i = 0; i < ng; ++i) {
2410         int state = s->temps[i].state;
2411         s->temps[i].state = state | TS_MEM;
2412         if (state == TS_DEAD) {
2413             /* If the global was previously dead, reset prefs.  */
2414             la_reset_pref(&s->temps[i]);
2415         }
2416     }
2417 }
2418 
2419 /*
2420  * liveness analysis: conditional branch: all temps are dead unless
2421  * explicitly live-across-conditional-branch, globals and local temps
2422  * should be synced.
2423  */
2424 static void la_bb_sync(TCGContext *s, int ng, int nt)
2425 {
2426     la_global_sync(s, ng);
2427 
2428     for (int i = ng; i < nt; ++i) {
2429         TCGTemp *ts = &s->temps[i];
2430         int state;
2431 
2432         switch (ts->kind) {
2433         case TEMP_LOCAL:
2434             state = ts->state;
2435             ts->state = state | TS_MEM;
2436             if (state != TS_DEAD) {
2437                 continue;
2438             }
2439             break;
2440         case TEMP_NORMAL:
2441             s->temps[i].state = TS_DEAD;
2442             break;
2443         case TEMP_EBB:
2444         case TEMP_CONST:
2445             continue;
2446         default:
2447             g_assert_not_reached();
2448         }
2449         la_reset_pref(&s->temps[i]);
2450     }
2451 }
2452 
2453 /* liveness analysis: sync globals back to memory and kill.  */
2454 static void la_global_kill(TCGContext *s, int ng)
2455 {
2456     int i;
2457 
2458     for (i = 0; i < ng; i++) {
2459         s->temps[i].state = TS_DEAD | TS_MEM;
2460         la_reset_pref(&s->temps[i]);
2461     }
2462 }
2463 
2464 /* liveness analysis: note live globals crossing calls.  */
2465 static void la_cross_call(TCGContext *s, int nt)
2466 {
2467     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2468     int i;
2469 
2470     for (i = 0; i < nt; i++) {
2471         TCGTemp *ts = &s->temps[i];
2472         if (!(ts->state & TS_DEAD)) {
2473             TCGRegSet *pset = la_temp_pref(ts);
2474             TCGRegSet set = *pset;
2475 
2476             set &= mask;
2477             /* If the combination is not possible, restart.  */
2478             if (set == 0) {
2479                 set = tcg_target_available_regs[ts->type] & mask;
2480             }
2481             *pset = set;
2482         }
2483     }
2484 }
2485 
2486 /* Liveness analysis : update the opc_arg_life array to tell if a
2487    given input arguments is dead. Instructions updating dead
2488    temporaries are removed. */
2489 static void liveness_pass_1(TCGContext *s)
2490 {
2491     int nb_globals = s->nb_globals;
2492     int nb_temps = s->nb_temps;
2493     TCGOp *op, *op_prev;
2494     TCGRegSet *prefs;
2495     int i;
2496 
2497     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2498     for (i = 0; i < nb_temps; ++i) {
2499         s->temps[i].state_ptr = prefs + i;
2500     }
2501 
2502     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2503     la_func_end(s, nb_globals, nb_temps);
2504 
2505     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2506         int nb_iargs, nb_oargs;
2507         TCGOpcode opc_new, opc_new2;
2508         bool have_opc_new2;
2509         TCGLifeData arg_life = 0;
2510         TCGTemp *ts;
2511         TCGOpcode opc = op->opc;
2512         const TCGOpDef *def = &tcg_op_defs[opc];
2513 
2514         switch (opc) {
2515         case INDEX_op_call:
2516             {
2517                 int call_flags;
2518                 int nb_call_regs;
2519 
2520                 nb_oargs = TCGOP_CALLO(op);
2521                 nb_iargs = TCGOP_CALLI(op);
2522                 call_flags = tcg_call_flags(op);
2523 
2524                 /* pure functions can be removed if their result is unused */
2525                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2526                     for (i = 0; i < nb_oargs; i++) {
2527                         ts = arg_temp(op->args[i]);
2528                         if (ts->state != TS_DEAD) {
2529                             goto do_not_remove_call;
2530                         }
2531                     }
2532                     goto do_remove;
2533                 }
2534             do_not_remove_call:
2535 
2536                 /* Output args are dead.  */
2537                 for (i = 0; i < nb_oargs; i++) {
2538                     ts = arg_temp(op->args[i]);
2539                     if (ts->state & TS_DEAD) {
2540                         arg_life |= DEAD_ARG << i;
2541                     }
2542                     if (ts->state & TS_MEM) {
2543                         arg_life |= SYNC_ARG << i;
2544                     }
2545                     ts->state = TS_DEAD;
2546                     la_reset_pref(ts);
2547 
2548                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2549                     op->output_pref[i] = 0;
2550                 }
2551 
2552                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2553                                     TCG_CALL_NO_READ_GLOBALS))) {
2554                     la_global_kill(s, nb_globals);
2555                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2556                     la_global_sync(s, nb_globals);
2557                 }
2558 
2559                 /* Record arguments that die in this helper.  */
2560                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2561                     ts = arg_temp(op->args[i]);
2562                     if (ts && ts->state & TS_DEAD) {
2563                         arg_life |= DEAD_ARG << i;
2564                     }
2565                 }
2566 
2567                 /* For all live registers, remove call-clobbered prefs.  */
2568                 la_cross_call(s, nb_temps);
2569 
2570                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2571 
2572                 /* Input arguments are live for preceding opcodes.  */
2573                 for (i = 0; i < nb_iargs; i++) {
2574                     ts = arg_temp(op->args[i + nb_oargs]);
2575                     if (ts && ts->state & TS_DEAD) {
2576                         /* For those arguments that die, and will be allocated
2577                          * in registers, clear the register set for that arg,
2578                          * to be filled in below.  For args that will be on
2579                          * the stack, reset to any available reg.
2580                          */
2581                         *la_temp_pref(ts)
2582                             = (i < nb_call_regs ? 0 :
2583                                tcg_target_available_regs[ts->type]);
2584                         ts->state &= ~TS_DEAD;
2585                     }
2586                 }
2587 
2588                 /* For each input argument, add its input register to prefs.
2589                    If a temp is used once, this produces a single set bit.  */
2590                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2591                     ts = arg_temp(op->args[i + nb_oargs]);
2592                     if (ts) {
2593                         tcg_regset_set_reg(*la_temp_pref(ts),
2594                                            tcg_target_call_iarg_regs[i]);
2595                     }
2596                 }
2597             }
2598             break;
2599         case INDEX_op_insn_start:
2600             break;
2601         case INDEX_op_discard:
2602             /* mark the temporary as dead */
2603             ts = arg_temp(op->args[0]);
2604             ts->state = TS_DEAD;
2605             la_reset_pref(ts);
2606             break;
2607 
2608         case INDEX_op_add2_i32:
2609             opc_new = INDEX_op_add_i32;
2610             goto do_addsub2;
2611         case INDEX_op_sub2_i32:
2612             opc_new = INDEX_op_sub_i32;
2613             goto do_addsub2;
2614         case INDEX_op_add2_i64:
2615             opc_new = INDEX_op_add_i64;
2616             goto do_addsub2;
2617         case INDEX_op_sub2_i64:
2618             opc_new = INDEX_op_sub_i64;
2619         do_addsub2:
2620             nb_iargs = 4;
2621             nb_oargs = 2;
2622             /* Test if the high part of the operation is dead, but not
2623                the low part.  The result can be optimized to a simple
2624                add or sub.  This happens often for x86_64 guest when the
2625                cpu mode is set to 32 bit.  */
2626             if (arg_temp(op->args[1])->state == TS_DEAD) {
2627                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2628                     goto do_remove;
2629                 }
2630                 /* Replace the opcode and adjust the args in place,
2631                    leaving 3 unused args at the end.  */
2632                 op->opc = opc = opc_new;
2633                 op->args[1] = op->args[2];
2634                 op->args[2] = op->args[4];
2635                 /* Fall through and mark the single-word operation live.  */
2636                 nb_iargs = 2;
2637                 nb_oargs = 1;
2638             }
2639             goto do_not_remove;
2640 
2641         case INDEX_op_mulu2_i32:
2642             opc_new = INDEX_op_mul_i32;
2643             opc_new2 = INDEX_op_muluh_i32;
2644             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2645             goto do_mul2;
2646         case INDEX_op_muls2_i32:
2647             opc_new = INDEX_op_mul_i32;
2648             opc_new2 = INDEX_op_mulsh_i32;
2649             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2650             goto do_mul2;
2651         case INDEX_op_mulu2_i64:
2652             opc_new = INDEX_op_mul_i64;
2653             opc_new2 = INDEX_op_muluh_i64;
2654             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2655             goto do_mul2;
2656         case INDEX_op_muls2_i64:
2657             opc_new = INDEX_op_mul_i64;
2658             opc_new2 = INDEX_op_mulsh_i64;
2659             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2660             goto do_mul2;
2661         do_mul2:
2662             nb_iargs = 2;
2663             nb_oargs = 2;
2664             if (arg_temp(op->args[1])->state == TS_DEAD) {
2665                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2666                     /* Both parts of the operation are dead.  */
2667                     goto do_remove;
2668                 }
2669                 /* The high part of the operation is dead; generate the low. */
2670                 op->opc = opc = opc_new;
2671                 op->args[1] = op->args[2];
2672                 op->args[2] = op->args[3];
2673             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2674                 /* The low part of the operation is dead; generate the high. */
2675                 op->opc = opc = opc_new2;
2676                 op->args[0] = op->args[1];
2677                 op->args[1] = op->args[2];
2678                 op->args[2] = op->args[3];
2679             } else {
2680                 goto do_not_remove;
2681             }
2682             /* Mark the single-word operation live.  */
2683             nb_oargs = 1;
2684             goto do_not_remove;
2685 
2686         default:
2687             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2688             nb_iargs = def->nb_iargs;
2689             nb_oargs = def->nb_oargs;
2690 
2691             /* Test if the operation can be removed because all
2692                its outputs are dead. We assume that nb_oargs == 0
2693                implies side effects */
2694             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2695                 for (i = 0; i < nb_oargs; i++) {
2696                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2697                         goto do_not_remove;
2698                     }
2699                 }
2700                 goto do_remove;
2701             }
2702             goto do_not_remove;
2703 
2704         do_remove:
2705             tcg_op_remove(s, op);
2706             break;
2707 
2708         do_not_remove:
2709             for (i = 0; i < nb_oargs; i++) {
2710                 ts = arg_temp(op->args[i]);
2711 
2712                 /* Remember the preference of the uses that followed.  */
2713                 op->output_pref[i] = *la_temp_pref(ts);
2714 
2715                 /* Output args are dead.  */
2716                 if (ts->state & TS_DEAD) {
2717                     arg_life |= DEAD_ARG << i;
2718                 }
2719                 if (ts->state & TS_MEM) {
2720                     arg_life |= SYNC_ARG << i;
2721                 }
2722                 ts->state = TS_DEAD;
2723                 la_reset_pref(ts);
2724             }
2725 
2726             /* If end of basic block, update.  */
2727             if (def->flags & TCG_OPF_BB_EXIT) {
2728                 la_func_end(s, nb_globals, nb_temps);
2729             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2730                 la_bb_sync(s, nb_globals, nb_temps);
2731             } else if (def->flags & TCG_OPF_BB_END) {
2732                 la_bb_end(s, nb_globals, nb_temps);
2733             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2734                 la_global_sync(s, nb_globals);
2735                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2736                     la_cross_call(s, nb_temps);
2737                 }
2738             }
2739 
2740             /* Record arguments that die in this opcode.  */
2741             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2742                 ts = arg_temp(op->args[i]);
2743                 if (ts->state & TS_DEAD) {
2744                     arg_life |= DEAD_ARG << i;
2745                 }
2746             }
2747 
2748             /* Input arguments are live for preceding opcodes.  */
2749             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2750                 ts = arg_temp(op->args[i]);
2751                 if (ts->state & TS_DEAD) {
2752                     /* For operands that were dead, initially allow
2753                        all regs for the type.  */
2754                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2755                     ts->state &= ~TS_DEAD;
2756                 }
2757             }
2758 
2759             /* Incorporate constraints for this operand.  */
2760             switch (opc) {
2761             case INDEX_op_mov_i32:
2762             case INDEX_op_mov_i64:
2763                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2764                    have proper constraints.  That said, special case
2765                    moves to propagate preferences backward.  */
2766                 if (IS_DEAD_ARG(1)) {
2767                     *la_temp_pref(arg_temp(op->args[0]))
2768                         = *la_temp_pref(arg_temp(op->args[1]));
2769                 }
2770                 break;
2771 
2772             default:
2773                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2774                     const TCGArgConstraint *ct = &def->args_ct[i];
2775                     TCGRegSet set, *pset;
2776 
2777                     ts = arg_temp(op->args[i]);
2778                     pset = la_temp_pref(ts);
2779                     set = *pset;
2780 
2781                     set &= ct->regs;
2782                     if (ct->ialias) {
2783                         set &= op->output_pref[ct->alias_index];
2784                     }
2785                     /* If the combination is not possible, restart.  */
2786                     if (set == 0) {
2787                         set = ct->regs;
2788                     }
2789                     *pset = set;
2790                 }
2791                 break;
2792             }
2793             break;
2794         }
2795         op->life = arg_life;
2796     }
2797 }
2798 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * For every global flagged indirect_reg a shadow TEMP_EBB temporary is
 * allocated and recorded in the global's state_ptr.  The op list is then
 * walked once and each reference to such a global is rewritten to use
 * the shadow temp instead:
 *   - an ld_i32/ld_i64 is inserted before an op that reads a global
 *     whose tracked state is TS_DEAD;
 *   - an st_i32/st_i64 is inserted after an op whose output is flagged
 *     NEED_SYNC_ARG in the op's life data.
 *
 * Returns true if at least one op argument was rewritten.
 */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            /* The shadow temp copies the global's type and base type.  */
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            /* A NULL state_ptr marks "no replacement needed" below.  */
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /*
     * Continue from i == nb_globals over the remaining temps.  Note that
     * s->nb_temps is only sampled here, after the allocations above
     * (presumably so the new shadow temps are covered too -- confirm
     * against tcg_temp_alloc).
     */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* The _SAFE iterator is needed: ops are inserted and removed below.  */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        /* Named arg_life because IS_DEAD_ARG/NEED_SYNC_ARG are used below.  */
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            /* Calls carry their argument counts and flags in the op.  */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    /* Reload the global from its memory slot into the
                       shadow temp just before this op.  */
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            /* Moves are special-cased so that a mov whose output is both
               synced and dead can be replaced by a plain store.  */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Store the mov's source directly and drop the
                           mov itself; the store op was already inserted
                           after it, so removing 'op' here is safe.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    /* A dead output that is never synced is unexpected.  */
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    /* Output is not an indirect global: leave untouched.  */
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
2989 
2990 #ifdef CONFIG_DEBUG_TCG
2991 static void dump_regs(TCGContext *s)
2992 {
2993     TCGTemp *ts;
2994     int i;
2995     char buf[64];
2996 
2997     for(i = 0; i < s->nb_temps; i++) {
2998         ts = &s->temps[i];
2999         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3000         switch(ts->val_type) {
3001         case TEMP_VAL_REG:
3002             printf("%s", tcg_target_reg_names[ts->reg]);
3003             break;
3004         case TEMP_VAL_MEM:
3005             printf("%d(%s)", (int)ts->mem_offset,
3006                    tcg_target_reg_names[ts->mem_base->reg]);
3007             break;
3008         case TEMP_VAL_CONST:
3009             printf("$0x%" PRIx64, ts->val);
3010             break;
3011         case TEMP_VAL_DEAD:
3012             printf("D");
3013             break;
3014         default:
3015             printf("???");
3016             break;
3017         }
3018         printf("\n");
3019     }
3020 
3021     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3022         if (s->reg_to_temp[i] != NULL) {
3023             printf("%s: %s\n",
3024                    tcg_target_reg_names[i],
3025                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3026         }
3027     }
3028 }
3029 
3030 static void check_regs(TCGContext *s)
3031 {
3032     int reg;
3033     int k;
3034     TCGTemp *ts;
3035     char buf[64];
3036 
3037     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3038         ts = s->reg_to_temp[reg];
3039         if (ts != NULL) {
3040             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3041                 printf("Inconsistency for register %s:\n",
3042                        tcg_target_reg_names[reg]);
3043                 goto fail;
3044             }
3045         }
3046     }
3047     for (k = 0; k < s->nb_temps; k++) {
3048         ts = &s->temps[k];
3049         if (ts->val_type == TEMP_VAL_REG
3050             && ts->kind != TEMP_FIXED
3051             && s->reg_to_temp[ts->reg] != ts) {
3052             printf("Inconsistency for temp %s:\n",
3053                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3054         fail:
3055             printf("reg state:\n");
3056             dump_regs(s);
3057             tcg_abort();
3058         }
3059     }
3060 }
3061 #endif
3062 
3063 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3064 {
3065     intptr_t off, size, align;
3066 
3067     switch (ts->type) {
3068     case TCG_TYPE_I32:
3069         size = align = 4;
3070         break;
3071     case TCG_TYPE_I64:
3072     case TCG_TYPE_V64:
3073         size = align = 8;
3074         break;
3075     case TCG_TYPE_V128:
3076         size = align = 16;
3077         break;
3078     case TCG_TYPE_V256:
3079         /* Note that we do not require aligned storage for V256. */
3080         size = 32, align = 16;
3081         break;
3082     default:
3083         g_assert_not_reached();
3084     }
3085 
3086     /*
3087      * Assume the stack is sufficiently aligned.
3088      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3089      * and do not require 16 byte vector alignment.  This seems slightly
3090      * easier than fully parameterizing the above switch statement.
3091      */
3092     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3093     off = ROUND_UP(s->current_frame_offset, align);
3094 
3095     /* If we've exhausted the stack frame, restart with a smaller TB. */
3096     if (off + size > s->frame_end) {
3097         tcg_raise_tb_overflow(s);
3098     }
3099     s->current_frame_offset = off + size;
3100 
3101     ts->mem_offset = off;
3102 #if defined(__sparc__)
3103     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3104 #endif
3105     ts->mem_base = s->frame_temp;
3106     ts->mem_allocated = 1;
3107 }
3108 
3109 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3110 
3111 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3112    mark it free; otherwise mark it dead.  */
3113 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3114 {
3115     TCGTempVal new_type;
3116 
3117     switch (ts->kind) {
3118     case TEMP_FIXED:
3119         return;
3120     case TEMP_GLOBAL:
3121     case TEMP_LOCAL:
3122         new_type = TEMP_VAL_MEM;
3123         break;
3124     case TEMP_NORMAL:
3125     case TEMP_EBB:
3126         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3127         break;
3128     case TEMP_CONST:
3129         new_type = TEMP_VAL_CONST;
3130         break;
3131     default:
3132         g_assert_not_reached();
3133     }
3134     if (ts->val_type == TEMP_VAL_REG) {
3135         s->reg_to_temp[ts->reg] = NULL;
3136     }
3137     ts->val_type = new_type;
3138 }
3139 
3140 /* Mark a temporary as dead.  */
3141 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3142 {
3143     temp_free_or_dead(s, ts, 1);
3144 }
3145 
3146 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3147    registers needs to be allocated to store a constant.  If 'free_or_dead'
3148    is non-zero, subsequently release the temporary; if it is positive, the
3149    temp is dead; if it is negative, the temp is free.  */
3150 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3151                       TCGRegSet preferred_regs, int free_or_dead)
3152 {
3153     if (!temp_readonly(ts) && !ts->mem_coherent) {
3154         if (!ts->mem_allocated) {
3155             temp_allocate_frame(s, ts);
3156         }
3157         switch (ts->val_type) {
3158         case TEMP_VAL_CONST:
3159             /* If we're going to free the temp immediately, then we won't
3160                require it later in a register, so attempt to store the
3161                constant to memory directly.  */
3162             if (free_or_dead
3163                 && tcg_out_sti(s, ts->type, ts->val,
3164                                ts->mem_base->reg, ts->mem_offset)) {
3165                 break;
3166             }
3167             temp_load(s, ts, tcg_target_available_regs[ts->type],
3168                       allocated_regs, preferred_regs);
3169             /* fallthrough */
3170 
3171         case TEMP_VAL_REG:
3172             tcg_out_st(s, ts->type, ts->reg,
3173                        ts->mem_base->reg, ts->mem_offset);
3174             break;
3175 
3176         case TEMP_VAL_MEM:
3177             break;
3178 
3179         case TEMP_VAL_DEAD:
3180         default:
3181             tcg_abort();
3182         }
3183         ts->mem_coherent = 1;
3184     }
3185     if (free_or_dead) {
3186         temp_free_or_dead(s, ts, free_or_dead);
3187     }
3188 }
3189 
3190 /* free register 'reg' by spilling the corresponding temporary if necessary */
3191 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3192 {
3193     TCGTemp *ts = s->reg_to_temp[reg];
3194     if (ts != NULL) {
3195         temp_sync(s, ts, allocated_regs, 0, -1);
3196     }
3197 }
3198 
3199 /**
3200  * tcg_reg_alloc:
3201  * @required_regs: Set of registers in which we must allocate.
3202  * @allocated_regs: Set of registers which must be avoided.
3203  * @preferred_regs: Set of registers we should prefer.
3204  * @rev: True if we search the registers in "indirect" order.
3205  *
3206  * The allocated register must be in @required_regs & ~@allocated_regs,
3207  * but if we can put it in @preferred_regs we may save a move later.
3208  */
3209 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3210                             TCGRegSet allocated_regs,
3211                             TCGRegSet preferred_regs, bool rev)
3212 {
3213     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3214     TCGRegSet reg_ct[2];
3215     const int *order;
3216 
3217     reg_ct[1] = required_regs & ~allocated_regs;
3218     tcg_debug_assert(reg_ct[1] != 0);
3219     reg_ct[0] = reg_ct[1] & preferred_regs;
3220 
3221     /* Skip the preferred_regs option if it cannot be satisfied,
3222        or if the preference made no difference.  */
3223     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3224 
3225     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3226 
3227     /* Try free registers, preferences first.  */
3228     for (j = f; j < 2; j++) {
3229         TCGRegSet set = reg_ct[j];
3230 
3231         if (tcg_regset_single(set)) {
3232             /* One register in the set.  */
3233             TCGReg reg = tcg_regset_first(set);
3234             if (s->reg_to_temp[reg] == NULL) {
3235                 return reg;
3236             }
3237         } else {
3238             for (i = 0; i < n; i++) {
3239                 TCGReg reg = order[i];
3240                 if (s->reg_to_temp[reg] == NULL &&
3241                     tcg_regset_test_reg(set, reg)) {
3242                     return reg;
3243                 }
3244             }
3245         }
3246     }
3247 
3248     /* We must spill something.  */
3249     for (j = f; j < 2; j++) {
3250         TCGRegSet set = reg_ct[j];
3251 
3252         if (tcg_regset_single(set)) {
3253             /* One register in the set.  */
3254             TCGReg reg = tcg_regset_first(set);
3255             tcg_reg_free(s, reg, allocated_regs);
3256             return reg;
3257         } else {
3258             for (i = 0; i < n; i++) {
3259                 TCGReg reg = order[i];
3260                 if (tcg_regset_test_reg(set, reg)) {
3261                     tcg_reg_free(s, reg, allocated_regs);
3262                     return reg;
3263                 }
3264             }
3265         }
3266     }
3267 
3268     tcg_abort();
3269 }
3270 
3271 /* Make sure the temporary is in a register.  If needed, allocate the register
3272    from DESIRED while avoiding ALLOCATED.  */
3273 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3274                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3275 {
3276     TCGReg reg;
3277 
3278     switch (ts->val_type) {
3279     case TEMP_VAL_REG:
3280         return;
3281     case TEMP_VAL_CONST:
3282         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3283                             preferred_regs, ts->indirect_base);
3284         if (ts->type <= TCG_TYPE_I64) {
3285             tcg_out_movi(s, ts->type, reg, ts->val);
3286         } else {
3287             uint64_t val = ts->val;
3288             MemOp vece = MO_64;
3289 
3290             /*
3291              * Find the minimal vector element that matches the constant.
3292              * The targets will, in general, have to do this search anyway,
3293              * do this generically.
3294              */
3295             if (val == dup_const(MO_8, val)) {
3296                 vece = MO_8;
3297             } else if (val == dup_const(MO_16, val)) {
3298                 vece = MO_16;
3299             } else if (val == dup_const(MO_32, val)) {
3300                 vece = MO_32;
3301             }
3302 
3303             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3304         }
3305         ts->mem_coherent = 0;
3306         break;
3307     case TEMP_VAL_MEM:
3308         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3309                             preferred_regs, ts->indirect_base);
3310         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3311         ts->mem_coherent = 1;
3312         break;
3313     case TEMP_VAL_DEAD:
3314     default:
3315         tcg_abort();
3316     }
3317     ts->reg = reg;
3318     ts->val_type = TEMP_VAL_REG;
3319     s->reg_to_temp[reg] = ts;
3320 }
3321 
3322 /* Save a temporary to memory. 'allocated_regs' is used in case a
3323    temporary registers needs to be allocated to store a constant.  */
3324 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3325 {
3326     /* The liveness analysis already ensures that globals are back
3327        in memory. Keep an tcg_debug_assert for safety. */
3328     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3329 }
3330 
3331 /* save globals to their canonical location and assume they can be
3332    modified be the following code. 'allocated_regs' is used in case a
3333    temporary registers needs to be allocated to store a constant. */
3334 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3335 {
3336     int i, n;
3337 
3338     for (i = 0, n = s->nb_globals; i < n; i++) {
3339         temp_save(s, &s->temps[i], allocated_regs);
3340     }
3341 }
3342 
3343 /* sync globals to their canonical location and assume they can be
3344    read by the following code. 'allocated_regs' is used in case a
3345    temporary registers needs to be allocated to store a constant. */
3346 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3347 {
3348     int i, n;
3349 
3350     for (i = 0, n = s->nb_globals; i < n; i++) {
3351         TCGTemp *ts = &s->temps[i];
3352         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3353                          || ts->kind == TEMP_FIXED
3354                          || ts->mem_coherent);
3355     }
3356 }
3357 
3358 /* at the end of a basic block, we assume all temporaries are dead and
3359    all globals are stored at their canonical location. */
3360 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3361 {
3362     int i;
3363 
3364     for (i = s->nb_globals; i < s->nb_temps; i++) {
3365         TCGTemp *ts = &s->temps[i];
3366 
3367         switch (ts->kind) {
3368         case TEMP_LOCAL:
3369             temp_save(s, ts, allocated_regs);
3370             break;
3371         case TEMP_NORMAL:
3372         case TEMP_EBB:
3373             /* The liveness analysis already ensures that temps are dead.
3374                Keep an tcg_debug_assert for safety. */
3375             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3376             break;
3377         case TEMP_CONST:
3378             /* Similarly, we should have freed any allocated register. */
3379             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3380             break;
3381         default:
3382             g_assert_not_reached();
3383         }
3384     }
3385 
3386     save_globals(s, allocated_regs);
3387 }
3388 
3389 /*
3390  * At a conditional branch, we assume all temporaries are dead unless
3391  * explicitly live-across-conditional-branch; all globals and local
3392  * temps are synced to their location.
3393  */
3394 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3395 {
3396     sync_globals(s, allocated_regs);
3397 
3398     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3399         TCGTemp *ts = &s->temps[i];
3400         /*
3401          * The liveness analysis already ensures that temps are dead.
3402          * Keep tcg_debug_asserts for safety.
3403          */
3404         switch (ts->kind) {
3405         case TEMP_LOCAL:
3406             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3407             break;
3408         case TEMP_NORMAL:
3409             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3410             break;
3411         case TEMP_EBB:
3412         case TEMP_CONST:
3413             break;
3414         default:
3415             g_assert_not_reached();
3416         }
3417     }
3418 }
3419 
3420 /*
3421  * Specialized code generation for INDEX_op_mov_* with a constant.
3422  */
3423 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3424                                   tcg_target_ulong val, TCGLifeData arg_life,
3425                                   TCGRegSet preferred_regs)
3426 {
3427     /* ENV should not be modified.  */
3428     tcg_debug_assert(!temp_readonly(ots));
3429 
3430     /* The movi is not explicitly generated here.  */
3431     if (ots->val_type == TEMP_VAL_REG) {
3432         s->reg_to_temp[ots->reg] = NULL;
3433     }
3434     ots->val_type = TEMP_VAL_CONST;
3435     ots->val = val;
3436     ots->mem_coherent = 0;
3437     if (NEED_SYNC_ARG(0)) {
3438         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3439     } else if (IS_DEAD_ARG(0)) {
3440         temp_dead(s, ots);
3441     }
3442 }
3443 
3444 /*
3445  * Specialized code generation for INDEX_op_mov_*.
3446  */
3447 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3448 {
3449     const TCGLifeData arg_life = op->life;
3450     TCGRegSet allocated_regs, preferred_regs;
3451     TCGTemp *ts, *ots;
3452     TCGType otype, itype;
3453 
3454     allocated_regs = s->reserved_regs;
3455     preferred_regs = op->output_pref[0];
3456     ots = arg_temp(op->args[0]);
3457     ts = arg_temp(op->args[1]);
3458 
3459     /* ENV should not be modified.  */
3460     tcg_debug_assert(!temp_readonly(ots));
3461 
3462     /* Note that otype != itype for no-op truncation.  */
3463     otype = ots->type;
3464     itype = ts->type;
3465 
3466     if (ts->val_type == TEMP_VAL_CONST) {
3467         /* propagate constant or generate sti */
3468         tcg_target_ulong val = ts->val;
3469         if (IS_DEAD_ARG(1)) {
3470             temp_dead(s, ts);
3471         }
3472         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3473         return;
3474     }
3475 
3476     /* If the source value is in memory we're going to be forced
3477        to have it in a register in order to perform the copy.  Copy
3478        the SOURCE value into its own register first, that way we
3479        don't have to reload SOURCE the next time it is used. */
3480     if (ts->val_type == TEMP_VAL_MEM) {
3481         temp_load(s, ts, tcg_target_available_regs[itype],
3482                   allocated_regs, preferred_regs);
3483     }
3484 
3485     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3486     if (IS_DEAD_ARG(0)) {
3487         /* mov to a non-saved dead register makes no sense (even with
3488            liveness analysis disabled). */
3489         tcg_debug_assert(NEED_SYNC_ARG(0));
3490         if (!ots->mem_allocated) {
3491             temp_allocate_frame(s, ots);
3492         }
3493         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3494         if (IS_DEAD_ARG(1)) {
3495             temp_dead(s, ts);
3496         }
3497         temp_dead(s, ots);
3498     } else {
3499         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3500             /* the mov can be suppressed */
3501             if (ots->val_type == TEMP_VAL_REG) {
3502                 s->reg_to_temp[ots->reg] = NULL;
3503             }
3504             ots->reg = ts->reg;
3505             temp_dead(s, ts);
3506         } else {
3507             if (ots->val_type != TEMP_VAL_REG) {
3508                 /* When allocating a new register, make sure to not spill the
3509                    input one. */
3510                 tcg_regset_set_reg(allocated_regs, ts->reg);
3511                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3512                                          allocated_regs, preferred_regs,
3513                                          ots->indirect_base);
3514             }
3515             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3516                 /*
3517                  * Cross register class move not supported.
3518                  * Store the source register into the destination slot
3519                  * and leave the destination temp as TEMP_VAL_MEM.
3520                  */
3521                 assert(!temp_readonly(ots));
3522                 if (!ts->mem_allocated) {
3523                     temp_allocate_frame(s, ots);
3524                 }
3525                 tcg_out_st(s, ts->type, ts->reg,
3526                            ots->mem_base->reg, ots->mem_offset);
3527                 ots->mem_coherent = 1;
3528                 temp_free_or_dead(s, ots, -1);
3529                 return;
3530             }
3531         }
3532         ots->val_type = TEMP_VAL_REG;
3533         ots->mem_coherent = 0;
3534         s->reg_to_temp[ots->reg] = ots;
3535         if (NEED_SYNC_ARG(0)) {
3536             temp_sync(s, ots, allocated_regs, 0, 0);
3537         }
3538     }
3539 }
3540 
3541 /*
3542  * Specialized code generation for INDEX_op_dup_vec.
3543  */
3544 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3545 {
3546     const TCGLifeData arg_life = op->life;
3547     TCGRegSet dup_out_regs, dup_in_regs;
3548     TCGTemp *its, *ots;
3549     TCGType itype, vtype;
3550     intptr_t endian_fixup;
3551     unsigned vece;
3552     bool ok;
3553 
3554     ots = arg_temp(op->args[0]);
3555     its = arg_temp(op->args[1]);
3556 
3557     /* ENV should not be modified.  */
3558     tcg_debug_assert(!temp_readonly(ots));
3559 
3560     itype = its->type;
3561     vece = TCGOP_VECE(op);
3562     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3563 
3564     if (its->val_type == TEMP_VAL_CONST) {
3565         /* Propagate constant via movi -> dupi.  */
3566         tcg_target_ulong val = its->val;
3567         if (IS_DEAD_ARG(1)) {
3568             temp_dead(s, its);
3569         }
3570         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3571         return;
3572     }
3573 
3574     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3575     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3576 
3577     /* Allocate the output register now.  */
3578     if (ots->val_type != TEMP_VAL_REG) {
3579         TCGRegSet allocated_regs = s->reserved_regs;
3580 
3581         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3582             /* Make sure to not spill the input register. */
3583             tcg_regset_set_reg(allocated_regs, its->reg);
3584         }
3585         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3586                                  op->output_pref[0], ots->indirect_base);
3587         ots->val_type = TEMP_VAL_REG;
3588         ots->mem_coherent = 0;
3589         s->reg_to_temp[ots->reg] = ots;
3590     }
3591 
3592     switch (its->val_type) {
3593     case TEMP_VAL_REG:
3594         /*
3595          * The dup constriaints must be broad, covering all possible VECE.
3596          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3597          * to fail, indicating that extra moves are required for that case.
3598          */
3599         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3600             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3601                 goto done;
3602             }
3603             /* Try again from memory or a vector input register.  */
3604         }
3605         if (!its->mem_coherent) {
3606             /*
3607              * The input register is not synced, and so an extra store
3608              * would be required to use memory.  Attempt an integer-vector
3609              * register move first.  We do not have a TCGRegSet for this.
3610              */
3611             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3612                 break;
3613             }
3614             /* Sync the temp back to its slot and load from there.  */
3615             temp_sync(s, its, s->reserved_regs, 0, 0);
3616         }
3617         /* fall through */
3618 
3619     case TEMP_VAL_MEM:
3620 #if HOST_BIG_ENDIAN
3621         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3622         endian_fixup -= 1 << vece;
3623 #else
3624         endian_fixup = 0;
3625 #endif
3626         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3627                              its->mem_offset + endian_fixup)) {
3628             goto done;
3629         }
3630         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3631         break;
3632 
3633     default:
3634         g_assert_not_reached();
3635     }
3636 
3637     /* We now have a vector input register, so dup must succeed. */
3638     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3639     tcg_debug_assert(ok);
3640 
3641  done:
3642     if (IS_DEAD_ARG(1)) {
3643         temp_dead(s, its);
3644     }
3645     if (NEED_SYNC_ARG(0)) {
3646         temp_sync(s, ots, s->reserved_regs, 0, 0);
3647     }
3648     if (IS_DEAD_ARG(0)) {
3649         temp_dead(s, ots);
3650     }
3651 }
3652 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The steps are, in order:
 *   1. copy the op's constant arguments into new_args;
 *   2. for every input (visited in constraint sort order), either accept
 *      it as an immediate or place it in a register that satisfies the
 *      argument constraint;
 *   3. release the inputs that liveness marked dead;
 *   4. for branch / basic-block-end ops run the matching end-of-block
 *      handling; otherwise free call-clobbered registers, sync globals
 *      for ops with side effects, and assign output registers;
 *   5. emit the host instruction via tcg_out_op() or tcg_out_vec_op();
 *   6. sync or kill each output as requested by the liveness data.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /* Per-argument register number or immediate value handed to the backend. */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    /* Set to 1 for inputs passed as an immediate, 0 for inputs in a register. */
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Both sets start out as the reserved registers and grow as we allocate. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* k walks the sort order; i is the real argument index. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            /* The input register will be reused as the output: steer it. */
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            /*
             * Three cases: reuse the register of the aliased input (only
             * when that input was not passed as an immediate), allocate a
             * register distinct from all inputs (newreg), or allocate one
             * that merely avoids the other outputs.
             *
             * NOTE(review): output_pref is indexed by the sort position k
             * here, whereas the input loop above indexes it by argument
             * index (alias_index) -- confirm this is intended.
             */
            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* Drop the temp's previous register binding, if any. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3837 
3838 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3839 {
3840     const TCGLifeData arg_life = op->life;
3841     TCGTemp *ots, *itsl, *itsh;
3842     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3843 
3844     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3845     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3846     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3847 
3848     ots = arg_temp(op->args[0]);
3849     itsl = arg_temp(op->args[1]);
3850     itsh = arg_temp(op->args[2]);
3851 
3852     /* ENV should not be modified.  */
3853     tcg_debug_assert(!temp_readonly(ots));
3854 
3855     /* Allocate the output register now.  */
3856     if (ots->val_type != TEMP_VAL_REG) {
3857         TCGRegSet allocated_regs = s->reserved_regs;
3858         TCGRegSet dup_out_regs =
3859             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3860 
3861         /* Make sure to not spill the input registers. */
3862         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3863             tcg_regset_set_reg(allocated_regs, itsl->reg);
3864         }
3865         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3866             tcg_regset_set_reg(allocated_regs, itsh->reg);
3867         }
3868 
3869         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3870                                  op->output_pref[0], ots->indirect_base);
3871         ots->val_type = TEMP_VAL_REG;
3872         ots->mem_coherent = 0;
3873         s->reg_to_temp[ots->reg] = ots;
3874     }
3875 
3876     /* Promote dup2 of immediates to dupi_vec. */
3877     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3878         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3879         MemOp vece = MO_64;
3880 
3881         if (val == dup_const(MO_8, val)) {
3882             vece = MO_8;
3883         } else if (val == dup_const(MO_16, val)) {
3884             vece = MO_16;
3885         } else if (val == dup_const(MO_32, val)) {
3886             vece = MO_32;
3887         }
3888 
3889         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3890         goto done;
3891     }
3892 
3893     /* If the two inputs form one 64-bit value, try dupm_vec. */
3894     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3895         if (!itsl->mem_coherent) {
3896             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3897         }
3898         if (!itsh->mem_coherent) {
3899             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3900         }
3901 #if HOST_BIG_ENDIAN
3902         TCGTemp *its = itsh;
3903 #else
3904         TCGTemp *its = itsl;
3905 #endif
3906         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3907                              its->mem_base->reg, its->mem_offset)) {
3908             goto done;
3909         }
3910     }
3911 
3912     /* Fall back to generic expansion. */
3913     return false;
3914 
3915  done:
3916     if (IS_DEAD_ARG(1)) {
3917         temp_dead(s, itsl);
3918     }
3919     if (IS_DEAD_ARG(2)) {
3920         temp_dead(s, itsh);
3921     }
3922     if (NEED_SYNC_ARG(0)) {
3923         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3924     } else if (IS_DEAD_ARG(0)) {
3925         temp_dead(s, ots);
3926     }
3927     return true;
3928 }
3929 
/*
 * STACK_DIR(x) expands to x, or to -x when TCG_TARGET_STACK_GROWSUP
 * is defined.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3935 
/*
 * Register allocation and code emission for INDEX_op_call.
 *
 * The first ARRAY_SIZE(tcg_target_call_iarg_regs) inputs are placed in
 * the corresponding entries of tcg_target_call_iarg_regs; any further
 * inputs are stored relative to TCG_REG_CALL_STACK.  After the inputs
 * are in place, dead inputs are released, call-clobbered registers are
 * freed, globals are synced or saved according to the helper's flags,
 * the call is emitted, and the outputs are bound to
 * tcg_target_call_oarg_regs.
 *
 * Aborts if the stack arguments need more than
 * TCG_STATIC_CALL_ARGS_SIZE bytes.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = tcg_call_func(op);
    info = tcg_call_info(op);
    flags = info->flags;

    /* Number of inputs passed in registers: capped by the input count. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* Dummy arguments still consume a slot but store nothing. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever lives in the target register first. */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Load directly into the argument register, and only it. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

#ifdef CONFIG_TCG_INTERPRETER
    {
        /* The interpreter needs the ffi_cif looked up by the helper typemask. */
        gpointer hash = (gpointer)(uintptr_t)info->typemask;
        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
        assert(cif != NULL);
        tcg_out_call(s, func_addr, cif);
    }
#else
    tcg_out_call(s, func_addr);
#endif

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        /* The return register must be unowned at this point. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        /* The register now holds a new value; the memory slot is stale. */
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4081 
4082 #ifdef CONFIG_PROFILER
4083 
/* avoid copy/paste errors */

/* Add an atomic read of (from)->field to (to)->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Raise (to)->field to an atomic read of (from)->field if that is larger. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4097 
4098 /* Pass in a zero'ed @prof */
4099 static inline
4100 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4101 {
4102     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4103     unsigned int i;
4104 
4105     for (i = 0; i < n_ctxs; i++) {
4106         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4107         const TCGProfile *orig = &s->prof;
4108 
4109         if (counters) {
4110             PROF_ADD(prof, orig, cpu_exec_time);
4111             PROF_ADD(prof, orig, tb_count1);
4112             PROF_ADD(prof, orig, tb_count);
4113             PROF_ADD(prof, orig, op_count);
4114             PROF_MAX(prof, orig, op_count_max);
4115             PROF_ADD(prof, orig, temp_count);
4116             PROF_MAX(prof, orig, temp_count_max);
4117             PROF_ADD(prof, orig, del_op_count);
4118             PROF_ADD(prof, orig, code_in_len);
4119             PROF_ADD(prof, orig, code_out_len);
4120             PROF_ADD(prof, orig, search_out_len);
4121             PROF_ADD(prof, orig, interm_time);
4122             PROF_ADD(prof, orig, code_time);
4123             PROF_ADD(prof, orig, la_time);
4124             PROF_ADD(prof, orig, opt_time);
4125             PROF_ADD(prof, orig, restore_count);
4126             PROF_ADD(prof, orig, restore_time);
4127         }
4128         if (table) {
4129             int i;
4130 
4131             for (i = 0; i < NB_OPS; i++) {
4132                 PROF_ADD(prof, orig, table_op_count[i]);
4133             }
4134         }
4135     }
4136 }
4137 
4138 #undef PROF_ADD
4139 #undef PROF_MAX
4140 
/* Snapshot only the scalar counters into @prof (expected to be zeroed). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4145 
/* Snapshot only the per-opcode table into @prof (expected to be zeroed). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4150 
4151 void tcg_dump_op_count(GString *buf)
4152 {
4153     TCGProfile prof = {};
4154     int i;
4155 
4156     tcg_profile_snapshot_table(&prof);
4157     for (i = 0; i < NB_OPS; i++) {
4158         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4159                                prof.table_op_count[i]);
4160     }
4161 }
4162 
4163 int64_t tcg_cpu_exec_time(void)
4164 {
4165     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4166     unsigned int i;
4167     int64_t ret = 0;
4168 
4169     for (i = 0; i < n_ctxs; i++) {
4170         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4171         const TCGProfile *prof = &s->prof;
4172 
4173         ret += qatomic_read(&prof->cpu_exec_time);
4174     }
4175     return ret;
4176 }
4177 #else
/* Variant used when CONFIG_PROFILER is not defined: append a notice to @buf. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4182 
/*
 * Variant used when CONFIG_PROFILER is not defined: report an error and
 * terminate the process with EXIT_FAILURE.  Never returns.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4188 #endif
4189 
4190 
/*
 * Generate host code for @tb from the op list held in @s.
 *
 * Runs the optimizer (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachability and liveness passes, then walks the op list dispatching
 * each op to its register allocator / emitter, and finally resolves
 * labels and relocations.
 *
 * Returns tcg_current_code_size(s) on success, or a negative value:
 *   -1  the code pointer went past s->code_gen_highwater;
 *   -2  the code size exceeded UINT16_MAX, or tcg_resolve_relocs()
 *       failed;
 *   or whatever negative value tcg_out_ldst_finalize() /
 *   tcg_out_pool_finalize() returned.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        /* Count the ops of this TB for the profile. */
        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /*
     * Profiler timing idiom: subtract the clock before a phase and add
     * it back after, so the field accumulates the elapsed time.
     */
#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Index of the current guest insn; -1 until the first insn_start. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous insn by recording where its code ends. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
                /* Wide target words are split across two op arguments. */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            /* Handled specially if possible, else by the generic path. */
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start must have been seen. */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4422 
4423 #ifdef CONFIG_PROFILER
4424 void tcg_dump_info(GString *buf)
4425 {
4426     TCGProfile prof = {};
4427     const TCGProfile *s;
4428     int64_t tb_count;
4429     int64_t tb_div_count;
4430     int64_t tot;
4431 
4432     tcg_profile_snapshot_counters(&prof);
4433     s = &prof;
4434     tb_count = s->tb_count;
4435     tb_div_count = tb_count ? tb_count : 1;
4436     tot = s->interm_time + s->code_time;
4437 
4438     g_string_append_printf(buf, "JIT cycles          %" PRId64
4439                            " (%0.3f s at 2.4 GHz)\n",
4440                            tot, tot / 2.4e9);
4441     g_string_append_printf(buf, "translated TBs      %" PRId64
4442                            " (aborted=%" PRId64 " %0.1f%%)\n",
4443                            tb_count, s->tb_count1 - tb_count,
4444                            (double)(s->tb_count1 - s->tb_count)
4445                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4446     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4447                            (double)s->op_count / tb_div_count, s->op_count_max);
4448     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4449                            (double)s->del_op_count / tb_div_count);
4450     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4451                            (double)s->temp_count / tb_div_count,
4452                            s->temp_count_max);
4453     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4454                            (double)s->code_out_len / tb_div_count);
4455     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4456                            (double)s->search_out_len / tb_div_count);
4457 
4458     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4459                            s->op_count ? (double)tot / s->op_count : 0);
4460     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4461                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4462     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4463                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4464     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4465                            s->search_out_len ?
4466                            (double)tot / s->search_out_len : 0);
4467     if (tot == 0) {
4468         tot = 1;
4469     }
4470     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4471                            (double)s->interm_time / tot * 100.0);
4472     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4473                            (double)s->code_time / tot * 100.0);
4474     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4475                            (double)s->opt_time / (s->code_time ?
4476                                                   s->code_time : 1)
4477                            * 100.0);
4478     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4479                            (double)s->la_time / (s->code_time ?
4480                                                  s->code_time : 1) * 100.0);
4481     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4482                            s->restore_count);
4483     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4484                            s->restore_count ?
4485                            (double)s->restore_time / s->restore_count : 0);
4486 }
4487 #else
4488 void tcg_dump_info(GString *buf)
4489 {
4490     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4491 }
4492 #endif
4493 
4494 #ifdef ELF_HOST_MACHINE
4495 /* In order to use this feature, the backend needs to do three things:
4496 
4497    (1) Define ELF_HOST_MACHINE to indicate both what value to
4498        put into the ELF image and to indicate support for the feature.
4499 
4500    (2) Define tcg_register_jit.  This should create a buffer containing
4501        the contents of a .debug_frame section that describes the post-
4502        prologue unwind info for the tcg machine.
4503 
4504    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4505 */
4506 
4507 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values stored in jit_descriptor.action_flag; the numbering is fixed. */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
4513 
/*
 * One registered symbol file: list links to neighbouring entries plus the
 * address and size of the in-memory image.  Field order is part of the
 * debugger interface and must not change.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry, *prev_entry;  /* list links */
    const void *symfile_addr;                        /* start of the image */
    uint64_t symfile_size;                           /* image size in bytes */
};
4520 
/*
 * Descriptor read by the debugger.  Field order is part of the debugger
 * interface and must not change.
 */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* holds a jit_actions_t value */
    struct jit_code_entry *relevant_entry, *first_entry;
};
4527 
/*
 * Deliberately empty, never-inlined function.  The empty asm statement
 * keeps the body from being treated as having no effect, so calls to it
 * are not optimized away.
 */
void __attribute__((noinline)) __jit_debug_register_code(void);

void __jit_debug_register_code(void)
{
    __asm__("");
}
4533 
4534 /* Must statically initialize the version, because GDB may check
4535    the version before we can set it.  */
4536 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4537 
4538 /* End GDB interface.  */
4539 
/*
 * Return the byte offset of the name @str inside the string table @strtab.
 *
 * @strtab is a sequence of NUL-terminated strings.  Offset 0 is skipped
 * (the scan starts at offset 1), matching the layout used for ELF string
 * tables where the first byte is the empty name.
 *
 * The table must end with an empty string (for example the zero padding of
 * a fixed-size char array).  Reaching that empty string means @str is not
 * present; 0 is returned in that case instead of scanning past the end of
 * the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    /* An empty entry marks the end of the table.  */
    while (*p != '\0') {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        /* Step over this entry and its terminating NUL.  */
        p += strlen(p) + 1;
    }
    return 0;
}
4551 
4552 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4553                                  const void *debug_frame,
4554                                  size_t debug_frame_size)
4555 {
4556     struct __attribute__((packed)) DebugInfo {
4557         uint32_t  len;
4558         uint16_t  version;
4559         uint32_t  abbrev;
4560         uint8_t   ptr_size;
4561         uint8_t   cu_die;
4562         uint16_t  cu_lang;
4563         uintptr_t cu_low_pc;
4564         uintptr_t cu_high_pc;
4565         uint8_t   fn_die;
4566         char      fn_name[16];
4567         uintptr_t fn_low_pc;
4568         uintptr_t fn_high_pc;
4569         uint8_t   cu_eoc;
4570     };
4571 
4572     struct ElfImage {
4573         ElfW(Ehdr) ehdr;
4574         ElfW(Phdr) phdr;
4575         ElfW(Shdr) shdr[7];
4576         ElfW(Sym)  sym[2];
4577         struct DebugInfo di;
4578         uint8_t    da[24];
4579         char       str[80];
4580     };
4581 
4582     struct ElfImage *img;
4583 
4584     static const struct ElfImage img_template = {
4585         .ehdr = {
4586             .e_ident[EI_MAG0] = ELFMAG0,
4587             .e_ident[EI_MAG1] = ELFMAG1,
4588             .e_ident[EI_MAG2] = ELFMAG2,
4589             .e_ident[EI_MAG3] = ELFMAG3,
4590             .e_ident[EI_CLASS] = ELF_CLASS,
4591             .e_ident[EI_DATA] = ELF_DATA,
4592             .e_ident[EI_VERSION] = EV_CURRENT,
4593             .e_type = ET_EXEC,
4594             .e_machine = ELF_HOST_MACHINE,
4595             .e_version = EV_CURRENT,
4596             .e_phoff = offsetof(struct ElfImage, phdr),
4597             .e_shoff = offsetof(struct ElfImage, shdr),
4598             .e_ehsize = sizeof(ElfW(Shdr)),
4599             .e_phentsize = sizeof(ElfW(Phdr)),
4600             .e_phnum = 1,
4601             .e_shentsize = sizeof(ElfW(Shdr)),
4602             .e_shnum = ARRAY_SIZE(img->shdr),
4603             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4604 #ifdef ELF_HOST_FLAGS
4605             .e_flags = ELF_HOST_FLAGS,
4606 #endif
4607 #ifdef ELF_OSABI
4608             .e_ident[EI_OSABI] = ELF_OSABI,
4609 #endif
4610         },
4611         .phdr = {
4612             .p_type = PT_LOAD,
4613             .p_flags = PF_X,
4614         },
4615         .shdr = {
4616             [0] = { .sh_type = SHT_NULL },
4617             /* Trick: The contents of code_gen_buffer are not present in
4618                this fake ELF file; that got allocated elsewhere.  Therefore
4619                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4620                will not look for contents.  We can record any address.  */
4621             [1] = { /* .text */
4622                 .sh_type = SHT_NOBITS,
4623                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4624             },
4625             [2] = { /* .debug_info */
4626                 .sh_type = SHT_PROGBITS,
4627                 .sh_offset = offsetof(struct ElfImage, di),
4628                 .sh_size = sizeof(struct DebugInfo),
4629             },
4630             [3] = { /* .debug_abbrev */
4631                 .sh_type = SHT_PROGBITS,
4632                 .sh_offset = offsetof(struct ElfImage, da),
4633                 .sh_size = sizeof(img->da),
4634             },
4635             [4] = { /* .debug_frame */
4636                 .sh_type = SHT_PROGBITS,
4637                 .sh_offset = sizeof(struct ElfImage),
4638             },
4639             [5] = { /* .symtab */
4640                 .sh_type = SHT_SYMTAB,
4641                 .sh_offset = offsetof(struct ElfImage, sym),
4642                 .sh_size = sizeof(img->sym),
4643                 .sh_info = 1,
4644                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4645                 .sh_entsize = sizeof(ElfW(Sym)),
4646             },
4647             [6] = { /* .strtab */
4648                 .sh_type = SHT_STRTAB,
4649                 .sh_offset = offsetof(struct ElfImage, str),
4650                 .sh_size = sizeof(img->str),
4651             }
4652         },
4653         .sym = {
4654             [1] = { /* code_gen_buffer */
4655                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4656                 .st_shndx = 1,
4657             }
4658         },
4659         .di = {
4660             .len = sizeof(struct DebugInfo) - 4,
4661             .version = 2,
4662             .ptr_size = sizeof(void *),
4663             .cu_die = 1,
4664             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4665             .fn_die = 2,
4666             .fn_name = "code_gen_buffer"
4667         },
4668         .da = {
4669             1,          /* abbrev number (the cu) */
4670             0x11, 1,    /* DW_TAG_compile_unit, has children */
4671             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4672             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4673             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4674             0, 0,       /* end of abbrev */
4675             2,          /* abbrev number (the fn) */
4676             0x2e, 0,    /* DW_TAG_subprogram, no children */
4677             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4678             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4679             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4680             0, 0,       /* end of abbrev */
4681             0           /* no more abbrev */
4682         },
4683         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4684                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4685     };
4686 
4687     /* We only need a single jit entry; statically allocate it.  */
4688     static struct jit_code_entry one_entry;
4689 
4690     uintptr_t buf = (uintptr_t)buf_ptr;
4691     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4692     DebugFrameHeader *dfh;
4693 
4694     img = g_malloc(img_size);
4695     *img = img_template;
4696 
4697     img->phdr.p_vaddr = buf;
4698     img->phdr.p_paddr = buf;
4699     img->phdr.p_memsz = buf_size;
4700 
4701     img->shdr[1].sh_name = find_string(img->str, ".text");
4702     img->shdr[1].sh_addr = buf;
4703     img->shdr[1].sh_size = buf_size;
4704 
4705     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4706     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4707 
4708     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4709     img->shdr[4].sh_size = debug_frame_size;
4710 
4711     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4712     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4713 
4714     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4715     img->sym[1].st_value = buf;
4716     img->sym[1].st_size = buf_size;
4717 
4718     img->di.cu_low_pc = buf;
4719     img->di.cu_high_pc = buf + buf_size;
4720     img->di.fn_low_pc = buf;
4721     img->di.fn_high_pc = buf + buf_size;
4722 
4723     dfh = (DebugFrameHeader *)(img + 1);
4724     memcpy(dfh, debug_frame, debug_frame_size);
4725     dfh->fde.func_start = buf;
4726     dfh->fde.func_len = buf_size;
4727 
4728 #ifdef DEBUG_JIT
4729     /* Enable this block to be able to debug the ELF image file creation.
4730        One can use readelf, objdump, or other inspection utilities.  */
4731     {
4732         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4733         if (f) {
4734             if (fwrite(img, img_size, 1, f) != img_size) {
4735                 /* Avoid stupid unused return value warning for fwrite.  */
4736             }
4737             fclose(f);
4738         }
4739     }
4740 #endif
4741 
4742     one_entry.symfile_addr = img;
4743     one_entry.symfile_size = img_size;
4744 
4745     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4746     __jit_debug_descriptor.relevant_entry = &one_entry;
4747     __jit_debug_descriptor.first_entry = &one_entry;
4748     __jit_debug_register_code();
4749 }
4750 #else
4751 /* No support for the feature.  Provide the entry point expected by exec.c,
4752    and implement the internal function we declared earlier.  */
4753 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: there is no JIT
 * image to build, so every argument is ignored.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty.  */
}
4759 
/*
 * Entry point used when ELF_HOST_MACHINE is not defined: JIT registration
 * is unsupported, so @buf and @buf_size are ignored.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty.  */
}
4763 #endif /* ELF_HOST_MACHINE */
4764 
4765 #if !TCG_TARGET_MAYBE_vec
4766 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4767 {
4768     g_assert_not_reached();
4769 }
4770 #endif
4771