xref: /openbmc/qemu/tcg/tcg.c (revision b3eb5b86)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Record length, not counting this field; pointer-aligned so the
       whole image can be built with host-word alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;            /* distinguishes a CIE from an FDE (DWARF) */
    uint8_t version;        /* .debug_frame format version */
    char augmentation[1];   /* NUL-terminated augmentation string */
    uint8_t code_align;     /* code alignment factor (one LEB128 byte) */
    uint8_t data_align;     /* data alignment factor (one LEB128 byte) */
    uint8_t return_column;  /* register column holding the return address */
} DebugFrameCIE;

/* FDE header: which CIE owns it and the code range it describes. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;    /* offset of the owning CIE */
    uintptr_t func_start;   /* start address of the described code */
    uintptr_t func_len;     /* length in bytes of the described code */
} DebugFrameFDEHeader;

/* Minimal .debug_frame image: a single CIE followed by one FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(const void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
/* Backend hooks for vector code generation, defined in tcg-target.c.inc. */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/* Without vector support no vector opcode is ever emitted, so these
   stubs exist only to keep the common code compiling; reaching one
   indicates a front-end bug. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154                          ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162 
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165 
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172 
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176 
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte at the current output position and advance it. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one previously emitted byte at *p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
192 
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197         *s->code_ptr++ = v;
198     } else {
199         tcg_insn_unit *p = s->code_ptr;
200         memcpy(p, &v, sizeof(v));
201         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202     }
203 }
204 
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206                                                        uint16_t v)
207 {
208     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209         *p = v;
210     } else {
211         memcpy(p, &v, sizeof(v));
212     }
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229                                                        uint32_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252                                                        uint64_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 /* label relocation processing */
263 
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265                           TCGLabel *l, intptr_t addend)
266 {
267     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268 
269     r->type = type;
270     r->ptr = code_ptr;
271     r->addend = addend;
272     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274 
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277     tcg_debug_assert(!l->has_value);
278     l->has_value = 1;
279     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281 
282 TCGLabel *gen_new_label(void)
283 {
284     TCGContext *s = tcg_ctx;
285     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286 
287     memset(l, 0, sizeof(TCGLabel));
288     l->id = s->nb_labels++;
289     QSIMPLEQ_INIT(&l->relocs);
290 
291     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292 
293     return l;
294 }
295 
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298     TCGLabel *l;
299 
300     QSIMPLEQ_FOREACH(l, &s->labels, next) {
301         TCGRelocation *r;
302         uintptr_t value = l->u.value;
303 
304         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306                 return false;
307             }
308         }
309     }
310     return true;
311 }
312 
/* Record the current output offset as jump slot @which's reset point. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
321 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* NOTE(review): -2 is interpreted by the sigsetjmp site that set
       s->jmp_trans (in the translator loop) — confirm against caller. */
    siglongjmp(s->jmp_trans, -2);
}
328 
329 #define C_PFX1(P, A)                    P##A
330 #define C_PFX2(P, A, B)                 P##A##_##B
331 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
332 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
333 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
334 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
335 
336 /* Define an enumeration for the various combinations. */
337 
338 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
339 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
340 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
341 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
342 
343 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
344 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
345 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
346 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
347 
348 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
349 
350 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
351 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
352 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
353 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
354 
355 typedef enum {
356 #include "tcg-target-con-set.h"
357 } TCGConstraintSetIndex;
358 
359 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
360 
361 #undef C_O0_I1
362 #undef C_O0_I2
363 #undef C_O0_I3
364 #undef C_O0_I4
365 #undef C_O1_I1
366 #undef C_O1_I2
367 #undef C_O1_I3
368 #undef C_O1_I4
369 #undef C_N1_I2
370 #undef C_O2_I1
371 #undef C_O2_I2
372 #undef C_O2_I3
373 #undef C_O2_I4
374 
375 /* Put all of the constraint sets into an array, indexed by the enum. */
376 
377 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
378 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
379 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
380 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
381 
382 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
383 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
384 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
385 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
386 
387 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
388 
389 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
390 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
391 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
392 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
393 
394 static const TCGTargetOpDef constraint_sets[] = {
395 #include "tcg-target-con-set.h"
396 };
397 
398 
399 #undef C_O0_I1
400 #undef C_O0_I2
401 #undef C_O0_I3
402 #undef C_O0_I4
403 #undef C_O1_I1
404 #undef C_O1_I2
405 #undef C_O1_I3
406 #undef C_O1_I4
407 #undef C_N1_I2
408 #undef C_O2_I1
409 #undef C_O2_I2
410 #undef C_O2_I3
411 #undef C_O2_I4
412 
413 /* Expand the enumerator to be returned from tcg_target_op_def(). */
414 
415 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
416 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
417 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
418 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
419 
420 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
421 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
422 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
423 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
424 
425 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
426 
427 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
428 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
429 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
430 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
431 
432 #include "tcg-target.c.inc"
433 
/* Allocate the per-context plugin scratch state (no-op without plugins). */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    /* Scratch TB descriptor reused by the plugin code across translations;
       the insn array owns its elements via the cleanup function. */
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
442 
443 /*
444  * All TCG threads except the parent (i.e. the one that called tcg_context_init
445  * and registered the target's TCG globals) must register with this function
446  * before initiating translation.
447  *
448  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
449  * of tcg_region_init() for the reasoning behind this.
450  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
454  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
457  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single init context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a snapshot of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* The copied pointer still targets the parent's temps[];
               rebase the same index into this context's own array. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* Slot 0 inherits the parent's plugin context and region allocation
       through the struct copy above; later contexts need their own. */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
493 
494 /* pool based memory allocation */
495 void *tcg_malloc_internal(TCGContext *s, int size)
496 {
497     TCGPool *p;
498     int pool_size;
499 
500     if (size > TCG_POOL_CHUNK_SIZE) {
501         /* big malloc: insert a new pool (XXX: could optimize) */
502         p = g_malloc(sizeof(TCGPool) + size);
503         p->size = size;
504         p->next = s->pool_first_large;
505         s->pool_first_large = p;
506         return p->data;
507     } else {
508         p = s->pool_current;
509         if (!p) {
510             p = s->pool_first;
511             if (!p)
512                 goto new_pool;
513         } else {
514             if (!p->next) {
515             new_pool:
516                 pool_size = TCG_POOL_CHUNK_SIZE;
517                 p = g_malloc(sizeof(TCGPool) + pool_size);
518                 p->size = pool_size;
519                 p->next = NULL;
520                 if (s->pool_current) {
521                     s->pool_current->next = p;
522                 } else {
523                     s->pool_first = p;
524                 }
525             } else {
526                 p = p->next;
527             }
528         }
529     }
530     s->pool_current = p;
531     s->pool_cur = p->data + size;
532     s->pool_end = p->data + p->size;
533     return p->data;
534 }
535 
536 void tcg_pool_reset(TCGContext *s)
537 {
538     TCGPool *p, *t;
539     for (p = s->pool_first_large; p; p = t) {
540         t = p->next;
541         g_free(p);
542     }
543     s->pool_first_large = NULL;
544     s->pool_cur = s->pool_end = NULL;
545     s->pool_current = NULL;
546 }
547 
548 #include "exec/helper-proto.h"
549 
550 static const TCGHelperInfo all_helpers[] = {
551 #include "exec/helper-tcg.h"
552 };
553 static GHashTable *helper_table;
554 
555 #ifdef CONFIG_TCG_INTERPRETER
556 static GHashTable *ffi_table;
557 
558 static ffi_type * const typecode_to_ffi[8] = {
559     [dh_typecode_void] = &ffi_type_void,
560     [dh_typecode_i32]  = &ffi_type_uint32,
561     [dh_typecode_s32]  = &ffi_type_sint32,
562     [dh_typecode_i64]  = &ffi_type_uint64,
563     [dh_typecode_s64]  = &ffi_type_sint64,
564     [dh_typecode_ptr]  = &ffi_type_pointer,
565 };
566 #endif
567 
568 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
569 static void process_op_defs(TCGContext *s);
570 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
571                                             TCGReg reg, const char *name);
572 
/*
 * One-time initialization of the parent TCG context: opcode constraint
 * storage, the helper lookup table(s), the target backend, the register
 * allocation order, and the "env" global.  Called once from tcg_init().
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* One contiguous allocation, carved up per opcode just below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        /* One cif per distinct typemask; helpers with the same
           signature share it. */
        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

        /* The cif and its argument-type array live in one allocation
           and are never freed (the table persists for the run). */
        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                /* Each argument occupies a 3-bit typecode; slot 0 is
                   the return type, hence the (j + 1) offset. */
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* Register the CPU env pointer as the first global, permanently
       pinned to TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
692 
/* TCG start-up: build the parent context, then set up the code-buffer
   regions (tb_size bytes, optionally with split read/write mapping). */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
698 
699 /*
700  * Allocate TBs right before their corresponding translated code, making
701  * sure that TBs and code are on different cache lines.
702  */
703 TranslationBlock *tcg_tb_alloc(TCGContext *s)
704 {
705     uintptr_t align = qemu_icache_linesize;
706     TranslationBlock *tb;
707     void *next;
708 
709  retry:
710     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
711     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
712 
713     if (unlikely(next > s->code_gen_highwater)) {
714         if (tcg_region_alloc(s)) {
715             return NULL;
716         }
717         goto retry;
718     }
719     qatomic_set(&s->code_gen_ptr, next);
720     s->data_gen_ptr = NULL;
721     return tb;
722 }
723 
/*
 * Emit the host prologue/epilogue at the start of the code buffer and
 * publish it as the TB entry point.  Must run before any TB is
 * generated in this buffer.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    /* The prologue occupies the very beginning of the region buffer. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* TB execution enters through the (execute-view) prologue address. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written code visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* A constant pool follows the code: disassemble only the
                   code part, then dump the pool words by hand. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
804 
/* Reset per-translation state before generating a new TB.
   Globals survive; temporaries, labels, and ops are all discarded. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    /* Spill-slot allocation restarts at the beginning of the frame. */
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}
832 
833 static TCGTemp *tcg_temp_alloc(TCGContext *s)
834 {
835     int n = s->nb_temps++;
836 
837     if (n >= TCG_MAX_TEMPS) {
838         tcg_raise_tb_overflow(s);
839     }
840     return memset(&s->temps[n], 0, sizeof(TCGTemp));
841 }
842 
/* Allocate a new temp slot and mark it global.  All globals must be
   created before any per-TB temporary exists (nb_globals == nb_temps). */
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}
855 
856 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
857                                             TCGReg reg, const char *name)
858 {
859     TCGTemp *ts;
860 
861     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
862         tcg_abort();
863     }
864 
865     ts = tcg_global_alloc(s);
866     ts->base_type = type;
867     ts->type = type;
868     ts->kind = TEMP_FIXED;
869     ts->reg = reg;
870     ts->name = name;
871     tcg_regset_set_reg(s->reserved_regs, reg);
872 
873     return ts;
874 }
875 
/* Record the frame used for temp storage as [start, start+size) relative
   to @reg, and create the "_frame" fixed global for addressing it.
   tcg_func_start() resets the allocation cursor to frame_start. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
883 
/*
 * Create a global temp backed by memory at @base + @offset.
 * On 32-bit hosts a 64-bit global is split into two 32-bit halves
 * ("name_0" = low half, "name_1" = high half), with the memory offsets
 * chosen according to host endianness.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#if HOST_BIG_ENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit value on a 32-bit host needs two loads of the base. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        /* Low half: on big-endian hosts it lives at offset + 4. */
        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* High half must occupy the adjacent temp slot. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
945 
/*
 * Allocate a translation-time temporary of TYPE.  TEMP_LOCAL temps
 * survive across basic blocks; TEMP_NORMAL temps do not.  Previously
 * freed temps of the same (type, kind) are reused via the free_temps
 * bitmaps before new ones are allocated.  On 32-bit hosts a 64-bit
 * temp is backed by two adjacent 32-bit temps.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    /* free_temps is indexed by type, with locals offset by TCG_TYPE_COUNT.  */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* The second tcg_temp_alloc must produce the adjacent temp
               (asserted below) so the pair is addressable as ts/ts+1.  */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
991 
992 TCGv_vec tcg_temp_new_vec(TCGType type)
993 {
994     TCGTemp *t;
995 
996 #ifdef CONFIG_DEBUG_TCG
997     switch (type) {
998     case TCG_TYPE_V64:
999         assert(TCG_TARGET_HAS_v64);
1000         break;
1001     case TCG_TYPE_V128:
1002         assert(TCG_TARGET_HAS_v128);
1003         break;
1004     case TCG_TYPE_V256:
1005         assert(TCG_TARGET_HAS_v256);
1006         break;
1007     default:
1008         g_assert_not_reached();
1009     }
1010 #endif
1011 
1012     t = tcg_temp_new_internal(type, 0);
1013     return temp_tcgv_vec(t);
1014 }
1015 
1016 /* Create a new temp of the same type as an existing temp.  */
1017 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1018 {
1019     TCGTemp *t = tcgv_vec_temp(match);
1020 
1021     tcg_debug_assert(t->temp_allocated != 0);
1022 
1023     t = tcg_temp_new_internal(t->base_type, 0);
1024     return temp_tcgv_vec(t);
1025 }
1026 
1027 void tcg_temp_free_internal(TCGTemp *ts)
1028 {
1029     TCGContext *s = tcg_ctx;
1030     int k, idx;
1031 
1032     switch (ts->kind) {
1033     case TEMP_CONST:
1034         /*
1035          * In order to simplify users of tcg_constant_*,
1036          * silently ignore free.
1037          */
1038         return;
1039     case TEMP_NORMAL:
1040     case TEMP_LOCAL:
1041         break;
1042     default:
1043         g_assert_not_reached();
1044     }
1045 
1046 #if defined(CONFIG_DEBUG_TCG)
1047     s->temps_in_use--;
1048     if (s->temps_in_use < 0) {
1049         fprintf(stderr, "More temporaries freed than allocated!\n");
1050     }
1051 #endif
1052 
1053     tcg_debug_assert(ts->temp_allocated != 0);
1054     ts->temp_allocated = 0;
1055 
1056     idx = temp_idx(ts);
1057     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1058     set_bit(idx, s->free_temps[k].l);
1059 }
1060 
/*
 * Return the interned constant temp of TYPE holding VAL, creating it
 * on first use.  Constants are deduplicated per-type via a hash table
 * keyed by the 64-bit value; the key pointer is &ts->val, which must
 * therefore retain the full value (see the 32-bit split case below).
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type intern table.  */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* The second tcg_temp_alloc must be adjacent (asserted
               below) so the pair is addressable as ts/ts+1.  */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        /* Key by the value stored in the temp itself; it never moves.  */
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
1108 
1109 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1110 {
1111     val = dup_const(vece, val);
1112     return temp_tcgv_vec(tcg_constant_internal(type, val));
1113 }
1114 
1115 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1116 {
1117     TCGTemp *t = tcgv_vec_temp(match);
1118 
1119     tcg_debug_assert(t->temp_allocated != 0);
1120     return tcg_constant_vec(t->base_type, vece, val);
1121 }
1122 
1123 TCGv_i32 tcg_const_i32(int32_t val)
1124 {
1125     TCGv_i32 t0;
1126     t0 = tcg_temp_new_i32();
1127     tcg_gen_movi_i32(t0, val);
1128     return t0;
1129 }
1130 
1131 TCGv_i64 tcg_const_i64(int64_t val)
1132 {
1133     TCGv_i64 t0;
1134     t0 = tcg_temp_new_i64();
1135     tcg_gen_movi_i64(t0, val);
1136     return t0;
1137 }
1138 
1139 TCGv_i32 tcg_const_local_i32(int32_t val)
1140 {
1141     TCGv_i32 t0;
1142     t0 = tcg_temp_local_new_i32();
1143     tcg_gen_movi_i32(t0, val);
1144     return t0;
1145 }
1146 
1147 TCGv_i64 tcg_const_local_i64(int64_t val)
1148 {
1149     TCGv_i64 t0;
1150     t0 = tcg_temp_local_new_i64();
1151     tcg_gen_movi_i64(t0, val);
1152     return t0;
1153 }
1154 
#if defined(CONFIG_DEBUG_TCG)
/* Reset the live-temporary counter (debug builds only).  */
void tcg_clear_temp_count(void)
{
    tcg_ctx->temps_in_use = 0;
}

/* Return 1 if any temporaries are still in use, clearing the counter
   so the same leak is not reported twice; return 0 otherwise.  */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;

    if (s->temps_in_use == 0) {
        return 0;
    }
    /* Clear the count so that we don't give another
       warning immediately next time around.  */
    s->temps_in_use = 0;
    return 1;
}
#endif
1175 
1176 /* Return true if OP may appear in the opcode stream.
1177    Test the runtime variable that controls each opcode.  */
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* Any vector width implies generic vector op support.  */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control-flow and memory-access ops every backend must provide.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer ops.  */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated by per-backend capability macros.  */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops exist only on 32-bit hosts.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer ops, mandatory on 64-bit hosts only.  */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops, gated by per-backend capability macros.  */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: base set requires any vector support at all.  */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes beyond the generic set are assumed
           supported by the backend that defined them.  */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1470 
1471 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1472    and endian swap. Maybe it would be better to do the alignment
1473    and endian swap in tcg_reg_alloc_call(). */
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit a call to helper FUNC with NARGS arguments from ARGS, placing
 * the result (if any) in RET.  Argument/return type information is
 * looked up from the global helper_table via the function pointer.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned typemask;
    const TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    typemask = info->typemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Widen each 32-bit argument to 64 bits (sign- or zero-extended per
       its declared type) in a fresh temp; freed again after emission.  */
    for (i = 0; i < nargs; ++i) {
        /* Typemask holds 3-bit type codes; slot 0 is the return type.  */
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
        bool is_signed = argtype & 1;

        if (is_32bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i32 orig = temp_tcgv_i32(args[i]);
            if (is_signed) {
                tcg_gen_ext_i32_i64(temp, orig);
            } else {
                tcg_gen_extu_i32_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* Lay out the output (return) arguments first.  */
    pi = 0;
    if (ret != NULL) {
        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
            /* 64-bit return on a 32-bit host: two halves, ordered so the
               low part lands where the host ABI expects it.  */
#if HOST_BIG_ENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* Then the input arguments, with padding/splitting as required.  */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
        bool want_align = false;

#if defined(CONFIG_TCG_INTERPRETER)
        /*
         * Align all arguments, so that they land in predictable places
         * for passing off to ffi_call.
         */
        want_align = true;
#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
        /* Some targets want aligned 64 bit args */
        want_align = is_64bit;
#endif

        /* Insert a dummy slot so the 64-bit pair starts at an even index.  */
        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }

        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /*
             * If stack grows up, then we will be placing successive
             * arguments at lower addresses, which means we need to
             * reverse the order compared to how we would normally
             * treat either big or little-endian.  For those arguments
             * that will wind up in registers, this still works for
             * HPPA (the only current STACK_GROWSUP target) since the
             * argument registers are *also* allocated in decreasing
             * order.  If another such target is added, this logic may
             * have to get more complicated to differentiate between
             * stack arguments and register arguments.
             */
#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function pointer and helper info trail the argument list.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Release the widening temps allocated above.  */
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;

        if (is_32bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1600 
1601 static void tcg_reg_alloc_start(TCGContext *s)
1602 {
1603     int i, n;
1604 
1605     for (i = 0, n = s->nb_temps; i < n; i++) {
1606         TCGTemp *ts = &s->temps[i];
1607         TCGTempVal val = TEMP_VAL_MEM;
1608 
1609         switch (ts->kind) {
1610         case TEMP_CONST:
1611             val = TEMP_VAL_CONST;
1612             break;
1613         case TEMP_FIXED:
1614             val = TEMP_VAL_REG;
1615             break;
1616         case TEMP_GLOBAL:
1617             break;
1618         case TEMP_NORMAL:
1619         case TEMP_EBB:
1620             val = TEMP_VAL_DEAD;
1621             /* fall through */
1622         case TEMP_LOCAL:
1623             ts->mem_allocated = 0;
1624             break;
1625         default:
1626             g_assert_not_reached();
1627         }
1628         ts->val_type = val;
1629     }
1630 
1631     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1632 }
1633 
1634 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1635                                  TCGTemp *ts)
1636 {
1637     int idx = temp_idx(ts);
1638 
1639     switch (ts->kind) {
1640     case TEMP_FIXED:
1641     case TEMP_GLOBAL:
1642         pstrcpy(buf, buf_size, ts->name);
1643         break;
1644     case TEMP_LOCAL:
1645         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1646         break;
1647     case TEMP_EBB:
1648         snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1649         break;
1650     case TEMP_NORMAL:
1651         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1652         break;
1653     case TEMP_CONST:
1654         switch (ts->type) {
1655         case TCG_TYPE_I32:
1656             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1657             break;
1658 #if TCG_TARGET_REG_BITS > 32
1659         case TCG_TYPE_I64:
1660             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1661             break;
1662 #endif
1663         case TCG_TYPE_V64:
1664         case TCG_TYPE_V128:
1665         case TCG_TYPE_V256:
1666             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1667                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1668             break;
1669         default:
1670             g_assert_not_reached();
1671         }
1672         break;
1673     }
1674     return buf;
1675 }
1676 
1677 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1678                              int buf_size, TCGArg arg)
1679 {
1680     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1681 }
1682 
/* Printable names for comparison conditions, indexed by TCGCond.  */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

/* Printable names for memory-op size/sign/endianness combinations,
   indexed by MemOp masked with (MO_BSWAP | MO_SSIZE).  */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};

/* Printable alignment prefixes, indexed by the MO_AMASK field.  The
   unmarked case is whichever alignment is the target's default.  */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

/* Printable names for the valid bswap flag combinations; invalid
   combinations map to NULL (empty string slots are unused).  */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1738 
1739 static inline bool tcg_regset_single(TCGRegSet d)
1740 {
1741     return (d & (d - 1)) == 0;
1742 }
1743 
1744 static inline TCGReg tcg_regset_first(TCGRegSet d)
1745 {
1746     if (TCG_TARGET_NB_REGS <= 32) {
1747         return ctz32(d);
1748     } else {
1749         return ctz64(d);
1750     }
1751 }
1752 
/* Return only the number of characters output -- no error return.
   fprintf reports errors as a negative count; clamping to 0 lets
   callers accumulate column widths without checking each call. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
1756 
1757 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1758 {
1759     char buf[128];
1760     TCGOp *op;
1761 
1762     QTAILQ_FOREACH(op, &s->ops, link) {
1763         int i, k, nb_oargs, nb_iargs, nb_cargs;
1764         const TCGOpDef *def;
1765         TCGOpcode c;
1766         int col = 0;
1767 
1768         c = op->opc;
1769         def = &tcg_op_defs[c];
1770 
1771         if (c == INDEX_op_insn_start) {
1772             nb_oargs = 0;
1773             col += ne_fprintf(f, "\n ----");
1774 
1775             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1776                 target_ulong a;
1777 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1778                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1779 #else
1780                 a = op->args[i];
1781 #endif
1782                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1783             }
1784         } else if (c == INDEX_op_call) {
1785             const TCGHelperInfo *info = tcg_call_info(op);
1786             void *func = tcg_call_func(op);
1787 
1788             /* variable number of arguments */
1789             nb_oargs = TCGOP_CALLO(op);
1790             nb_iargs = TCGOP_CALLI(op);
1791             nb_cargs = def->nb_cargs;
1792 
1793             col += ne_fprintf(f, " %s ", def->name);
1794 
1795             /*
1796              * Print the function name from TCGHelperInfo, if available.
1797              * Note that plugins have a template function for the info,
1798              * but the actual function pointer comes from the plugin.
1799              */
1800             if (func == info->func) {
1801                 col += ne_fprintf(f, "%s", info->name);
1802             } else {
1803                 col += ne_fprintf(f, "plugin(%p)", func);
1804             }
1805 
1806             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1807             for (i = 0; i < nb_oargs; i++) {
1808                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1809                                                             op->args[i]));
1810             }
1811             for (i = 0; i < nb_iargs; i++) {
1812                 TCGArg arg = op->args[nb_oargs + i];
1813                 const char *t = "<dummy>";
1814                 if (arg != TCG_CALL_DUMMY_ARG) {
1815                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1816                 }
1817                 col += ne_fprintf(f, ",%s", t);
1818             }
1819         } else {
1820             col += ne_fprintf(f, " %s ", def->name);
1821 
1822             nb_oargs = def->nb_oargs;
1823             nb_iargs = def->nb_iargs;
1824             nb_cargs = def->nb_cargs;
1825 
1826             if (def->flags & TCG_OPF_VECTOR) {
1827                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1828                                   8 << TCGOP_VECE(op));
1829             }
1830 
1831             k = 0;
1832             for (i = 0; i < nb_oargs; i++) {
1833                 const char *sep =  k ? "," : "";
1834                 col += ne_fprintf(f, "%s%s", sep,
1835                                   tcg_get_arg_str(s, buf, sizeof(buf),
1836                                                   op->args[k++]));
1837             }
1838             for (i = 0; i < nb_iargs; i++) {
1839                 const char *sep =  k ? "," : "";
1840                 col += ne_fprintf(f, "%s%s", sep,
1841                                   tcg_get_arg_str(s, buf, sizeof(buf),
1842                                                   op->args[k++]));
1843             }
1844             switch (c) {
1845             case INDEX_op_brcond_i32:
1846             case INDEX_op_setcond_i32:
1847             case INDEX_op_movcond_i32:
1848             case INDEX_op_brcond2_i32:
1849             case INDEX_op_setcond2_i32:
1850             case INDEX_op_brcond_i64:
1851             case INDEX_op_setcond_i64:
1852             case INDEX_op_movcond_i64:
1853             case INDEX_op_cmp_vec:
1854             case INDEX_op_cmpsel_vec:
1855                 if (op->args[k] < ARRAY_SIZE(cond_name)
1856                     && cond_name[op->args[k]]) {
1857                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1858                 } else {
1859                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1860                 }
1861                 i = 1;
1862                 break;
1863             case INDEX_op_qemu_ld_i32:
1864             case INDEX_op_qemu_st_i32:
1865             case INDEX_op_qemu_st8_i32:
1866             case INDEX_op_qemu_ld_i64:
1867             case INDEX_op_qemu_st_i64:
1868                 {
1869                     MemOpIdx oi = op->args[k++];
1870                     MemOp op = get_memop(oi);
1871                     unsigned ix = get_mmuidx(oi);
1872 
1873                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1874                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1875                     } else {
1876                         const char *s_al, *s_op;
1877                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1878                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1879                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
1880                     }
1881                     i = 1;
1882                 }
1883                 break;
1884             case INDEX_op_bswap16_i32:
1885             case INDEX_op_bswap16_i64:
1886             case INDEX_op_bswap32_i32:
1887             case INDEX_op_bswap32_i64:
1888             case INDEX_op_bswap64_i64:
1889                 {
1890                     TCGArg flags = op->args[k];
1891                     const char *name = NULL;
1892 
1893                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1894                         name = bswap_flag_name[flags];
1895                     }
1896                     if (name) {
1897                         col += ne_fprintf(f, ",%s", name);
1898                     } else {
1899                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
1900                     }
1901                     i = k = 1;
1902                 }
1903                 break;
1904             default:
1905                 i = 0;
1906                 break;
1907             }
1908             switch (c) {
1909             case INDEX_op_set_label:
1910             case INDEX_op_br:
1911             case INDEX_op_brcond_i32:
1912             case INDEX_op_brcond_i64:
1913             case INDEX_op_brcond2_i32:
1914                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
1915                                   arg_label(op->args[k])->id);
1916                 i++, k++;
1917                 break;
1918             default:
1919                 break;
1920             }
1921             for (; i < nb_cargs; i++, k++) {
1922                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
1923                                   op->args[k]);
1924             }
1925         }
1926 
1927         if (have_prefs || op->life) {
1928             for (; col < 40; ++col) {
1929                 putc(' ', f);
1930             }
1931         }
1932 
1933         if (op->life) {
1934             unsigned life = op->life;
1935 
1936             if (life & (SYNC_ARG * 3)) {
1937                 ne_fprintf(f, "  sync:");
1938                 for (i = 0; i < 2; ++i) {
1939                     if (life & (SYNC_ARG << i)) {
1940                         ne_fprintf(f, " %d", i);
1941                     }
1942                 }
1943             }
1944             life /= DEAD_ARG;
1945             if (life) {
1946                 ne_fprintf(f, "  dead:");
1947                 for (i = 0; life; ++i, life >>= 1) {
1948                     if (life & 1) {
1949                         ne_fprintf(f, " %d", i);
1950                     }
1951                 }
1952             }
1953         }
1954 
1955         if (have_prefs) {
1956             for (i = 0; i < nb_oargs; ++i) {
1957                 TCGRegSet set = op->output_pref[i];
1958 
1959                 if (i == 0) {
1960                     ne_fprintf(f, "  pref=");
1961                 } else {
1962                     ne_fprintf(f, ",");
1963                 }
1964                 if (set == 0) {
1965                     ne_fprintf(f, "none");
1966                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
1967                     ne_fprintf(f, "all");
1968 #ifdef CONFIG_DEBUG_TCG
1969                 } else if (tcg_regset_single(set)) {
1970                     TCGReg reg = tcg_regset_first(set);
1971                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
1972 #endif
1973                 } else if (TCG_TARGET_NB_REGS <= 32) {
1974                     ne_fprintf(f, "0x%x", (uint32_t)set);
1975                 } else {
1976                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
1977                 }
1978             }
1979         }
1980 
1981         putc('\n', f);
1982     }
1983 }
1984 
1985 /* we give more priority to constraints with less registers */
1986 static int get_constraint_priority(const TCGOpDef *def, int k)
1987 {
1988     const TCGArgConstraint *arg_ct = &def->args_ct[k];
1989     int n;
1990 
1991     if (arg_ct->oalias) {
1992         /* an alias is equivalent to a single register */
1993         n = 1;
1994     } else {
1995         n = ctpop64(arg_ct->regs);
1996     }
1997     return TCG_TARGET_NB_REGS - n + 1;
1998 }
1999 
2000 /* sort from highest priority to lowest */
2001 static void sort_constraints(TCGOpDef *def, int start, int n)
2002 {
2003     int i, j;
2004     TCGArgConstraint *a = def->args_ct;
2005 
2006     for (i = 0; i < n; i++) {
2007         a[start + i].sort_index = start + i;
2008     }
2009     if (n <= 1) {
2010         return;
2011     }
2012     for (i = 0; i < n - 1; i++) {
2013         for (j = i + 1; j < n; j++) {
2014             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2015             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2016             if (p1 < p2) {
2017                 int tmp = a[start + i].sort_index;
2018                 a[start + i].sort_index = a[start + j].sort_index;
2019                 a[start + j].sort_index = tmp;
2020             }
2021         }
2022     }
2023 }
2024 
/*
 * Parse the per-target operand constraint strings into the args_ct
 * arrays of every opcode definition, then sort the constraints by
 * priority.  Called once at context init time.  The target-specific
 * constraint letters are pulled in via "tcg-target-con-str.h" below,
 * which expands into extra switch cases through the CONST/REGS macros.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        /* Opcodes the target does not implement have no constraints. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* A constraint string may combine several letters, e.g. "ri". */
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* A digit ties this input to output operand 'oarg'. */
                        int oarg = *ct_str - '0';
                        /* A matching-digit constraint must stand alone. */
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Output must be allocated to a fresh register. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Operand may be an immediate constant. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2111 
2112 void tcg_op_remove(TCGContext *s, TCGOp *op)
2113 {
2114     TCGLabel *label;
2115 
2116     switch (op->opc) {
2117     case INDEX_op_br:
2118         label = arg_label(op->args[0]);
2119         label->refs--;
2120         break;
2121     case INDEX_op_brcond_i32:
2122     case INDEX_op_brcond_i64:
2123         label = arg_label(op->args[3]);
2124         label->refs--;
2125         break;
2126     case INDEX_op_brcond2_i32:
2127         label = arg_label(op->args[5]);
2128         label->refs--;
2129         break;
2130     default:
2131         break;
2132     }
2133 
2134     QTAILQ_REMOVE(&s->ops, op, link);
2135     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2136     s->nb_ops--;
2137 
2138 #ifdef CONFIG_PROFILER
2139     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2140 #endif
2141 }
2142 
2143 void tcg_remove_ops_after(TCGOp *op)
2144 {
2145     TCGContext *s = tcg_ctx;
2146 
2147     while (true) {
2148         TCGOp *last = tcg_last_op();
2149         if (last == op) {
2150             return;
2151         }
2152         tcg_op_remove(s, last);
2153     }
2154 }
2155 
2156 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2157 {
2158     TCGContext *s = tcg_ctx;
2159     TCGOp *op;
2160 
2161     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2162         op = tcg_malloc(sizeof(TCGOp));
2163     } else {
2164         op = QTAILQ_FIRST(&s->free_ops);
2165         QTAILQ_REMOVE(&s->free_ops, op, link);
2166     }
2167     memset(op, 0, offsetof(TCGOp, link));
2168     op->opc = opc;
2169     s->nb_ops++;
2170 
2171     return op;
2172 }
2173 
2174 TCGOp *tcg_emit_op(TCGOpcode opc)
2175 {
2176     TCGOp *op = tcg_op_alloc(opc);
2177     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2178     return op;
2179 }
2180 
2181 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2182 {
2183     TCGOp *new_op = tcg_op_alloc(opc);
2184     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2185     return new_op;
2186 }
2187 
2188 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2189 {
2190     TCGOp *new_op = tcg_op_alloc(opc);
2191     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2192     return new_op;
2193 }
2194 
/*
 * Reachable analysis: remove unreachable code.
 *
 * Walk the op stream forward, tracking a 'dead' flag that is set after
 * any unconditional control transfer and cleared again at the next
 * referenced label.  Ops seen while 'dead' is set are deleted.
 */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default an op is removed iff we are currently in dead code. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        /* Removing the branch drops refs to 0: drop label. */
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2267 
2268 #define TS_DEAD  1
2269 #define TS_MEM   2
2270 
2271 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2272 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2273 
2274 /* For liveness_pass_1, the register preferences for a given temp.  */
2275 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2276 {
2277     return ts->state_ptr;
2278 }
2279 
2280 /* For liveness_pass_1, reset the preferences for a given temp to the
2281  * maximal regset for its type.
2282  */
2283 static inline void la_reset_pref(TCGTemp *ts)
2284 {
2285     *la_temp_pref(ts)
2286         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2287 }
2288 
2289 /* liveness analysis: end of function: all temps are dead, and globals
2290    should be in memory. */
2291 static void la_func_end(TCGContext *s, int ng, int nt)
2292 {
2293     int i;
2294 
2295     for (i = 0; i < ng; ++i) {
2296         s->temps[i].state = TS_DEAD | TS_MEM;
2297         la_reset_pref(&s->temps[i]);
2298     }
2299     for (i = ng; i < nt; ++i) {
2300         s->temps[i].state = TS_DEAD;
2301         la_reset_pref(&s->temps[i]);
2302     }
2303 }
2304 
2305 /* liveness analysis: end of basic block: all temps are dead, globals
2306    and local temps should be in memory. */
2307 static void la_bb_end(TCGContext *s, int ng, int nt)
2308 {
2309     int i;
2310 
2311     for (i = 0; i < nt; ++i) {
2312         TCGTemp *ts = &s->temps[i];
2313         int state;
2314 
2315         switch (ts->kind) {
2316         case TEMP_FIXED:
2317         case TEMP_GLOBAL:
2318         case TEMP_LOCAL:
2319             state = TS_DEAD | TS_MEM;
2320             break;
2321         case TEMP_NORMAL:
2322         case TEMP_EBB:
2323         case TEMP_CONST:
2324             state = TS_DEAD;
2325             break;
2326         default:
2327             g_assert_not_reached();
2328         }
2329         ts->state = state;
2330         la_reset_pref(ts);
2331     }
2332 }
2333 
2334 /* liveness analysis: sync globals back to memory.  */
2335 static void la_global_sync(TCGContext *s, int ng)
2336 {
2337     int i;
2338 
2339     for (i = 0; i < ng; ++i) {
2340         int state = s->temps[i].state;
2341         s->temps[i].state = state | TS_MEM;
2342         if (state == TS_DEAD) {
2343             /* If the global was previously dead, reset prefs.  */
2344             la_reset_pref(&s->temps[i]);
2345         }
2346     }
2347 }
2348 
2349 /*
2350  * liveness analysis: conditional branch: all temps are dead unless
2351  * explicitly live-across-conditional-branch, globals and local temps
2352  * should be synced.
2353  */
2354 static void la_bb_sync(TCGContext *s, int ng, int nt)
2355 {
2356     la_global_sync(s, ng);
2357 
2358     for (int i = ng; i < nt; ++i) {
2359         TCGTemp *ts = &s->temps[i];
2360         int state;
2361 
2362         switch (ts->kind) {
2363         case TEMP_LOCAL:
2364             state = ts->state;
2365             ts->state = state | TS_MEM;
2366             if (state != TS_DEAD) {
2367                 continue;
2368             }
2369             break;
2370         case TEMP_NORMAL:
2371             s->temps[i].state = TS_DEAD;
2372             break;
2373         case TEMP_EBB:
2374         case TEMP_CONST:
2375             continue;
2376         default:
2377             g_assert_not_reached();
2378         }
2379         la_reset_pref(&s->temps[i]);
2380     }
2381 }
2382 
2383 /* liveness analysis: sync globals back to memory and kill.  */
2384 static void la_global_kill(TCGContext *s, int ng)
2385 {
2386     int i;
2387 
2388     for (i = 0; i < ng; i++) {
2389         s->temps[i].state = TS_DEAD | TS_MEM;
2390         la_reset_pref(&s->temps[i]);
2391     }
2392 }
2393 
2394 /* liveness analysis: note live globals crossing calls.  */
2395 static void la_cross_call(TCGContext *s, int nt)
2396 {
2397     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2398     int i;
2399 
2400     for (i = 0; i < nt; i++) {
2401         TCGTemp *ts = &s->temps[i];
2402         if (!(ts->state & TS_DEAD)) {
2403             TCGRegSet *pset = la_temp_pref(ts);
2404             TCGRegSet set = *pset;
2405 
2406             set &= mask;
2407             /* If the combination is not possible, restart.  */
2408             if (set == 0) {
2409                 set = tcg_target_available_regs[ts->type] & mask;
2410             }
2411             *pset = set;
2412         }
2413     }
2414 }
2415 
/*
 * Liveness analysis: update the opc_arg_life array to tell if a given
 * input argument is dead.  Instructions whose outputs are all dead are
 * removed.  The op list is walked in REVERSE so that liveness flows
 * from each use back to its definition; temp state and per-temp
 * register preferences are accumulated along the way.
 */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, reachable through state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;
                int nb_call_regs;

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);
                call_flags = tcg_call_flags(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);

                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
                    op->output_pref[i] = 0;
                }

                /* Helpers may read/write globals unless flagged otherwise. */
                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts && ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);

                /* Input arguments are live for preceding opcodes.  */
                for (i = 0; i < nb_iargs; i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts && ts->state & TS_DEAD) {
                        /* For those arguments that die, and will be allocated
                         * in registers, clear the register set for that arg,
                         * to be filled in below.  For args that will be on
                         * the stack, reset to any available reg.
                         */
                        *la_temp_pref(ts)
                            = (i < nb_call_regs ? 0 :
                               tcg_target_available_regs[ts->type]);
                        ts->state &= ~TS_DEAD;
                    }
                }

                /* For each input argument, add its input register to prefs.
                   If a temp is used once, this produces a single set bit.  */
                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts) {
                        tcg_regset_set_reg(*la_temp_pref(ts),
                                           tcg_target_call_iarg_regs[i]);
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                op->output_pref[i] = *la_temp_pref(ts);

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                /* Narrow each input's preference by its constraint regs,
                   and by the downstream preference of any aliased output. */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= op->output_pref[ct->alias_index];
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
2728 
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
/*
 * For each global flagged indirect_reg, allocate a shadow "direct"
 * temporary and rewrite each op's arguments to use it, inserting
 * explicit ld ops before first use and st ops after last write,
 * as determined by the per-op life data from pass 1.
 * Temp 'state' here tracks TS_DEAD (not loaded) / TS_MEM (synced) / 0
 * (live, modified).  Returns true if any op was changed.
 */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->kind = TEMP_EBB;
            /* state_ptr links the indirect global to its direct shadow. */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps get no shadow and also begin dead. */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                /* A dead shadow must be reloaded before this use. */
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* The mov's output dies here: store the mov's
                           input directly and delete the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
2919 
2920 #ifdef CONFIG_DEBUG_TCG
2921 static void dump_regs(TCGContext *s)
2922 {
2923     TCGTemp *ts;
2924     int i;
2925     char buf[64];
2926 
2927     for(i = 0; i < s->nb_temps; i++) {
2928         ts = &s->temps[i];
2929         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2930         switch(ts->val_type) {
2931         case TEMP_VAL_REG:
2932             printf("%s", tcg_target_reg_names[ts->reg]);
2933             break;
2934         case TEMP_VAL_MEM:
2935             printf("%d(%s)", (int)ts->mem_offset,
2936                    tcg_target_reg_names[ts->mem_base->reg]);
2937             break;
2938         case TEMP_VAL_CONST:
2939             printf("$0x%" PRIx64, ts->val);
2940             break;
2941         case TEMP_VAL_DEAD:
2942             printf("D");
2943             break;
2944         default:
2945             printf("???");
2946             break;
2947         }
2948         printf("\n");
2949     }
2950 
2951     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2952         if (s->reg_to_temp[i] != NULL) {
2953             printf("%s: %s\n",
2954                    tcg_target_reg_names[i],
2955                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2956         }
2957     }
2958 }
2959 
/*
 * Debug helper: verify that s->reg_to_temp[] and the per-temp
 * val_type/reg fields agree in both directions; on any mismatch,
 * dump state and abort.
 */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    /* Direction 1: every mapped register must point at a temp that
       claims to live in that register. */
    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                /* NOTE: jumps into the failure tail of the loop below. */
                goto fail;
            }
        }
    }
    /* Direction 2: every register-resident temp (except fixed ones)
       must be recorded in reg_to_temp. */
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG
            && ts->kind != TEMP_FIXED
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
2991 #endif
2992 
2993 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2994 {
2995     intptr_t off, size, align;
2996 
2997     switch (ts->type) {
2998     case TCG_TYPE_I32:
2999         size = align = 4;
3000         break;
3001     case TCG_TYPE_I64:
3002     case TCG_TYPE_V64:
3003         size = align = 8;
3004         break;
3005     case TCG_TYPE_V128:
3006         size = align = 16;
3007         break;
3008     case TCG_TYPE_V256:
3009         /* Note that we do not require aligned storage for V256. */
3010         size = 32, align = 16;
3011         break;
3012     default:
3013         g_assert_not_reached();
3014     }
3015 
3016     /*
3017      * Assume the stack is sufficiently aligned.
3018      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3019      * and do not require 16 byte vector alignment.  This seems slightly
3020      * easier than fully parameterizing the above switch statement.
3021      */
3022     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3023     off = ROUND_UP(s->current_frame_offset, align);
3024 
3025     /* If we've exhausted the stack frame, restart with a smaller TB. */
3026     if (off + size > s->frame_end) {
3027         tcg_raise_tb_overflow(s);
3028     }
3029     s->current_frame_offset = off + size;
3030 
3031     ts->mem_offset = off;
3032 #if defined(__sparc__)
3033     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3034 #endif
3035     ts->mem_base = s->frame_temp;
3036     ts->mem_allocated = 1;
3037 }
3038 
3039 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3040 
3041 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3042    mark it free; otherwise mark it dead.  */
3043 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3044 {
3045     TCGTempVal new_type;
3046 
3047     switch (ts->kind) {
3048     case TEMP_FIXED:
3049         return;
3050     case TEMP_GLOBAL:
3051     case TEMP_LOCAL:
3052         new_type = TEMP_VAL_MEM;
3053         break;
3054     case TEMP_NORMAL:
3055     case TEMP_EBB:
3056         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3057         break;
3058     case TEMP_CONST:
3059         new_type = TEMP_VAL_CONST;
3060         break;
3061     default:
3062         g_assert_not_reached();
3063     }
3064     if (ts->val_type == TEMP_VAL_REG) {
3065         s->reg_to_temp[ts->reg] = NULL;
3066     }
3067     ts->val_type = new_type;
3068 }
3069 
3070 /* Mark a temporary as dead.  */
3071 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3072 {
3073     temp_free_or_dead(s, ts, 1);
3074 }
3075 
/* Sync a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  If
   'free_or_dead' is non-zero, subsequently release the temporary; if it
   is positive, the temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need writeback; coherent temps already
       match their backing slot.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register and fall
               through to the register store below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Already only in memory; nothing to store.  */
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3119 
3120 /* free register 'reg' by spilling the corresponding temporary if necessary */
3121 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3122 {
3123     TCGTemp *ts = s->reg_to_temp[reg];
3124     if (ts != NULL) {
3125         temp_sync(s, ts, allocated_regs, 0, -1);
3126     }
3127 }
3128 
/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1] = all acceptable regs; reg_ct[0] = preferred subset. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference.  */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            /* Scan in the target's preferred allocation order. */
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something.  Same search, but evict the occupant. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    /* Unreachable: reg_ct[1] was asserted non-empty above. */
    tcg_abort();
}
3200 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already register-resident; nothing to do.  */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Integer constant: simple move-immediate.  */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The constant was never stored; register and slot differ.  */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Freshly loaded: register matches the memory slot.  */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}
3251 
3252 /* Save a temporary to memory. 'allocated_regs' is used in case a
3253    temporary registers needs to be allocated to store a constant.  */
3254 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3255 {
3256     /* The liveness analysis already ensures that globals are back
3257        in memory. Keep an tcg_debug_assert for safety. */
3258     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3259 }
3260 
3261 /* save globals to their canonical location and assume they can be
3262    modified be the following code. 'allocated_regs' is used in case a
3263    temporary registers needs to be allocated to store a constant. */
3264 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3265 {
3266     int i, n;
3267 
3268     for (i = 0, n = s->nb_globals; i < n; i++) {
3269         temp_save(s, &s->temps[i], allocated_regs);
3270     }
3271 }
3272 
3273 /* sync globals to their canonical location and assume they can be
3274    read by the following code. 'allocated_regs' is used in case a
3275    temporary registers needs to be allocated to store a constant. */
3276 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3277 {
3278     int i, n;
3279 
3280     for (i = 0, n = s->nb_globals; i < n; i++) {
3281         TCGTemp *ts = &s->temps[i];
3282         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3283                          || ts->kind == TEMP_FIXED
3284                          || ts->mem_coherent);
3285     }
3286 }
3287 
3288 /* at the end of a basic block, we assume all temporaries are dead and
3289    all globals are stored at their canonical location. */
3290 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3291 {
3292     int i;
3293 
3294     for (i = s->nb_globals; i < s->nb_temps; i++) {
3295         TCGTemp *ts = &s->temps[i];
3296 
3297         switch (ts->kind) {
3298         case TEMP_LOCAL:
3299             temp_save(s, ts, allocated_regs);
3300             break;
3301         case TEMP_NORMAL:
3302         case TEMP_EBB:
3303             /* The liveness analysis already ensures that temps are dead.
3304                Keep an tcg_debug_assert for safety. */
3305             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3306             break;
3307         case TEMP_CONST:
3308             /* Similarly, we should have freed any allocated register. */
3309             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3310             break;
3311         default:
3312             g_assert_not_reached();
3313         }
3314     }
3315 
3316     save_globals(s, allocated_regs);
3317 }
3318 
3319 /*
3320  * At a conditional branch, we assume all temporaries are dead unless
3321  * explicitly live-across-conditional-branch; all globals and local
3322  * temps are synced to their location.
3323  */
3324 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3325 {
3326     sync_globals(s, allocated_regs);
3327 
3328     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3329         TCGTemp *ts = &s->temps[i];
3330         /*
3331          * The liveness analysis already ensures that temps are dead.
3332          * Keep tcg_debug_asserts for safety.
3333          */
3334         switch (ts->kind) {
3335         case TEMP_LOCAL:
3336             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3337             break;
3338         case TEMP_NORMAL:
3339             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3340             break;
3341         case TEMP_EBB:
3342         case TEMP_CONST:
3343             break;
3344         default:
3345             g_assert_not_reached();
3346         }
3347     }
3348 }
3349 
3350 /*
3351  * Specialized code generation for INDEX_op_mov_* with a constant.
3352  */
3353 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3354                                   tcg_target_ulong val, TCGLifeData arg_life,
3355                                   TCGRegSet preferred_regs)
3356 {
3357     /* ENV should not be modified.  */
3358     tcg_debug_assert(!temp_readonly(ots));
3359 
3360     /* The movi is not explicitly generated here.  */
3361     if (ots->val_type == TEMP_VAL_REG) {
3362         s->reg_to_temp[ots->reg] = NULL;
3363     }
3364     ots->val_type = TEMP_VAL_CONST;
3365     ots->val = val;
3366     ots->mem_coherent = 0;
3367     if (NEED_SYNC_ARG(0)) {
3368         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3369     } else if (IS_DEAD_ARG(0)) {
3370         temp_dead(s, ots);
3371     }
3372 }
3373 
3374 /*
3375  * Specialized code generation for INDEX_op_mov_*.
3376  */
3377 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3378 {
3379     const TCGLifeData arg_life = op->life;
3380     TCGRegSet allocated_regs, preferred_regs;
3381     TCGTemp *ts, *ots;
3382     TCGType otype, itype;
3383 
3384     allocated_regs = s->reserved_regs;
3385     preferred_regs = op->output_pref[0];
3386     ots = arg_temp(op->args[0]);
3387     ts = arg_temp(op->args[1]);
3388 
3389     /* ENV should not be modified.  */
3390     tcg_debug_assert(!temp_readonly(ots));
3391 
3392     /* Note that otype != itype for no-op truncation.  */
3393     otype = ots->type;
3394     itype = ts->type;
3395 
3396     if (ts->val_type == TEMP_VAL_CONST) {
3397         /* propagate constant or generate sti */
3398         tcg_target_ulong val = ts->val;
3399         if (IS_DEAD_ARG(1)) {
3400             temp_dead(s, ts);
3401         }
3402         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3403         return;
3404     }
3405 
3406     /* If the source value is in memory we're going to be forced
3407        to have it in a register in order to perform the copy.  Copy
3408        the SOURCE value into its own register first, that way we
3409        don't have to reload SOURCE the next time it is used. */
3410     if (ts->val_type == TEMP_VAL_MEM) {
3411         temp_load(s, ts, tcg_target_available_regs[itype],
3412                   allocated_regs, preferred_regs);
3413     }
3414 
3415     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3416     if (IS_DEAD_ARG(0)) {
3417         /* mov to a non-saved dead register makes no sense (even with
3418            liveness analysis disabled). */
3419         tcg_debug_assert(NEED_SYNC_ARG(0));
3420         if (!ots->mem_allocated) {
3421             temp_allocate_frame(s, ots);
3422         }
3423         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3424         if (IS_DEAD_ARG(1)) {
3425             temp_dead(s, ts);
3426         }
3427         temp_dead(s, ots);
3428     } else {
3429         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3430             /* the mov can be suppressed */
3431             if (ots->val_type == TEMP_VAL_REG) {
3432                 s->reg_to_temp[ots->reg] = NULL;
3433             }
3434             ots->reg = ts->reg;
3435             temp_dead(s, ts);
3436         } else {
3437             if (ots->val_type != TEMP_VAL_REG) {
3438                 /* When allocating a new register, make sure to not spill the
3439                    input one. */
3440                 tcg_regset_set_reg(allocated_regs, ts->reg);
3441                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3442                                          allocated_regs, preferred_regs,
3443                                          ots->indirect_base);
3444             }
3445             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3446                 /*
3447                  * Cross register class move not supported.
3448                  * Store the source register into the destination slot
3449                  * and leave the destination temp as TEMP_VAL_MEM.
3450                  */
3451                 assert(!temp_readonly(ots));
3452                 if (!ts->mem_allocated) {
3453                     temp_allocate_frame(s, ots);
3454                 }
3455                 tcg_out_st(s, ts->type, ts->reg,
3456                            ots->mem_base->reg, ots->mem_offset);
3457                 ots->mem_coherent = 1;
3458                 temp_free_or_dead(s, ots, -1);
3459                 return;
3460             }
3461         }
3462         ots->val_type = TEMP_VAL_REG;
3463         ots->mem_coherent = 0;
3464         s->reg_to_temp[ots->reg] = ots;
3465         if (NEED_SYNC_ARG(0)) {
3466             temp_sync(s, ots, allocated_regs, 0, 0);
3467         }
3468     }
3469 }
3470 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 * Broadcasts the scalar input (args[1]) into every element of the
 * vector output (args[0]), trying: constant propagation, direct
 * register dup, integer->vector move, dup-from-memory, and finally
 * plain load + vector self-dup.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* On big-endian hosts, the least-significant element of the
           scalar is at the high end of the slot; adjust the offset. */
#if HOST_BIG_ENDIAN
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* dup-from-memory unsupported: load the scalar, then self-dup. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3582 
/*
 * Allocate registers and emit host code for a generic TCG opcode:
 * satisfy the input constraints (loading temps as needed), release
 * dead temps, honour call-clobber/side-effect flags, allocate the
 * outputs, and finally emit the target instruction.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /* Final argument values (registers or constants) for the backend. */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Visit inputs in the constraint-sorted order, not operand order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            /* Aliased input: prefer the register the output wants. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Branch and BB-end ops end the basic block and allocate no outputs. */
    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output shares the register of its aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* "New register" constraint: must not overlap any input. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3767 
/*
 * Specialized allocation for dup2_vec: duplicate a 64-bit element built
 * from two 32-bit inputs, on a 32-bit host.  Returns true if the op was
 * emitted here, false if the caller must fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that still replicates to val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        /* Both halves must be in memory before the 64-bit load. */
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        }
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
        }
/* Pick the half whose memory slot is the base of the 64-bit value. */
#if HOST_BIG_ENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
3859 
/* Flip the sign of stack offsets on targets whose stack grows upward. */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3865 
/*
 * Allocate registers and emit a helper call: store overflow arguments
 * to stack slots, load the rest into the target's call-argument
 * registers, free call-clobbered registers, sync or save globals per
 * the call flags, emit the call, then bind the results to the output
 * temps.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = tcg_call_func(op);
    info = tcg_call_info(op);
    flags = info->flags;

    /* nb_regs = number of arguments passed in registers. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
/* Pre-decrement when the stack grows up, post-increment otherwise. */
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* TCG_CALL_DUMMY_ARG marks an unused argument slot; skip it. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Not in a register: load directly into the ABI register. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

#ifdef CONFIG_TCG_INTERPRETER
    {
        /* The interpreter dispatches through libffi; find the prepared
           call interface keyed by the helper's type mask. */
        gpointer hash = (gpointer)(uintptr_t)info->typemask;
        ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
        assert(cif != NULL);
        tcg_out_call(s, func_addr, cif);
    }
#else
    tcg_out_call(s, func_addr);
#endif

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4011 
4012 #ifdef CONFIG_PROFILER
4013 
/*
 * Accumulate a profile field from @from into @to without copy/paste
 * errors.  The source field is read with qatomic_read() since it may
 * be updated concurrently.  PROF_ADD sums; PROF_MAX keeps the maximum.
 */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4027 
4028 /* Pass in a zero'ed @prof */
4029 static inline
4030 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4031 {
4032     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4033     unsigned int i;
4034 
4035     for (i = 0; i < n_ctxs; i++) {
4036         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4037         const TCGProfile *orig = &s->prof;
4038 
4039         if (counters) {
4040             PROF_ADD(prof, orig, cpu_exec_time);
4041             PROF_ADD(prof, orig, tb_count1);
4042             PROF_ADD(prof, orig, tb_count);
4043             PROF_ADD(prof, orig, op_count);
4044             PROF_MAX(prof, orig, op_count_max);
4045             PROF_ADD(prof, orig, temp_count);
4046             PROF_MAX(prof, orig, temp_count_max);
4047             PROF_ADD(prof, orig, del_op_count);
4048             PROF_ADD(prof, orig, code_in_len);
4049             PROF_ADD(prof, orig, code_out_len);
4050             PROF_ADD(prof, orig, search_out_len);
4051             PROF_ADD(prof, orig, interm_time);
4052             PROF_ADD(prof, orig, code_time);
4053             PROF_ADD(prof, orig, la_time);
4054             PROF_ADD(prof, orig, opt_time);
4055             PROF_ADD(prof, orig, restore_count);
4056             PROF_ADD(prof, orig, restore_time);
4057         }
4058         if (table) {
4059             int i;
4060 
4061             for (i = 0; i < NB_OPS; i++) {
4062                 PROF_ADD(prof, orig, table_op_count[i]);
4063             }
4064         }
4065     }
4066 }
4067 
/* The PROF_* helpers are only needed by tcg_profile_snapshot. */
#undef PROF_ADD
#undef PROF_MAX

/* Snapshot only the scalar counters into a zero'ed @prof. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

/* Snapshot only the per-opcode counts into a zero'ed @prof. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4080 
4081 void tcg_dump_op_count(GString *buf)
4082 {
4083     TCGProfile prof = {};
4084     int i;
4085 
4086     tcg_profile_snapshot_table(&prof);
4087     for (i = 0; i < NB_OPS; i++) {
4088         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4089                                prof.table_op_count[i]);
4090     }
4091 }
4092 
4093 int64_t tcg_cpu_exec_time(void)
4094 {
4095     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4096     unsigned int i;
4097     int64_t ret = 0;
4098 
4099     for (i = 0; i < n_ctxs; i++) {
4100         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4101         const TCGProfile *prof = &s->prof;
4102 
4103         ret += qatomic_read(&prof->cpu_exec_time);
4104     }
4105     return ret;
4106 }
4107 #else
/* Stub used when the TCG profiler is not compiled in. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4112 
/* Stub used when the TCG profiler is not compiled in: the query
   cannot be answered, so report the error and exit. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4118 #endif
4119 
4120 
/*
 * Translate the TCGOp list in @s into host code for @tb: run the
 * optimizer and liveness passes, then walk the op list performing
 * register allocation and code emission.  Returns the size in bytes
 * of the generated code, or a negative value on failure: -1 when the
 * code buffer high-water mark is exceeded, -2 when the TB size or
 * relocations overflow, or an error from backend finalization.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        /* Record op and temp counts for this TB. */
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
    if (TCG_TARGET_HAS_direct_jump) {
        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
        tcg_ctx->tb_jmp_target_addr = NULL;
    } else {
        tcg_ctx->tb_jmp_insn_offset = NULL;
        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
    }

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* num_insns stays -1 until the first insn_start op is seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4364 
4365 #ifdef CONFIG_PROFILER
/* Append a human-readable summary of the accumulated JIT profile
   counters (snapshotted across all TCG contexts) to @buf. */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero when no TBs have been translated. */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* From here on, tot is only used as a divisor. */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
4429 #else
4430 void tcg_dump_info(GString *buf)
4431 {
4432     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4433 }
4434 #endif
4435 
4436 #ifdef ELF_HOST_MACHINE
4437 /* In order to use this feature, the backend needs to do three things:
4438 
4439    (1) Define ELF_HOST_MACHINE to indicate both what value to
4440        put into the ELF image and to indicate support for the feature.
4441 
4442    (2) Define tcg_register_jit.  This should create a buffer containing
4443        the contents of a .debug_frame section that describes the post-
4444        prologue unwind info for the tcg machine.
4445 
4446    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4447 */
4448 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values for jit_descriptor.action_flag, telling GDB what changed
   in the entry list when __jit_debug_register_code is called.  */
typedef enum {
    JIT_NOACTION = 0,   /* nothing pending */
    JIT_REGISTER_FN,    /* relevant_entry was just added */
    JIT_UNREGISTER_FN   /* relevant_entry is about to be removed */
} jit_actions_t;
4455 
/* One node in the doubly-linked list of in-memory symbol files that
   GDB walks; layout is fixed by the GDB JIT interface.  */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
4462 
/* Top-level descriptor GDB locates by symbol name; layout is fixed
   by the GDB JIT interface.  */
struct jit_descriptor {
    uint32_t version;       /* GDB JIT interface version; must be 1 */
    uint32_t action_flag;   /* a jit_actions_t describing the last change */
    struct jit_code_entry *relevant_entry;  /* entry just (un)registered */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
4469 
/* GDB places a breakpoint on this function by name; calling it after
   updating __jit_debug_descriptor notifies the debugger of the change.
   The noinline attribute and the empty asm statement keep the compiler
   from inlining or eliding the (observably empty) call.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4475 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  GDB finds this object by its
   (unmangled) symbol name, so neither the name nor the layout may
   change.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4479 
4480 /* End GDB interface.  */
4481 
/*
 * Return the byte offset of @str within the NUL-separated string
 * table @strtab.  The table begins with an empty string (a leading
 * NUL), so scanning starts at offset 1.  @str must be present: the
 * table carries no terminating sentinel, so a lookup of a missing
 * string would walk past the end of the buffer.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *s = strtab + 1; ; s += strlen(s) + 1) {
        if (strcmp(s, str) == 0) {
            return s - strtab;
        }
    }
}
4493 
4494 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4495                                  const void *debug_frame,
4496                                  size_t debug_frame_size)
4497 {
4498     struct __attribute__((packed)) DebugInfo {
4499         uint32_t  len;
4500         uint16_t  version;
4501         uint32_t  abbrev;
4502         uint8_t   ptr_size;
4503         uint8_t   cu_die;
4504         uint16_t  cu_lang;
4505         uintptr_t cu_low_pc;
4506         uintptr_t cu_high_pc;
4507         uint8_t   fn_die;
4508         char      fn_name[16];
4509         uintptr_t fn_low_pc;
4510         uintptr_t fn_high_pc;
4511         uint8_t   cu_eoc;
4512     };
4513 
4514     struct ElfImage {
4515         ElfW(Ehdr) ehdr;
4516         ElfW(Phdr) phdr;
4517         ElfW(Shdr) shdr[7];
4518         ElfW(Sym)  sym[2];
4519         struct DebugInfo di;
4520         uint8_t    da[24];
4521         char       str[80];
4522     };
4523 
4524     struct ElfImage *img;
4525 
4526     static const struct ElfImage img_template = {
4527         .ehdr = {
4528             .e_ident[EI_MAG0] = ELFMAG0,
4529             .e_ident[EI_MAG1] = ELFMAG1,
4530             .e_ident[EI_MAG2] = ELFMAG2,
4531             .e_ident[EI_MAG3] = ELFMAG3,
4532             .e_ident[EI_CLASS] = ELF_CLASS,
4533             .e_ident[EI_DATA] = ELF_DATA,
4534             .e_ident[EI_VERSION] = EV_CURRENT,
4535             .e_type = ET_EXEC,
4536             .e_machine = ELF_HOST_MACHINE,
4537             .e_version = EV_CURRENT,
4538             .e_phoff = offsetof(struct ElfImage, phdr),
4539             .e_shoff = offsetof(struct ElfImage, shdr),
4540             .e_ehsize = sizeof(ElfW(Shdr)),
4541             .e_phentsize = sizeof(ElfW(Phdr)),
4542             .e_phnum = 1,
4543             .e_shentsize = sizeof(ElfW(Shdr)),
4544             .e_shnum = ARRAY_SIZE(img->shdr),
4545             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4546 #ifdef ELF_HOST_FLAGS
4547             .e_flags = ELF_HOST_FLAGS,
4548 #endif
4549 #ifdef ELF_OSABI
4550             .e_ident[EI_OSABI] = ELF_OSABI,
4551 #endif
4552         },
4553         .phdr = {
4554             .p_type = PT_LOAD,
4555             .p_flags = PF_X,
4556         },
4557         .shdr = {
4558             [0] = { .sh_type = SHT_NULL },
4559             /* Trick: The contents of code_gen_buffer are not present in
4560                this fake ELF file; that got allocated elsewhere.  Therefore
4561                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4562                will not look for contents.  We can record any address.  */
4563             [1] = { /* .text */
4564                 .sh_type = SHT_NOBITS,
4565                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4566             },
4567             [2] = { /* .debug_info */
4568                 .sh_type = SHT_PROGBITS,
4569                 .sh_offset = offsetof(struct ElfImage, di),
4570                 .sh_size = sizeof(struct DebugInfo),
4571             },
4572             [3] = { /* .debug_abbrev */
4573                 .sh_type = SHT_PROGBITS,
4574                 .sh_offset = offsetof(struct ElfImage, da),
4575                 .sh_size = sizeof(img->da),
4576             },
4577             [4] = { /* .debug_frame */
4578                 .sh_type = SHT_PROGBITS,
4579                 .sh_offset = sizeof(struct ElfImage),
4580             },
4581             [5] = { /* .symtab */
4582                 .sh_type = SHT_SYMTAB,
4583                 .sh_offset = offsetof(struct ElfImage, sym),
4584                 .sh_size = sizeof(img->sym),
4585                 .sh_info = 1,
4586                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4587                 .sh_entsize = sizeof(ElfW(Sym)),
4588             },
4589             [6] = { /* .strtab */
4590                 .sh_type = SHT_STRTAB,
4591                 .sh_offset = offsetof(struct ElfImage, str),
4592                 .sh_size = sizeof(img->str),
4593             }
4594         },
4595         .sym = {
4596             [1] = { /* code_gen_buffer */
4597                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4598                 .st_shndx = 1,
4599             }
4600         },
4601         .di = {
4602             .len = sizeof(struct DebugInfo) - 4,
4603             .version = 2,
4604             .ptr_size = sizeof(void *),
4605             .cu_die = 1,
4606             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4607             .fn_die = 2,
4608             .fn_name = "code_gen_buffer"
4609         },
4610         .da = {
4611             1,          /* abbrev number (the cu) */
4612             0x11, 1,    /* DW_TAG_compile_unit, has children */
4613             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4614             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4615             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4616             0, 0,       /* end of abbrev */
4617             2,          /* abbrev number (the fn) */
4618             0x2e, 0,    /* DW_TAG_subprogram, no children */
4619             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4620             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4621             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4622             0, 0,       /* end of abbrev */
4623             0           /* no more abbrev */
4624         },
4625         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4626                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4627     };
4628 
4629     /* We only need a single jit entry; statically allocate it.  */
4630     static struct jit_code_entry one_entry;
4631 
4632     uintptr_t buf = (uintptr_t)buf_ptr;
4633     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4634     DebugFrameHeader *dfh;
4635 
4636     img = g_malloc(img_size);
4637     *img = img_template;
4638 
4639     img->phdr.p_vaddr = buf;
4640     img->phdr.p_paddr = buf;
4641     img->phdr.p_memsz = buf_size;
4642 
4643     img->shdr[1].sh_name = find_string(img->str, ".text");
4644     img->shdr[1].sh_addr = buf;
4645     img->shdr[1].sh_size = buf_size;
4646 
4647     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4648     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4649 
4650     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4651     img->shdr[4].sh_size = debug_frame_size;
4652 
4653     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4654     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4655 
4656     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4657     img->sym[1].st_value = buf;
4658     img->sym[1].st_size = buf_size;
4659 
4660     img->di.cu_low_pc = buf;
4661     img->di.cu_high_pc = buf + buf_size;
4662     img->di.fn_low_pc = buf;
4663     img->di.fn_high_pc = buf + buf_size;
4664 
4665     dfh = (DebugFrameHeader *)(img + 1);
4666     memcpy(dfh, debug_frame, debug_frame_size);
4667     dfh->fde.func_start = buf;
4668     dfh->fde.func_len = buf_size;
4669 
4670 #ifdef DEBUG_JIT
4671     /* Enable this block to be able to debug the ELF image file creation.
4672        One can use readelf, objdump, or other inspection utilities.  */
4673     {
4674         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
4675         FILE *f = fopen(jit, "w+b");
4676         if (f) {
4677             if (fwrite(img, img_size, 1, f) != img_size) {
4678                 /* Avoid stupid unused return value warning for fwrite.  */
4679             }
4680             fclose(f);
4681         }
4682     }
4683 #endif
4684 
4685     one_entry.symfile_addr = img;
4686     one_entry.symfile_size = img_size;
4687 
4688     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4689     __jit_debug_descriptor.relevant_entry = &one_entry;
4690     __jit_debug_descriptor.first_entry = &one_entry;
4691     __jit_debug_register_code();
4692 }
4693 #else
4694 /* No support for the feature.  Provide the entry point expected by exec.c,
4695    and implement the internal function we declared earlier.  */
4696 
/* Stub: without ELF_HOST_MACHINE there is no ELF image to build, so
   GDB registration is a no-op; all parameters are ignored.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
4702 
/* Stub entry point expected by exec.c when the backend provides no
   unwind info; intentionally does nothing.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
4706 #endif /* ELF_HOST_MACHINE */
4707 
4708 #if !TCG_TARGET_MAYBE_vec
/* Placeholder when the TCG backend has no vector support: no vector
   opcode should ever need expansion, so reaching here is a front-end
   logic error.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
4713 #endif
4714