/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
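
/*
 * Background note: the structures above describe the generated code in
 * the DWARF call-frame-information format.  tcg_register_jit_int() wraps
 * them in a small in-memory ELF image and hands that to the debugger via
 * GDB's JIT registration interface, so that backtraces can unwind
 * through JIT-generated frames.
 */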

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
#ifdef CONFIG_TCG_INTERPRETER
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         ffi_cif *cif);
#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
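
/*
 * Illustrative reading of the emit helpers above: on a host whose
 * tcg_insn_unit is one byte (a byte-stream encoding such as x86),
 * tcg_out32() takes the memcpy path and advances code_ptr by
 * 4 / TCG_TARGET_INSN_UNIT_SIZE == 4 units; on a host with 4-byte
 * units (a fixed-width RISC encoding), it takes the direct-store path
 * and advances by a single unit.  The same number of bytes is emitted
 * either way.
 */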

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
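
/*
 * To summarize the pieces above: when the backend emits a branch to a
 * label whose address is not yet known, it calls tcg_out_reloc() to
 * queue a TCGRelocation against the label.  tcg_out_label() later
 * records the label's final address, and tcg_resolve_relocs() walks
 * every queued entry at the end of code generation, asking the
 * backend's patch_reloc() to rewrite each branch in place.
 */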

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

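/*
 * Illustrative expansion (not part of the original source): the prefix
 * macros above simply paste their arguments into one identifier, so a
 * constraint line such as
 *     C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h becomes, across the three passes below,
 *   1. the enumerator c_o1_i2_r_r_ri in TCGConstraintSetIndex,
 *   2. the array entry { .args_ct_str = { "r", "r", "ri" } }, and
 *   3. the value returned from tcg_target_op_def().
 */
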
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
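
/*
 * Usage sketch (illustrative): callers normally go through the
 * tcg_malloc() inline in tcg.h, which bumps pool_cur and falls back to
 * tcg_malloc_internal() above only when the current chunk is exhausted,
 * e.g.
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *
 * Everything but the large-malloc list is recycled wholesale by
 * tcg_pool_reset(), which tcg_func_start() invokes at the start of
 * each translation.
 */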

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static GHashTable *ffi_table;

static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
#endif

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);

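        /*
         * Worked example (illustrative): for a helper with two
         * arguments, the second argument's 3-bit typecode occupies
         * bits [8:6] of typemask.  After the >> 3 above drops the
         * return type, that field sits in bits [5:3]; if its top bit
         * is the highest bit set, 32 - clz32(...) yields 6 and
         * DIV_ROUND_UP(6, 3) gives nargs = 2.
         */
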
        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }
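
    /*
     * Illustrative example (not from the original source): if the
     * target's allocation order is { s0, s1, s2, c0, c1 }, with the
     * s* registers callee-saved and the c* registers call-clobbered,
     * the first loop finds n = 3 and the indirect order becomes
     * { s2, s1, s0, c0, c1 }: indirection bases are drawn from the
     * saved registers that the normal allocator prefers last.
     */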

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
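
/*
 * Worked example (illustrative numbers): with a 64-byte icache line and
 * code_gen_ptr at 0x1010, the TB header is placed at 0x1040 and the
 * translated code that follows starts at the next 64-byte boundary past
 * the header, keeping the two on different cache lines as promised.
 */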

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
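
/*
 * Layout sketch (illustrative): on a 32-bit host, a 64-bit global such
 * as "x" at offset 8 is split into two 32-bit halves, x_0 (the less
 * significant word) and x_1 (the more significant word).  On a
 * little-endian host x_0 lands at offset 8 and x_1 at offset 12; on a
 * big-endian host the two offsets are swapped, which is exactly what
 * the bigendian * 4 arithmetic above computes.
 */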

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
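
/*
 * Usage note (illustrative): the typed wrappers in tcg.h, e.g.
 *
 *     TCGv_i32 one = tcg_constant_i32(1);
 *
 * funnel into tcg_constant_internal() above.  The returned temp is
 * interned per (type, value), is read-only, and must not be freed by
 * the caller; tcg_temp_free_internal() silently ignores TEMP_CONST
 * for exactly that reason.
 */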

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
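
/*
 * Note the contrast with tcg_constant_*(): the tcg_const_*() helpers
 * above allocate a fresh, mutable temporary and emit a movi into it,
 * so the result may be overwritten and must eventually be freed by
 * the caller, while tcg_constant_*() returns a shared, read-only temp.
 */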

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64-bit args to 32-bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
1453 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1454 {
1455     int i, real_args, nb_rets, pi;
1456     unsigned typemask;
1457     const TCGHelperInfo *info;
1458     TCGOp *op;
1459 
1460     info = g_hash_table_lookup(helper_table, (gpointer)func);
1461     typemask = info->typemask;
1462 
1463 #ifdef CONFIG_PLUGIN
1464     /* detect non-plugin helpers */
1465     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1466         tcg_ctx->plugin_insn->calls_helpers = true;
1467     }
1468 #endif
1469 
1470 #if defined(__sparc__) && !defined(__arch64__) \
1471     && !defined(CONFIG_TCG_INTERPRETER)
1472     /* We have 64-bit values in one register, but need to pass as two
1473        separate parameters.  Split them.  */
1474     int orig_typemask = typemask;
1475     int orig_nargs = nargs;
1476     TCGv_i64 retl, reth;
1477     TCGTemp *split_args[MAX_OPC_PARAM];
1478 
1479     retl = NULL;
1480     reth = NULL;
1481     typemask = 0;
1482     for (i = real_args = 0; i < nargs; ++i) {
1483         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1484         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1485 
1486         if (is_64bit) {
1487             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1488             TCGv_i32 h = tcg_temp_new_i32();
1489             TCGv_i32 l = tcg_temp_new_i32();
1490             tcg_gen_extr_i64_i32(l, h, orig);
1491             split_args[real_args++] = tcgv_i32_temp(h);
1492             typemask |= dh_typecode_i32 << (real_args * 3);
1493             split_args[real_args++] = tcgv_i32_temp(l);
1494             typemask |= dh_typecode_i32 << (real_args * 3);
1495         } else {
1496             split_args[real_args++] = args[i];
1497             typemask |= argtype << (real_args * 3);
1498         }
1499     }
1500     nargs = real_args;
1501     args = split_args;
1502 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1503     for (i = 0; i < nargs; ++i) {
1504         int argtype = extract32(typemask, (i + 1) * 3, 3);
1505         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1506         bool is_signed = argtype & 1;
1507 
1508         if (is_32bit) {
1509             TCGv_i64 temp = tcg_temp_new_i64();
1510             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1511             if (is_signed) {
1512                 tcg_gen_ext32s_i64(temp, orig);
1513             } else {
1514                 tcg_gen_ext32u_i64(temp, orig);
1515             }
1516             args[i] = tcgv_i64_temp(temp);
1517         }
1518     }
1519 #endif /* TCG_TARGET_EXTEND_ARGS */
1520 
1521     op = tcg_emit_op(INDEX_op_call);
1522 
1523     pi = 0;
1524     if (ret != NULL) {
1525 #if defined(__sparc__) && !defined(__arch64__) \
1526     && !defined(CONFIG_TCG_INTERPRETER)
1527         if ((typemask & 6) == dh_typecode_i64) {
1528             /* The 32-bit ABI is going to return the 64-bit value in
1529                the %o0/%o1 register pair.  Prepare for this by using
1530                two return temporaries, and reassemble below.  */
1531             retl = tcg_temp_new_i64();
1532             reth = tcg_temp_new_i64();
1533             op->args[pi++] = tcgv_i64_arg(reth);
1534             op->args[pi++] = tcgv_i64_arg(retl);
1535             nb_rets = 2;
1536         } else {
1537             op->args[pi++] = temp_arg(ret);
1538             nb_rets = 1;
1539         }
1540 #else
1541         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1542 #ifdef HOST_WORDS_BIGENDIAN
1543             op->args[pi++] = temp_arg(ret + 1);
1544             op->args[pi++] = temp_arg(ret);
1545 #else
1546             op->args[pi++] = temp_arg(ret);
1547             op->args[pi++] = temp_arg(ret + 1);
1548 #endif
1549             nb_rets = 2;
1550         } else {
1551             op->args[pi++] = temp_arg(ret);
1552             nb_rets = 1;
1553         }
1554 #endif
1555     } else {
1556         nb_rets = 0;
1557     }
1558     TCGOP_CALLO(op) = nb_rets;
1559 
1560     real_args = 0;
1561     for (i = 0; i < nargs; i++) {
1562         int argtype = extract32(typemask, (i + 1) * 3, 3);
1563         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1564         bool want_align = false;
1565 
1566 #if defined(CONFIG_TCG_INTERPRETER)
1567         /*
1568          * Align all arguments, so that they land in predictable places
1569          * for passing off to ffi_call.
1570          */
1571         want_align = true;
1572 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1573         /* Some targets want aligned 64 bit args */
1574         want_align = is_64bit;
1575 #endif
1576 
1577         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1578             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1579             real_args++;
1580         }
1581 
1582         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1583             /*
1584              * If stack grows up, then we will be placing successive
1585              * arguments at lower addresses, which means we need to
1586              * reverse the order compared to how we would normally
1587              * treat either big or little-endian.  For those arguments
1588              * that will wind up in registers, this still works for
1589              * HPPA (the only current STACK_GROWSUP target) since the
1590              * argument registers are *also* allocated in decreasing
1591              * order.  If another such target is added, this logic may
1592              * have to get more complicated to differentiate between
1593              * stack arguments and register arguments.
1594              */
1595 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1596             op->args[pi++] = temp_arg(args[i] + 1);
1597             op->args[pi++] = temp_arg(args[i]);
1598 #else
1599             op->args[pi++] = temp_arg(args[i]);
1600             op->args[pi++] = temp_arg(args[i] + 1);
1601 #endif
1602             real_args += 2;
1603             continue;
1604         }
1605 
1606         op->args[pi++] = temp_arg(args[i]);
1607         real_args++;
1608     }
1609     op->args[pi++] = (uintptr_t)func;
1610     op->args[pi++] = (uintptr_t)info;
1611     TCGOP_CALLI(op) = real_args;
1612 
1613     /* Make sure the fields didn't overflow.  */
1614     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1615     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1616 
1617 #if defined(__sparc__) && !defined(__arch64__) \
1618     && !defined(CONFIG_TCG_INTERPRETER)
1619     /* Free all of the parts we allocated above.  */
1620     for (i = real_args = 0; i < orig_nargs; ++i) {
1621         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1622         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1623 
1624         if (is_64bit) {
1625             tcg_temp_free_internal(args[real_args++]);
1626             tcg_temp_free_internal(args[real_args++]);
1627         } else {
1628             real_args++;
1629         }
1630     }
1631     if ((orig_typemask & 6) == dh_typecode_i64) {
1632         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1633            Note that describing these as TCGv_i64 eliminates an unnecessary
1634            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1635         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1636         tcg_temp_free_i64(retl);
1637         tcg_temp_free_i64(reth);
1638     }
1639 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1640     for (i = 0; i < nargs; ++i) {
1641         int argtype = extract32(typemask, (i + 1) * 3, 3);
1642         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1643 
1644         if (is_32bit) {
1645             tcg_temp_free_internal(args[i]);
1646         }
1647     }
1648 #endif /* TCG_TARGET_EXTEND_ARGS */
1649 }
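/*
 * Annotation (not part of the original source): a worked example of the
 * 64-bit-argument splitting above.  On a 32-bit host, temp_arg(args[i])
 * is the low half and temp_arg(args[i] + 1) the high half.  Assuming a
 * little-endian host with a downward-growing stack, a helper taking one
 * i64 argument is emitted as
 *
 *     op->args[pi + 0] = temp_arg(args[i]);      // low 32 bits
 *     op->args[pi + 1] = temp_arg(args[i] + 1);  // high 32 bits
 *
 * whereas a big-endian host -- or an upward-growing stack, but not both
 * at once -- takes the reversed branch of the #if above.
 */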
1650 
1651 static void tcg_reg_alloc_start(TCGContext *s)
1652 {
1653     int i, n;
1654 
1655     for (i = 0, n = s->nb_temps; i < n; i++) {
1656         TCGTemp *ts = &s->temps[i];
1657         TCGTempVal val = TEMP_VAL_MEM;
1658 
1659         switch (ts->kind) {
1660         case TEMP_CONST:
1661             val = TEMP_VAL_CONST;
1662             break;
1663         case TEMP_FIXED:
1664             val = TEMP_VAL_REG;
1665             break;
1666         case TEMP_GLOBAL:
1667             break;
1668         case TEMP_NORMAL:
1669             val = TEMP_VAL_DEAD;
1670             /* fall through */
1671         case TEMP_LOCAL:
1672             ts->mem_allocated = 0;
1673             break;
1674         default:
1675             g_assert_not_reached();
1676         }
1677         ts->val_type = val;
1678     }
1679 
1680     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1681 }
1682 
1683 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1684                                  TCGTemp *ts)
1685 {
1686     int idx = temp_idx(ts);
1687 
1688     switch (ts->kind) {
1689     case TEMP_FIXED:
1690     case TEMP_GLOBAL:
1691         pstrcpy(buf, buf_size, ts->name);
1692         break;
1693     case TEMP_LOCAL:
1694         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1695         break;
1696     case TEMP_NORMAL:
1697         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1698         break;
1699     case TEMP_CONST:
1700         switch (ts->type) {
1701         case TCG_TYPE_I32:
1702             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1703             break;
1704 #if TCG_TARGET_REG_BITS > 32
1705         case TCG_TYPE_I64:
1706             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1707             break;
1708 #endif
1709         case TCG_TYPE_V64:
1710         case TCG_TYPE_V128:
1711         case TCG_TYPE_V256:
1712             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1713                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1714             break;
1715         default:
1716             g_assert_not_reached();
1717         }
1718         break;
1719     }
1720     return buf;
1721 }
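/*
 * Usage sketch (annotation): the strings produced above, for
 * hypothetical temps of each kind, look like
 *
 *     tcg_get_arg_str_ptr(s, buf, sizeof(buf), fixed_ts);   // "env"
 *     tcg_get_arg_str_ptr(s, buf, sizeof(buf), local_ts);   // "loc0"
 *     tcg_get_arg_str_ptr(s, buf, sizeof(buf), normal_ts);  // "tmp3"
 *     tcg_get_arg_str_ptr(s, buf, sizeof(buf), const_ts);   // "$0x7f"
 *
 * Only TEMP_FIXED and TEMP_GLOBAL temps carry a real ts->name; the
 * loc/tmp indexes are relative to s->nb_globals.
 */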
1722 
1723 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1724                              int buf_size, TCGArg arg)
1725 {
1726     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1727 }
1728 
1729 static const char * const cond_name[] =
1730 {
1731     [TCG_COND_NEVER] = "never",
1732     [TCG_COND_ALWAYS] = "always",
1733     [TCG_COND_EQ] = "eq",
1734     [TCG_COND_NE] = "ne",
1735     [TCG_COND_LT] = "lt",
1736     [TCG_COND_GE] = "ge",
1737     [TCG_COND_LE] = "le",
1738     [TCG_COND_GT] = "gt",
1739     [TCG_COND_LTU] = "ltu",
1740     [TCG_COND_GEU] = "geu",
1741     [TCG_COND_LEU] = "leu",
1742     [TCG_COND_GTU] = "gtu"
1743 };
1744 
1745 static const char * const ldst_name[] =
1746 {
1747     [MO_UB]   = "ub",
1748     [MO_SB]   = "sb",
1749     [MO_LEUW] = "leuw",
1750     [MO_LESW] = "lesw",
1751     [MO_LEUL] = "leul",
1752     [MO_LESL] = "lesl",
1753     [MO_LEQ]  = "leq",
1754     [MO_BEUW] = "beuw",
1755     [MO_BESW] = "besw",
1756     [MO_BEUL] = "beul",
1757     [MO_BESL] = "besl",
1758     [MO_BEQ]  = "beq",
1759 };
1760 
1761 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1762 #ifdef TARGET_ALIGNED_ONLY
1763     [MO_UNALN >> MO_ASHIFT]    = "un+",
1764     [MO_ALIGN >> MO_ASHIFT]    = "",
1765 #else
1766     [MO_UNALN >> MO_ASHIFT]    = "",
1767     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1768 #endif
1769     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1770     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1771     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1772     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1773     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1774     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1775 };
1776 
1777 static const char bswap_flag_name[][6] = {
1778     [TCG_BSWAP_IZ] = "iz",
1779     [TCG_BSWAP_OZ] = "oz",
1780     [TCG_BSWAP_OS] = "os",
1781     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1782     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1783 };
1784 
1785 static inline bool tcg_regset_single(TCGRegSet d)
1786 {
1787     return (d & (d - 1)) == 0;
1788 }
1789 
1790 static inline TCGReg tcg_regset_first(TCGRegSet d)
1791 {
1792     if (TCG_TARGET_NB_REGS <= 32) {
1793         return ctz32(d);
1794     } else {
1795         return ctz64(d);
1796     }
1797 }
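/*
 * Worked example (annotation): for d = 0x8 (register 3 only),
 * d & (d - 1) == 0x8 & 0x7 == 0, so tcg_regset_single() is true and
 * tcg_regset_first() == ctz32(0x8) == 3.  For d = 0xa the AND leaves
 * bit 3 set, so the set is not a single register.  Note that the empty
 * set also satisfies tcg_regset_single(); callers only apply it to
 * non-empty sets.
 */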
1798 
1799 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1800 {
1801     char buf[128];
1802     TCGOp *op;
1803 
1804     QTAILQ_FOREACH(op, &s->ops, link) {
1805         int i, k, nb_oargs, nb_iargs, nb_cargs;
1806         const TCGOpDef *def;
1807         TCGOpcode c;
1808         int col = 0;
1809 
1810         c = op->opc;
1811         def = &tcg_op_defs[c];
1812 
1813         if (c == INDEX_op_insn_start) {
1814             nb_oargs = 0;
1815             col += qemu_log("\n ----");
1816 
1817             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1818                 target_ulong a;
1819 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1820                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1821 #else
1822                 a = op->args[i];
1823 #endif
1824                 col += qemu_log(" " TARGET_FMT_lx, a);
1825             }
1826         } else if (c == INDEX_op_call) {
1827             const TCGHelperInfo *info = tcg_call_info(op);
1828             void *func = tcg_call_func(op);
1829 
1830             /* variable number of arguments */
1831             nb_oargs = TCGOP_CALLO(op);
1832             nb_iargs = TCGOP_CALLI(op);
1833             nb_cargs = def->nb_cargs;
1834 
1835             col += qemu_log(" %s ", def->name);
1836 
1837             /*
1838              * Print the function name from TCGHelperInfo, if available.
1839              * Note that plugins have a template function for the info,
1840              * but the actual function pointer comes from the plugin.
1841              */
1842             if (func == info->func) {
1843                 col += qemu_log("%s", info->name);
1844             } else {
1845                 col += qemu_log("plugin(%p)", func);
1846             }
1847 
1848             col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1849             for (i = 0; i < nb_oargs; i++) {
1850                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1851                                                        op->args[i]));
1852             }
1853             for (i = 0; i < nb_iargs; i++) {
1854                 TCGArg arg = op->args[nb_oargs + i];
1855                 const char *t = "<dummy>";
1856                 if (arg != TCG_CALL_DUMMY_ARG) {
1857                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1858                 }
1859                 col += qemu_log(",%s", t);
1860             }
1861         } else {
1862             col += qemu_log(" %s ", def->name);
1863 
1864             nb_oargs = def->nb_oargs;
1865             nb_iargs = def->nb_iargs;
1866             nb_cargs = def->nb_cargs;
1867 
1868             if (def->flags & TCG_OPF_VECTOR) {
1869                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1870                                 8 << TCGOP_VECE(op));
1871             }
1872 
1873             k = 0;
1874             for (i = 0; i < nb_oargs; i++) {
1875                 if (k != 0) {
1876                     col += qemu_log(",");
1877                 }
1878                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1879                                                       op->args[k++]));
1880             }
1881             for (i = 0; i < nb_iargs; i++) {
1882                 if (k != 0) {
1883                     col += qemu_log(",");
1884                 }
1885                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1886                                                       op->args[k++]));
1887             }
1888             switch (c) {
1889             case INDEX_op_brcond_i32:
1890             case INDEX_op_setcond_i32:
1891             case INDEX_op_movcond_i32:
1892             case INDEX_op_brcond2_i32:
1893             case INDEX_op_setcond2_i32:
1894             case INDEX_op_brcond_i64:
1895             case INDEX_op_setcond_i64:
1896             case INDEX_op_movcond_i64:
1897             case INDEX_op_cmp_vec:
1898             case INDEX_op_cmpsel_vec:
1899                 if (op->args[k] < ARRAY_SIZE(cond_name)
1900                     && cond_name[op->args[k]]) {
1901                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1902                 } else {
1903                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1904                 }
1905                 i = 1;
1906                 break;
1907             case INDEX_op_qemu_ld_i32:
1908             case INDEX_op_qemu_st_i32:
1909             case INDEX_op_qemu_st8_i32:
1910             case INDEX_op_qemu_ld_i64:
1911             case INDEX_op_qemu_st_i64:
1912                 {
1913                     MemOpIdx oi = op->args[k++];
1914                     MemOp op = get_memop(oi);
1915                     unsigned ix = get_mmuidx(oi);
1916 
1917                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1918                         col += qemu_log(",$0x%x,%u", op, ix);
1919                     } else {
1920                         const char *s_al, *s_op;
1921                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1922                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1923                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1924                     }
1925                     i = 1;
1926                 }
1927                 break;
1928             case INDEX_op_bswap16_i32:
1929             case INDEX_op_bswap16_i64:
1930             case INDEX_op_bswap32_i32:
1931             case INDEX_op_bswap32_i64:
1932             case INDEX_op_bswap64_i64:
1933                 {
1934                     TCGArg flags = op->args[k];
1935                     const char *name = NULL;
1936 
1937                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1938                         name = bswap_flag_name[flags];
1939                     }
1940                     if (name) {
1941                         col += qemu_log(",%s", name);
1942                     } else {
1943                         col += qemu_log(",$0x%" TCG_PRIlx, flags);
1944                     }
1945                     i = k = 1;
1946                 }
1947                 break;
1948             default:
1949                 i = 0;
1950                 break;
1951             }
1952             switch (c) {
1953             case INDEX_op_set_label:
1954             case INDEX_op_br:
1955             case INDEX_op_brcond_i32:
1956             case INDEX_op_brcond_i64:
1957             case INDEX_op_brcond2_i32:
1958                 col += qemu_log("%s$L%d", k ? "," : "",
1959                                 arg_label(op->args[k])->id);
1960                 i++, k++;
1961                 break;
1962             default:
1963                 break;
1964             }
1965             for (; i < nb_cargs; i++, k++) {
1966                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1967             }
1968         }
1969 
1970         if (have_prefs || op->life) {
1971 
1972             QemuLogFile *logfile;
1973 
1974             rcu_read_lock();
1975             logfile = qatomic_rcu_read(&qemu_logfile);
1976             if (logfile) {
1977                 for (; col < 40; ++col) {
1978                     putc(' ', logfile->fd);
1979                 }
1980             }
1981             rcu_read_unlock();
1982         }
1983 
1984         if (op->life) {
1985             unsigned life = op->life;
1986 
1987             if (life & (SYNC_ARG * 3)) {
1988                 qemu_log("  sync:");
1989                 for (i = 0; i < 2; ++i) {
1990                     if (life & (SYNC_ARG << i)) {
1991                         qemu_log(" %d", i);
1992                     }
1993                 }
1994             }
1995             life /= DEAD_ARG;
1996             if (life) {
1997                 qemu_log("  dead:");
1998                 for (i = 0; life; ++i, life >>= 1) {
1999                     if (life & 1) {
2000                         qemu_log(" %d", i);
2001                     }
2002                 }
2003             }
2004         }
2005 
2006         if (have_prefs) {
2007             for (i = 0; i < nb_oargs; ++i) {
2008                 TCGRegSet set = op->output_pref[i];
2009 
2010                 if (i == 0) {
2011                     qemu_log("  pref=");
2012                 } else {
2013                     qemu_log(",");
2014                 }
2015                 if (set == 0) {
2016                     qemu_log("none");
2017                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2018                     qemu_log("all");
2019 #ifdef CONFIG_DEBUG_TCG
2020                 } else if (tcg_regset_single(set)) {
2021                     TCGReg reg = tcg_regset_first(set);
2022                     qemu_log("%s", tcg_target_reg_names[reg]);
2023 #endif
2024                 } else if (TCG_TARGET_NB_REGS <= 32) {
2025                     qemu_log("%#x", (uint32_t)set);
2026                 } else {
2027                     qemu_log("%#" PRIx64, (uint64_t)set);
2028                 }
2029             }
2030         }
2031 
2032         qemu_log("\n");
2033     }
2034 }
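/*
 * Sample output (annotation; names, addresses and columns are
 * hypothetical, the format is approximate):
 *
 *  ---- 000000000040116b 0000000000000000
 *  mov_i32 tmp0,var                      sync: 0  dead: 1  pref=all
 *  add_i32 tmp0,tmp0,$0x1                                  pref=0xffff
 *
 * The life and pref columns are only emitted when liveness data or
 * output preferences are available, padded to column 40 as above.
 */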
2035 
2036 /* we give more priority to constraints with fewer registers */
2037 static int get_constraint_priority(const TCGOpDef *def, int k)
2038 {
2039     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2040     int n;
2041 
2042     if (arg_ct->oalias) {
2043         /* an alias is equivalent to a single register */
2044         n = 1;
2045     } else {
2046         n = ctpop64(arg_ct->regs);
2047     }
2048     return TCG_TARGET_NB_REGS - n + 1;
2049 }
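/*
 * Worked example (annotation): assuming TCG_TARGET_NB_REGS == 16, a
 * constraint tied to one register (or an output alias) has priority
 * 16 - 1 + 1 == 16, while one accepting all sixteen registers has
 * 16 - 16 + 1 == 1, so the scarcest constraints are allocated first
 * once sorted below.
 */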
2050 
2051 /* sort from highest priority to lowest */
2052 static void sort_constraints(TCGOpDef *def, int start, int n)
2053 {
2054     int i, j;
2055     TCGArgConstraint *a = def->args_ct;
2056 
2057     for (i = 0; i < n; i++) {
2058         a[start + i].sort_index = start + i;
2059     }
2060     if (n <= 1) {
2061         return;
2062     }
2063     for (i = 0; i < n - 1; i++) {
2064         for (j = i + 1; j < n; j++) {
2065             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2066             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2067             if (p1 < p2) {
2068                 int tmp = a[start + i].sort_index;
2069                 a[start + i].sort_index = a[start + j].sort_index;
2070                 a[start + j].sort_index = tmp;
2071             }
2072         }
2073     }
2074 }
2075 
2076 static void process_op_defs(TCGContext *s)
2077 {
2078     TCGOpcode op;
2079 
2080     for (op = 0; op < NB_OPS; op++) {
2081         TCGOpDef *def = &tcg_op_defs[op];
2082         const TCGTargetOpDef *tdefs;
2083         int i, nb_args;
2084 
2085         if (def->flags & TCG_OPF_NOT_PRESENT) {
2086             continue;
2087         }
2088 
2089         nb_args = def->nb_iargs + def->nb_oargs;
2090         if (nb_args == 0) {
2091             continue;
2092         }
2093 
2094         /*
2095          * Macro magic should make it impossible, but double-check that
2096          * the array index is in range.  Since the signedness of an enum
2097          * is implementation-defined, force the result to unsigned.
2098          */
2099         unsigned con_set = tcg_target_op_def(op);
2100         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2101         tdefs = &constraint_sets[con_set];
2102 
2103         for (i = 0; i < nb_args; i++) {
2104             const char *ct_str = tdefs->args_ct_str[i];
2105             /* Incomplete TCGTargetOpDef entry. */
2106             tcg_debug_assert(ct_str != NULL);
2107 
2108             while (*ct_str != '\0') {
2109             switch (*ct_str) {
2110                 case '0' ... '9':
2111                     {
2112                         int oarg = *ct_str - '0';
2113                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2114                         tcg_debug_assert(oarg < def->nb_oargs);
2115                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2116                         def->args_ct[i] = def->args_ct[oarg];
2117                         /* The output sets oalias.  */
2118                         def->args_ct[oarg].oalias = true;
2119                         def->args_ct[oarg].alias_index = i;
2120                         /* The input sets ialias. */
2121                         def->args_ct[i].ialias = true;
2122                         def->args_ct[i].alias_index = oarg;
2123                     }
2124                     ct_str++;
2125                     break;
2126                 case '&':
2127                     def->args_ct[i].newreg = true;
2128                     ct_str++;
2129                     break;
2130                 case 'i':
2131                     def->args_ct[i].ct |= TCG_CT_CONST;
2132                     ct_str++;
2133                     break;
2134 
2135                 /* Include all of the target-specific constraints. */
2136 
2137 #undef CONST
2138 #define CONST(CASE, MASK) \
2139     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2140 #define REGS(CASE, MASK) \
2141     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2142 
2143 #include "tcg-target-con-str.h"
2144 
2145 #undef REGS
2146 #undef CONST
2147                 default:
2148                     /* Typo in TCGTargetOpDef constraint. */
2149                     g_assert_not_reached();
2150                 }
2151             }
2152         }
2153 
2154         /* TCGTargetOpDef entry with too much information? */
2155         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2156 
2157         /* sort the constraints (XXX: this is just a heuristic) */
2158         sort_constraints(def, 0, def->nb_oargs);
2159         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2160     }
2161 }
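/*
 * Parsing sketch (annotation): a hypothetical constraint set such as
 *
 *     { .args_ct_str = { "r", "0", "ri" } }
 *
 * is decoded by the loop above as: output 0 in any register ('r' from
 * tcg-target-con-str.h), input 1 aliased to output 0 (the '0'..'9'
 * case, setting oalias/ialias), and input 2 either a register or an
 * immediate ('i' sets TCG_CT_CONST).
 */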
2162 
2163 void tcg_op_remove(TCGContext *s, TCGOp *op)
2164 {
2165     TCGLabel *label;
2166 
2167     switch (op->opc) {
2168     case INDEX_op_br:
2169         label = arg_label(op->args[0]);
2170         label->refs--;
2171         break;
2172     case INDEX_op_brcond_i32:
2173     case INDEX_op_brcond_i64:
2174         label = arg_label(op->args[3]);
2175         label->refs--;
2176         break;
2177     case INDEX_op_brcond2_i32:
2178         label = arg_label(op->args[5]);
2179         label->refs--;
2180         break;
2181     default:
2182         break;
2183     }
2184 
2185     QTAILQ_REMOVE(&s->ops, op, link);
2186     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2187     s->nb_ops--;
2188 
2189 #ifdef CONFIG_PROFILER
2190     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2191 #endif
2192 }
2193 
2194 void tcg_remove_ops_after(TCGOp *op)
2195 {
2196     TCGContext *s = tcg_ctx;
2197 
2198     while (true) {
2199         TCGOp *last = tcg_last_op();
2200         if (last == op) {
2201             return;
2202         }
2203         tcg_op_remove(s, last);
2204     }
2205 }
2206 
2207 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2208 {
2209     TCGContext *s = tcg_ctx;
2210     TCGOp *op;
2211 
2212     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2213         op = tcg_malloc(sizeof(TCGOp));
2214     } else {
2215         op = QTAILQ_FIRST(&s->free_ops);
2216         QTAILQ_REMOVE(&s->free_ops, op, link);
2217     }
2218     memset(op, 0, offsetof(TCGOp, link));
2219     op->opc = opc;
2220     s->nb_ops++;
2221 
2222     return op;
2223 }
2224 
2225 TCGOp *tcg_emit_op(TCGOpcode opc)
2226 {
2227     TCGOp *op = tcg_op_alloc(opc);
2228     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2229     return op;
2230 }
2231 
2232 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2233 {
2234     TCGOp *new_op = tcg_op_alloc(opc);
2235     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2236     return new_op;
2237 }
2238 
2239 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2240 {
2241     TCGOp *new_op = tcg_op_alloc(opc);
2242     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2243     return new_op;
2244 }
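/*
 * Usage sketch (annotation): liveness_pass_2() below uses the insert
 * helpers to materialize loads around an existing op, e.g.
 *
 *     TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld_i32);
 *     lop->args[0] = temp_arg(dir_ts);
 *     lop->args[1] = temp_arg(arg_ts->mem_base);
 *     lop->args[2] = arg_ts->mem_offset;
 *
 * tcg_op_alloc() zeroes everything up to the link field, so the caller
 * must fill in all arguments.
 */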
2245 
2246 /* Reachability analysis: remove unreachable code.  */
2247 static void reachable_code_pass(TCGContext *s)
2248 {
2249     TCGOp *op, *op_next;
2250     bool dead = false;
2251 
2252     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2253         bool remove = dead;
2254         TCGLabel *label;
2255 
2256         switch (op->opc) {
2257         case INDEX_op_set_label:
2258             label = arg_label(op->args[0]);
2259             if (label->refs == 0) {
2260                 /*
2261                  * While there is an occasional backward branch, virtually
2262                  * all branches generated by the translators are forward.
2263                  * This means that, by the time we see the label, we will
2264                  * generally have already removed all references to it, and
2265                  * there is little to be gained by iterating.
2266                  */
2267                 remove = true;
2268             } else {
2269                 /* Once we see a label, insns become live again.  */
2270                 dead = false;
2271                 remove = false;
2272 
2273                 /*
2274                  * Optimization can fold conditional branches to unconditional.
2275                  * If we find a label with one reference which is preceded by
2276                  * an unconditional branch to it, remove both.  This needed to
2277                  * wait until the dead code in between them was removed.
2278                  */
2279                 if (label->refs == 1) {
2280                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2281                     if (op_prev->opc == INDEX_op_br &&
2282                         label == arg_label(op_prev->args[0])) {
2283                         tcg_op_remove(s, op_prev);
2284                         remove = true;
2285                     }
2286                 }
2287             }
2288             break;
2289 
2290         case INDEX_op_br:
2291         case INDEX_op_exit_tb:
2292         case INDEX_op_goto_ptr:
2293             /* Unconditional branches; everything following is dead.  */
2294             dead = true;
2295             break;
2296 
2297         case INDEX_op_call:
2298             /* Notice noreturn helper calls, raising exceptions.  */
2299             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2300                 dead = true;
2301             }
2302             break;
2303 
2304         case INDEX_op_insn_start:
2305             /* Never remove -- we need to keep these for unwind.  */
2306             remove = false;
2307             break;
2308 
2309         default:
2310             break;
2311         }
2312 
2313         if (remove) {
2314             tcg_op_remove(s, op);
2315         }
2316     }
2317 }
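/*
 * Example (annotation): once the optimizer folds a conditional branch
 * to an unconditional one, the pass above turns
 *
 *     br $L1
 *     mov_i32 tmp0,$0x0     <- dead, removed
 *     set_label $L1         <- refs == 1 and preceded by br: both go
 *
 * into nothing, while any insn_start in the dead region is preserved
 * for unwind information.
 */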
2318 
2319 #define TS_DEAD  1
2320 #define TS_MEM   2
2321 
2322 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2323 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
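/* Annotation: during the backward scan of liveness_pass_1, ts->state
   is a bit mask.  TS_DEAD set means no later op reads the temp from a
   register; TS_MEM set means the temp must also be up to date in
   memory.  Globals at function end are TS_DEAD | TS_MEM: unused in
   registers but required to reside in their canonical memory slot. */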
2324 
2325 /* For liveness_pass_1, the register preferences for a given temp.  */
2326 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2327 {
2328     return ts->state_ptr;
2329 }
2330 
2331 /* For liveness_pass_1, reset the preferences for a given temp to the
2332  * maximal regset for its type.
2333  */
2334 static inline void la_reset_pref(TCGTemp *ts)
2335 {
2336     *la_temp_pref(ts)
2337         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2338 }
2339 
2340 /* liveness analysis: end of function: all temps are dead, and globals
2341    should be in memory. */
2342 static void la_func_end(TCGContext *s, int ng, int nt)
2343 {
2344     int i;
2345 
2346     for (i = 0; i < ng; ++i) {
2347         s->temps[i].state = TS_DEAD | TS_MEM;
2348         la_reset_pref(&s->temps[i]);
2349     }
2350     for (i = ng; i < nt; ++i) {
2351         s->temps[i].state = TS_DEAD;
2352         la_reset_pref(&s->temps[i]);
2353     }
2354 }
2355 
2356 /* liveness analysis: end of basic block: all temps are dead, globals
2357    and local temps should be in memory. */
2358 static void la_bb_end(TCGContext *s, int ng, int nt)
2359 {
2360     int i;
2361 
2362     for (i = 0; i < nt; ++i) {
2363         TCGTemp *ts = &s->temps[i];
2364         int state;
2365 
2366         switch (ts->kind) {
2367         case TEMP_FIXED:
2368         case TEMP_GLOBAL:
2369         case TEMP_LOCAL:
2370             state = TS_DEAD | TS_MEM;
2371             break;
2372         case TEMP_NORMAL:
2373         case TEMP_CONST:
2374             state = TS_DEAD;
2375             break;
2376         default:
2377             g_assert_not_reached();
2378         }
2379         ts->state = state;
2380         la_reset_pref(ts);
2381     }
2382 }
2383 
2384 /* liveness analysis: sync globals back to memory.  */
2385 static void la_global_sync(TCGContext *s, int ng)
2386 {
2387     int i;
2388 
2389     for (i = 0; i < ng; ++i) {
2390         int state = s->temps[i].state;
2391         s->temps[i].state = state | TS_MEM;
2392         if (state == TS_DEAD) {
2393             /* If the global was previously dead, reset prefs.  */
2394             la_reset_pref(&s->temps[i]);
2395         }
2396     }
2397 }
2398 
2399 /*
2400  * liveness analysis: conditional branch: all temps are dead,
2401  * globals and local temps should be synced.
2402  */
2403 static void la_bb_sync(TCGContext *s, int ng, int nt)
2404 {
2405     la_global_sync(s, ng);
2406 
2407     for (int i = ng; i < nt; ++i) {
2408         TCGTemp *ts = &s->temps[i];
2409         int state;
2410 
2411         switch (ts->kind) {
2412         case TEMP_LOCAL:
2413             state = ts->state;
2414             ts->state = state | TS_MEM;
2415             if (state != TS_DEAD) {
2416                 continue;
2417             }
2418             break;
2419         case TEMP_NORMAL:
2420             s->temps[i].state = TS_DEAD;
2421             break;
2422         case TEMP_CONST:
2423             continue;
2424         default:
2425             g_assert_not_reached();
2426         }
2427         la_reset_pref(&s->temps[i]);
2428     }
2429 }
2430 
2431 /* liveness analysis: sync globals back to memory and kill.  */
2432 static void la_global_kill(TCGContext *s, int ng)
2433 {
2434     int i;
2435 
2436     for (i = 0; i < ng; i++) {
2437         s->temps[i].state = TS_DEAD | TS_MEM;
2438         la_reset_pref(&s->temps[i]);
2439     }
2440 }
2441 
2442 /* liveness analysis: note live temps crossing calls.  */
2443 static void la_cross_call(TCGContext *s, int nt)
2444 {
2445     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2446     int i;
2447 
2448     for (i = 0; i < nt; i++) {
2449         TCGTemp *ts = &s->temps[i];
2450         if (!(ts->state & TS_DEAD)) {
2451             TCGRegSet *pset = la_temp_pref(ts);
2452             TCGRegSet set = *pset;
2453 
2454             set &= mask;
2455             /* If the combination is not possible, restart.  */
2456             if (set == 0) {
2457                 set = tcg_target_available_regs[ts->type] & mask;
2458             }
2459             *pset = set;
2460         }
2461     }
2462 }
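/*
 * Worked example (annotation): assume a hypothetical 16-register host
 * where tcg_target_call_clobber_regs == 0x00ff and a live temp prefers
 * set == 0x0003.  Then set & mask == 0x0003 & 0xff00 == 0: the
 * preference cannot survive the call, so it is restarted as
 * tcg_target_available_regs[type] & 0xff00, i.e. the call-saved
 * registers only.
 */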
2463 
2464 /* Liveness analysis: update the opc_arg_life array to tell if a
2465    given input argument is dead. Instructions updating dead
2466    temporaries are removed. */
2467 static void liveness_pass_1(TCGContext *s)
2468 {
2469     int nb_globals = s->nb_globals;
2470     int nb_temps = s->nb_temps;
2471     TCGOp *op, *op_prev;
2472     TCGRegSet *prefs;
2473     int i;
2474 
2475     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2476     for (i = 0; i < nb_temps; ++i) {
2477         s->temps[i].state_ptr = prefs + i;
2478     }
2479 
2480     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2481     la_func_end(s, nb_globals, nb_temps);
2482 
2483     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2484         int nb_iargs, nb_oargs;
2485         TCGOpcode opc_new, opc_new2;
2486         bool have_opc_new2;
2487         TCGLifeData arg_life = 0;
2488         TCGTemp *ts;
2489         TCGOpcode opc = op->opc;
2490         const TCGOpDef *def = &tcg_op_defs[opc];
2491 
2492         switch (opc) {
2493         case INDEX_op_call:
2494             {
2495                 int call_flags;
2496                 int nb_call_regs;
2497 
2498                 nb_oargs = TCGOP_CALLO(op);
2499                 nb_iargs = TCGOP_CALLI(op);
2500                 call_flags = tcg_call_flags(op);
2501 
2502                 /* pure functions can be removed if their result is unused */
2503                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2504                     for (i = 0; i < nb_oargs; i++) {
2505                         ts = arg_temp(op->args[i]);
2506                         if (ts->state != TS_DEAD) {
2507                             goto do_not_remove_call;
2508                         }
2509                     }
2510                     goto do_remove;
2511                 }
2512             do_not_remove_call:
2513 
2514                 /* Output args are dead.  */
2515                 for (i = 0; i < nb_oargs; i++) {
2516                     ts = arg_temp(op->args[i]);
2517                     if (ts->state & TS_DEAD) {
2518                         arg_life |= DEAD_ARG << i;
2519                     }
2520                     if (ts->state & TS_MEM) {
2521                         arg_life |= SYNC_ARG << i;
2522                     }
2523                     ts->state = TS_DEAD;
2524                     la_reset_pref(ts);
2525 
2526                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2527                     op->output_pref[i] = 0;
2528                 }
2529 
2530                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2531                                     TCG_CALL_NO_READ_GLOBALS))) {
2532                     la_global_kill(s, nb_globals);
2533                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2534                     la_global_sync(s, nb_globals);
2535                 }
2536 
2537                 /* Record arguments that die in this helper.  */
2538                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2539                     ts = arg_temp(op->args[i]);
2540                     if (ts && ts->state & TS_DEAD) {
2541                         arg_life |= DEAD_ARG << i;
2542                     }
2543                 }
2544 
2545                 /* For all live registers, remove call-clobbered prefs.  */
2546                 la_cross_call(s, nb_temps);
2547 
2548                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2549 
2550                 /* Input arguments are live for preceding opcodes.  */
2551                 for (i = 0; i < nb_iargs; i++) {
2552                     ts = arg_temp(op->args[i + nb_oargs]);
2553                     if (ts && ts->state & TS_DEAD) {
2554                         /* For those arguments that die, and will be allocated
2555                          * in registers, clear the register set for that arg,
2556                          * to be filled in below.  For args that will be on
2557                          * the stack, reset to any available reg.
2558                          */
2559                         *la_temp_pref(ts)
2560                             = (i < nb_call_regs ? 0 :
2561                                tcg_target_available_regs[ts->type]);
2562                         ts->state &= ~TS_DEAD;
2563                     }
2564                 }
2565 
2566                 /* For each input argument, add its input register to prefs.
2567                    If a temp is used once, this produces a single set bit.  */
2568                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2569                     ts = arg_temp(op->args[i + nb_oargs]);
2570                     if (ts) {
2571                         tcg_regset_set_reg(*la_temp_pref(ts),
2572                                            tcg_target_call_iarg_regs[i]);
2573                     }
2574                 }
2575             }
2576             break;
2577         case INDEX_op_insn_start:
2578             break;
2579         case INDEX_op_discard:
2580             /* mark the temporary as dead */
2581             ts = arg_temp(op->args[0]);
2582             ts->state = TS_DEAD;
2583             la_reset_pref(ts);
2584             break;
2585 
2586         case INDEX_op_add2_i32:
2587             opc_new = INDEX_op_add_i32;
2588             goto do_addsub2;
2589         case INDEX_op_sub2_i32:
2590             opc_new = INDEX_op_sub_i32;
2591             goto do_addsub2;
2592         case INDEX_op_add2_i64:
2593             opc_new = INDEX_op_add_i64;
2594             goto do_addsub2;
2595         case INDEX_op_sub2_i64:
2596             opc_new = INDEX_op_sub_i64;
2597         do_addsub2:
2598             nb_iargs = 4;
2599             nb_oargs = 2;
2600             /* Test if the high part of the operation is dead, but not
2601                the low part.  The result can be optimized to a simple
2602                add or sub.  This happens often for an x86_64 guest
2603                when the CPU mode is set to 32 bit.  */
2604             if (arg_temp(op->args[1])->state == TS_DEAD) {
2605                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2606                     goto do_remove;
2607                 }
2608                 /* Replace the opcode and adjust the args in place,
2609                    leaving 3 unused args at the end.  */
2610                 op->opc = opc = opc_new;
2611                 op->args[1] = op->args[2];
2612                 op->args[2] = op->args[4];
2613                 /* Fall through and mark the single-word operation live.  */
2614                 nb_iargs = 2;
2615                 nb_oargs = 1;
2616             }
2617             goto do_not_remove;
2618 
2619         case INDEX_op_mulu2_i32:
2620             opc_new = INDEX_op_mul_i32;
2621             opc_new2 = INDEX_op_muluh_i32;
2622             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2623             goto do_mul2;
2624         case INDEX_op_muls2_i32:
2625             opc_new = INDEX_op_mul_i32;
2626             opc_new2 = INDEX_op_mulsh_i32;
2627             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2628             goto do_mul2;
2629         case INDEX_op_mulu2_i64:
2630             opc_new = INDEX_op_mul_i64;
2631             opc_new2 = INDEX_op_muluh_i64;
2632             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2633             goto do_mul2;
2634         case INDEX_op_muls2_i64:
2635             opc_new = INDEX_op_mul_i64;
2636             opc_new2 = INDEX_op_mulsh_i64;
2637             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2638             goto do_mul2;
2639         do_mul2:
2640             nb_iargs = 2;
2641             nb_oargs = 2;
2642             if (arg_temp(op->args[1])->state == TS_DEAD) {
2643                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2644                     /* Both parts of the operation are dead.  */
2645                     goto do_remove;
2646                 }
2647                 /* The high part of the operation is dead; generate the low. */
2648                 op->opc = opc = opc_new;
2649                 op->args[1] = op->args[2];
2650                 op->args[2] = op->args[3];
2651             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2652                 /* The low part of the operation is dead; generate the high. */
2653                 op->opc = opc = opc_new2;
2654                 op->args[0] = op->args[1];
2655                 op->args[1] = op->args[2];
2656                 op->args[2] = op->args[3];
2657             } else {
2658                 goto do_not_remove;
2659             }
2660             /* Mark the single-word operation live.  */
2661             nb_oargs = 1;
2662             goto do_not_remove;
2663 
2664         default:
2665             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2666             nb_iargs = def->nb_iargs;
2667             nb_oargs = def->nb_oargs;
2668 
2669             /* Test if the operation can be removed because all
2670                its outputs are dead. We assume that nb_oargs == 0
2671                implies side effects */
2672             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2673                 for (i = 0; i < nb_oargs; i++) {
2674                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2675                         goto do_not_remove;
2676                     }
2677                 }
2678                 goto do_remove;
2679             }
2680             goto do_not_remove;
2681 
2682         do_remove:
2683             tcg_op_remove(s, op);
2684             break;
2685 
2686         do_not_remove:
2687             for (i = 0; i < nb_oargs; i++) {
2688                 ts = arg_temp(op->args[i]);
2689 
2690                 /* Remember the preference of the uses that followed.  */
2691                 op->output_pref[i] = *la_temp_pref(ts);
2692 
2693                 /* Output args are dead.  */
2694                 if (ts->state & TS_DEAD) {
2695                     arg_life |= DEAD_ARG << i;
2696                 }
2697                 if (ts->state & TS_MEM) {
2698                     arg_life |= SYNC_ARG << i;
2699                 }
2700                 ts->state = TS_DEAD;
2701                 la_reset_pref(ts);
2702             }
2703 
2704             /* If end of basic block, update.  */
2705             if (def->flags & TCG_OPF_BB_EXIT) {
2706                 la_func_end(s, nb_globals, nb_temps);
2707             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2708                 la_bb_sync(s, nb_globals, nb_temps);
2709             } else if (def->flags & TCG_OPF_BB_END) {
2710                 la_bb_end(s, nb_globals, nb_temps);
2711             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2712                 la_global_sync(s, nb_globals);
2713                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2714                     la_cross_call(s, nb_temps);
2715                 }
2716             }
2717 
2718             /* Record arguments that die in this opcode.  */
2719             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2720                 ts = arg_temp(op->args[i]);
2721                 if (ts->state & TS_DEAD) {
2722                     arg_life |= DEAD_ARG << i;
2723                 }
2724             }
2725 
2726             /* Input arguments are live for preceding opcodes.  */
2727             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2728                 ts = arg_temp(op->args[i]);
2729                 if (ts->state & TS_DEAD) {
2730                     /* For operands that were dead, initially allow
2731                        all regs for the type.  */
2732                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2733                     ts->state &= ~TS_DEAD;
2734                 }
2735             }
2736 
2737             /* Incorporate constraints for this operand.  */
2738             switch (opc) {
2739             case INDEX_op_mov_i32:
2740             case INDEX_op_mov_i64:
2741                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2742                    have proper constraints.  That said, special case
2743                    moves to propagate preferences backward.  */
2744                 if (IS_DEAD_ARG(1)) {
2745                     *la_temp_pref(arg_temp(op->args[0]))
2746                         = *la_temp_pref(arg_temp(op->args[1]));
2747                 }
2748                 break;
2749 
2750             default:
2751                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2752                     const TCGArgConstraint *ct = &def->args_ct[i];
2753                     TCGRegSet set, *pset;
2754 
2755                     ts = arg_temp(op->args[i]);
2756                     pset = la_temp_pref(ts);
2757                     set = *pset;
2758 
2759                     set &= ct->regs;
2760                     if (ct->ialias) {
2761                         set &= op->output_pref[ct->alias_index];
2762                     }
2763                     /* If the combination is not possible, restart.  */
2764                     if (set == 0) {
2765                         set = ct->regs;
2766                     }
2767                     *pset = set;
2768                 }
2769                 break;
2770             }
2771             break;
2772         }
2773         op->life = arg_life;
2774     }
2775 }
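/*
 * Encoding example (annotation): op->life packs the bits computed
 * above.  For an op with one output and two inputs where output 0
 * must be synced and input 2 (the second input) dies, arg_life is
 * (SYNC_ARG << 0) | (DEAD_ARG << 2), which tcg_dump_ops() renders as
 * "sync: 0  dead: 2".
 */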
2776 
2777 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2778 static bool liveness_pass_2(TCGContext *s)
2779 {
2780     int nb_globals = s->nb_globals;
2781     int nb_temps, i;
2782     bool changes = false;
2783     TCGOp *op, *op_next;
2784 
2785     /* Create a temporary for each indirect global.  */
2786     for (i = 0; i < nb_globals; ++i) {
2787         TCGTemp *its = &s->temps[i];
2788         if (its->indirect_reg) {
2789             TCGTemp *dts = tcg_temp_alloc(s);
2790             dts->type = its->type;
2791             dts->base_type = its->base_type;
2792             its->state_ptr = dts;
2793         } else {
2794             its->state_ptr = NULL;
2795         }
2796         /* All globals begin dead.  */
2797         its->state = TS_DEAD;
2798     }
2799     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2800         TCGTemp *its = &s->temps[i];
2801         its->state_ptr = NULL;
2802         its->state = TS_DEAD;
2803     }
2804 
2805     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2806         TCGOpcode opc = op->opc;
2807         const TCGOpDef *def = &tcg_op_defs[opc];
2808         TCGLifeData arg_life = op->life;
2809         int nb_iargs, nb_oargs, call_flags;
2810         TCGTemp *arg_ts, *dir_ts;
2811 
2812         if (opc == INDEX_op_call) {
2813             nb_oargs = TCGOP_CALLO(op);
2814             nb_iargs = TCGOP_CALLI(op);
2815             call_flags = tcg_call_flags(op);
2816         } else {
2817             nb_iargs = def->nb_iargs;
2818             nb_oargs = def->nb_oargs;
2819 
2820             /* Set flags similar to how calls require.  */
2821             if (def->flags & TCG_OPF_COND_BRANCH) {
2822                 /* Like reading globals: sync_globals */
2823                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2824             } else if (def->flags & TCG_OPF_BB_END) {
2825                 /* Like writing globals: save_globals */
2826                 call_flags = 0;
2827             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2828                 /* Like reading globals: sync_globals */
2829                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2830             } else {
2831                 /* No effect on globals.  */
2832                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2833                               TCG_CALL_NO_WRITE_GLOBALS);
2834             }
2835         }
2836 
2837         /* Make sure that input arguments are available.  */
2838         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2839             arg_ts = arg_temp(op->args[i]);
2840             if (arg_ts) {
2841                 dir_ts = arg_ts->state_ptr;
2842                 if (dir_ts && arg_ts->state == TS_DEAD) {
2843                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2844                                       ? INDEX_op_ld_i32
2845                                       : INDEX_op_ld_i64);
2846                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2847 
2848                     lop->args[0] = temp_arg(dir_ts);
2849                     lop->args[1] = temp_arg(arg_ts->mem_base);
2850                     lop->args[2] = arg_ts->mem_offset;
2851 
2852                     /* Loaded, but synced with memory.  */
2853                     arg_ts->state = TS_MEM;
2854                 }
2855             }
2856         }
2857 
2858         /* Perform input replacement, and mark inputs that became dead.
2859            No action is required except keeping temp_state up to date
2860            so that we reload when needed.  */
2861         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2862             arg_ts = arg_temp(op->args[i]);
2863             if (arg_ts) {
2864                 dir_ts = arg_ts->state_ptr;
2865                 if (dir_ts) {
2866                     op->args[i] = temp_arg(dir_ts);
2867                     changes = true;
2868                     if (IS_DEAD_ARG(i)) {
2869                         arg_ts->state = TS_DEAD;
2870                     }
2871                 }
2872             }
2873         }
2874 
2875         /* Liveness analysis should ensure that the following are
2876            all correct, for call sites and basic block end points.  */
2877         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2878             /* Nothing to do */
2879         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2880             for (i = 0; i < nb_globals; ++i) {
2881                 /* Liveness should see that globals are synced back,
2882                    that is, either TS_DEAD or TS_MEM.  */
2883                 arg_ts = &s->temps[i];
2884                 tcg_debug_assert(arg_ts->state_ptr == 0
2885                                  || arg_ts->state != 0);
2886             }
2887         } else {
2888             for (i = 0; i < nb_globals; ++i) {
2889                 /* Liveness should see that globals are saved back,
2890                    that is, TS_DEAD, waiting to be reloaded.  */
2891                 arg_ts = &s->temps[i];
2892                 tcg_debug_assert(arg_ts->state_ptr == 0
2893                                  || arg_ts->state == TS_DEAD);
2894             }
2895         }
2896 
2897         /* Outputs become available.  */
2898         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2899             arg_ts = arg_temp(op->args[0]);
2900             dir_ts = arg_ts->state_ptr;
2901             if (dir_ts) {
2902                 op->args[0] = temp_arg(dir_ts);
2903                 changes = true;
2904 
2905                 /* The output is now live and modified.  */
2906                 arg_ts->state = 0;
2907 
2908                 if (NEED_SYNC_ARG(0)) {
2909                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2910                                       ? INDEX_op_st_i32
2911                                       : INDEX_op_st_i64);
2912                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2913                     TCGTemp *out_ts = dir_ts;
2914 
2915                     if (IS_DEAD_ARG(0)) {
2916                         out_ts = arg_temp(op->args[1]);
2917                         arg_ts->state = TS_DEAD;
2918                         tcg_op_remove(s, op);
2919                     } else {
2920                         arg_ts->state = TS_MEM;
2921                     }
2922 
2923                     sop->args[0] = temp_arg(out_ts);
2924                     sop->args[1] = temp_arg(arg_ts->mem_base);
2925                     sop->args[2] = arg_ts->mem_offset;
2926                 } else {
2927                     tcg_debug_assert(!IS_DEAD_ARG(0));
2928                 }
2929             }
2930         } else {
2931             for (i = 0; i < nb_oargs; i++) {
2932                 arg_ts = arg_temp(op->args[i]);
2933                 dir_ts = arg_ts->state_ptr;
2934                 if (!dir_ts) {
2935                     continue;
2936                 }
2937                 op->args[i] = temp_arg(dir_ts);
2938                 changes = true;
2939 
2940                 /* The output is now live and modified.  */
2941                 arg_ts->state = 0;
2942 
2943                 /* Sync outputs upon their last write.  */
2944                 if (NEED_SYNC_ARG(i)) {
2945                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2946                                       ? INDEX_op_st_i32
2947                                       : INDEX_op_st_i64);
2948                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2949 
2950                     sop->args[0] = temp_arg(dir_ts);
2951                     sop->args[1] = temp_arg(arg_ts->mem_base);
2952                     sop->args[2] = arg_ts->mem_offset;
2953 
2954                     arg_ts->state = TS_MEM;
2955                 }
2956                 /* Drop outputs that are dead.  */
2957                 if (IS_DEAD_ARG(i)) {
2958                     arg_ts->state = TS_DEAD;
2959                 }
2960             }
2961         }
2962     }
2963 
2964     return changes;
2965 }
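/*
 * Rewrite example (annotation): for an indirect global g shadowed by a
 * direct temp d, a use such as
 *
 *     add_i32 g,g,$0x1
 *
 * conceptually becomes
 *
 *     ld_i32  d,env,$0x10      <- inserted while g was TS_DEAD
 *     add_i32 d,d,$0x1
 *     st_i32  d,env,$0x10      <- inserted because the write needs sync
 *
 * with mem_base/mem_offset supplying the (hypothetical) env slot.
 */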
2966 
2967 #ifdef CONFIG_DEBUG_TCG
2968 static void dump_regs(TCGContext *s)
2969 {
2970     TCGTemp *ts;
2971     int i;
2972     char buf[64];
2973 
2974     for (i = 0; i < s->nb_temps; i++) {
2975         ts = &s->temps[i];
2976         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2977         switch (ts->val_type) {
2978         case TEMP_VAL_REG:
2979             printf("%s", tcg_target_reg_names[ts->reg]);
2980             break;
2981         case TEMP_VAL_MEM:
2982             printf("%d(%s)", (int)ts->mem_offset,
2983                    tcg_target_reg_names[ts->mem_base->reg]);
2984             break;
2985         case TEMP_VAL_CONST:
2986             printf("$0x%" PRIx64, ts->val);
2987             break;
2988         case TEMP_VAL_DEAD:
2989             printf("D");
2990             break;
2991         default:
2992             printf("???");
2993             break;
2994         }
2995         printf("\n");
2996     }
2997 
2998     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2999         if (s->reg_to_temp[i] != NULL) {
3000             printf("%s: %s\n",
3001                    tcg_target_reg_names[i],
3002                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3003         }
3004     }
3005 }
3006 
3007 static void check_regs(TCGContext *s)
3008 {
3009     int reg;
3010     int k;
3011     TCGTemp *ts;
3012     char buf[64];
3013 
3014     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3015         ts = s->reg_to_temp[reg];
3016         if (ts != NULL) {
3017             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3018                 printf("Inconsistency for register %s:\n",
3019                        tcg_target_reg_names[reg]);
3020                 goto fail;
3021             }
3022         }
3023     }
3024     for (k = 0; k < s->nb_temps; k++) {
3025         ts = &s->temps[k];
3026         if (ts->val_type == TEMP_VAL_REG
3027             && ts->kind != TEMP_FIXED
3028             && s->reg_to_temp[ts->reg] != ts) {
3029             printf("Inconsistency for temp %s:\n",
3030                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3031         fail:
3032             printf("reg state:\n");
3033             dump_regs(s);
3034             tcg_abort();
3035         }
3036     }
3037 }
3038 #endif
3039 
3040 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3041 {
3042     intptr_t off, size, align;
3043 
3044     switch (ts->type) {
3045     case TCG_TYPE_I32:
3046         size = align = 4;
3047         break;
3048     case TCG_TYPE_I64:
3049     case TCG_TYPE_V64:
3050         size = align = 8;
3051         break;
3052     case TCG_TYPE_V128:
3053         size = align = 16;
3054         break;
3055     case TCG_TYPE_V256:
3056         /* Note that we do not require aligned storage for V256. */
3057         size = 32, align = 16;
3058         break;
3059     default:
3060         g_assert_not_reached();
3061     }
3062 
3063     assert(align <= TCG_TARGET_STACK_ALIGN);
3064     off = ROUND_UP(s->current_frame_offset, align);
3065 
3066     /* If we've exhausted the stack frame, restart with a smaller TB. */
3067     if (off + size > s->frame_end) {
3068         tcg_raise_tb_overflow(s);
3069     }
3070     s->current_frame_offset = off + size;
3071 
3072     ts->mem_offset = off;
3073 #if defined(__sparc__)
3074     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3075 #endif
3076     ts->mem_base = s->frame_temp;
3077     ts->mem_allocated = 1;
3078 }
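/*
 * Worked example (annotation): with current_frame_offset == 12, an
 * I64 temp (size == align == 8) lands at off = ROUND_UP(12, 8) == 16
 * and current_frame_offset advances to 24.  Had 16 + 8 exceeded
 * frame_end, tcg_raise_tb_overflow() would restart the TB smaller.
 */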
3079 
3080 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3081 
3082 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3083    mark it free; otherwise mark it dead.  */
3084 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3085 {
3086     TCGTempVal new_type;
3087 
3088     switch (ts->kind) {
3089     case TEMP_FIXED:
3090         return;
3091     case TEMP_GLOBAL:
3092     case TEMP_LOCAL:
3093         new_type = TEMP_VAL_MEM;
3094         break;
3095     case TEMP_NORMAL:
3096         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3097         break;
3098     case TEMP_CONST:
3099         new_type = TEMP_VAL_CONST;
3100         break;
3101     default:
3102         g_assert_not_reached();
3103     }
3104     if (ts->val_type == TEMP_VAL_REG) {
3105         s->reg_to_temp[ts->reg] = NULL;
3106     }
3107     ts->val_type = new_type;
3108 }
3109 
3110 /* Mark a temporary as dead.  */
3111 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3112 {
3113     temp_free_or_dead(s, ts, 1);
3114 }
3115 
3116 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3117    register needs to be allocated to store a constant.  If 'free_or_dead'
3118    is non-zero, subsequently release the temporary; if it is positive, the
3119    temp is dead; if it is negative, the temp is free.  */
3120 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3121                       TCGRegSet preferred_regs, int free_or_dead)
3122 {
3123     if (!temp_readonly(ts) && !ts->mem_coherent) {
3124         if (!ts->mem_allocated) {
3125             temp_allocate_frame(s, ts);
3126         }
3127         switch (ts->val_type) {
3128         case TEMP_VAL_CONST:
3129             /* If we're going to free the temp immediately, then we won't
3130                require it later in a register, so attempt to store the
3131                constant to memory directly.  */
3132             if (free_or_dead
3133                 && tcg_out_sti(s, ts->type, ts->val,
3134                                ts->mem_base->reg, ts->mem_offset)) {
3135                 break;
3136             }
3137             temp_load(s, ts, tcg_target_available_regs[ts->type],
3138                       allocated_regs, preferred_regs);
3139             /* fallthrough */
3140 
3141         case TEMP_VAL_REG:
3142             tcg_out_st(s, ts->type, ts->reg,
3143                        ts->mem_base->reg, ts->mem_offset);
3144             break;
3145 
3146         case TEMP_VAL_MEM:
3147             break;
3148 
3149         case TEMP_VAL_DEAD:
3150         default:
3151             tcg_abort();
3152         }
3153         ts->mem_coherent = 1;
3154     }
3155     if (free_or_dead) {
3156         temp_free_or_dead(s, ts, free_or_dead);
3157     }
3158 }
3159 
3160 /* free register 'reg' by spilling the corresponding temporary if necessary */
3161 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3162 {
3163     TCGTemp *ts = s->reg_to_temp[reg];
3164     if (ts != NULL) {
3165         temp_sync(s, ts, allocated_regs, 0, -1);
3166     }
3167 }
3168 
3169 /**
3170  * tcg_reg_alloc:
3171  * @required_regs: Set of registers in which we must allocate.
3172  * @allocated_regs: Set of registers which must be avoided.
3173  * @preferred_regs: Set of registers we should prefer.
3174  * @rev: True if we search the registers in "indirect" order.
3175  *
3176  * The allocated register must be in @required_regs & ~@allocated_regs,
3177  * but if we can put it in @preferred_regs we may save a move later.
3178  */
3179 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3180                             TCGRegSet allocated_regs,
3181                             TCGRegSet preferred_regs, bool rev)
3182 {
3183     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3184     TCGRegSet reg_ct[2];
3185     const int *order;
3186 
3187     reg_ct[1] = required_regs & ~allocated_regs;
3188     tcg_debug_assert(reg_ct[1] != 0);
3189     reg_ct[0] = reg_ct[1] & preferred_regs;
3190 
3191     /* Skip the preferred_regs option if it cannot be satisfied, or if the
3192        preference made no difference: f indexes the first set to try.  */
3193     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3194 
3195     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3196 
3197     /* Try free registers, preferences first.  */
3198     for (j = f; j < 2; j++) {
3199         TCGRegSet set = reg_ct[j];
3200 
3201         if (tcg_regset_single(set)) {
3202             /* One register in the set.  */
3203             TCGReg reg = tcg_regset_first(set);
3204             if (s->reg_to_temp[reg] == NULL) {
3205                 return reg;
3206             }
3207         } else {
3208             for (i = 0; i < n; i++) {
3209                 TCGReg reg = order[i];
3210                 if (s->reg_to_temp[reg] == NULL &&
3211                     tcg_regset_test_reg(set, reg)) {
3212                     return reg;
3213                 }
3214             }
3215         }
3216     }
3217 
3218     /* We must spill something.  */
3219     for (j = f; j < 2; j++) {
3220         TCGRegSet set = reg_ct[j];
3221 
3222         if (tcg_regset_single(set)) {
3223             /* One register in the set.  */
3224             TCGReg reg = tcg_regset_first(set);
3225             tcg_reg_free(s, reg, allocated_regs);
3226             return reg;
3227         } else {
3228             for (i = 0; i < n; i++) {
3229                 TCGReg reg = order[i];
3230                 if (tcg_regset_test_reg(set, reg)) {
3231                     tcg_reg_free(s, reg, allocated_regs);
3232                     return reg;
3233                 }
3234             }
3235         }
3236     }
3237 
3238     tcg_abort();
3239 }
3240 
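/*
 * Illustrative sketch only (hence the unused attribute, following this
 * file's convention): how a caller might combine the three register
 * sets accepted by tcg_reg_alloc().  The helper and the particular
 * sets chosen here are hypothetical, not part of the allocator.
 */
static TCGReg tcg_reg_alloc_example(TCGContext *s)
    __attribute__((unused));
static TCGReg tcg_reg_alloc_example(TCGContext *s)
{
    /* Any register that can hold a 32-bit value is acceptable... */
    TCGRegSet required = tcg_target_available_regs[TCG_TYPE_I32];
    /* ...but the backend's reserved registers must be avoided... */
    TCGRegSet avoid = s->reserved_regs;
    /* ...and we express no preference, so the preferred pass is skipped. */
    TCGRegSet prefer = 0;

    return tcg_reg_alloc(s, required, avoid, prefer, false);
}
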
3241 /* Make sure the temporary is in a register.  If needed, allocate the register
3242    from DESIRED while avoiding ALLOCATED.  */
3243 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3244                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3245 {
3246     TCGReg reg;
3247 
3248     switch (ts->val_type) {
3249     case TEMP_VAL_REG:
3250         return;
3251     case TEMP_VAL_CONST:
3252         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3253                             preferred_regs, ts->indirect_base);
3254         if (ts->type <= TCG_TYPE_I64) {
3255             tcg_out_movi(s, ts->type, reg, ts->val);
3256         } else {
3257             uint64_t val = ts->val;
3258             MemOp vece = MO_64;
3259 
3260             /*
3261              * Find the minimal vector element that matches the constant.
3262              * The targets will, in general, have to do this search anyway,
3263              * so do it generically here.
3264              */
3265             if (val == dup_const(MO_8, val)) {
3266                 vece = MO_8;
3267             } else if (val == dup_const(MO_16, val)) {
3268                 vece = MO_16;
3269             } else if (val == dup_const(MO_32, val)) {
3270                 vece = MO_32;
3271             }
3272 
3273             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3274         }
3275         ts->mem_coherent = 0;
3276         break;
3277     case TEMP_VAL_MEM:
3278         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3279                             preferred_regs, ts->indirect_base);
3280         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3281         ts->mem_coherent = 1;
3282         break;
3283     case TEMP_VAL_DEAD:
3284     default:
3285         tcg_abort();
3286     }
3287     ts->reg = reg;
3288     ts->val_type = TEMP_VAL_REG;
3289     s->reg_to_temp[reg] = ts;
3290 }
3291 
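/*
 * A self-contained sketch of the minimal-VECE search performed above
 * (and again in tcg_reg_alloc_dup2() below); the helper itself is
 * hypothetical and unused.  E.g. 0x6161616161616161 yields MO_8,
 * 0x0000000100000001 yields MO_32, and an arbitrary 64-bit value
 * falls through to MO_64.
 */
static MemOp find_min_vece_example(uint64_t val)
    __attribute__((unused));
static MemOp find_min_vece_example(uint64_t val)
{
    /* Try element sizes from smallest to largest; the first size
       whose replication reproduces the constant is minimal. */
    if (val == dup_const(MO_8, val)) {
        return MO_8;
    } else if (val == dup_const(MO_16, val)) {
        return MO_16;
    } else if (val == dup_const(MO_32, val)) {
        return MO_32;
    }
    return MO_64;
}
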
3292 /* Save a temporary to memory. 'allocated_regs' is used in case a
3293    temporary register needs to be allocated to store a constant.  */
3294 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3295 {
3296     /* The liveness analysis already ensures that globals are back
3297        in memory. Keep a tcg_debug_assert for safety. */
3298     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3299 }
3300 
3301 /* save globals to their canonical location and assume they can be
3302    modified by the following code. 'allocated_regs' is used in case a
3303    temporary register needs to be allocated to store a constant. */
3304 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3305 {
3306     int i, n;
3307 
3308     for (i = 0, n = s->nb_globals; i < n; i++) {
3309         temp_save(s, &s->temps[i], allocated_regs);
3310     }
3311 }
3312 
3313 /* sync globals to their canonical location and assume they can be
3314    read by the following code. 'allocated_regs' is used in case a
3315    temporary register needs to be allocated to store a constant. */
3316 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3317 {
3318     int i, n;
3319 
3320     for (i = 0, n = s->nb_globals; i < n; i++) {
3321         TCGTemp *ts = &s->temps[i];
3322         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3323                          || ts->kind == TEMP_FIXED
3324                          || ts->mem_coherent);
3325     }
3326 }
3327 
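/*
 * Note the distinction between the two helpers above: save_globals()
 * leaves memory as the only authoritative copy (the code that follows
 * may modify the globals), whereas sync_globals() merely ensures that
 * memory matches any live register copy (the code that follows may
 * read, but not write, the globals).
 */
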
3328 /* at the end of a basic block, we assume all temporaries are dead and
3329    all globals are stored at their canonical location. */
3330 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3331 {
3332     int i;
3333 
3334     for (i = s->nb_globals; i < s->nb_temps; i++) {
3335         TCGTemp *ts = &s->temps[i];
3336 
3337         switch (ts->kind) {
3338         case TEMP_LOCAL:
3339             temp_save(s, ts, allocated_regs);
3340             break;
3341         case TEMP_NORMAL:
3342             /* The liveness analysis already ensures that temps are dead.
3343                Keep a tcg_debug_assert for safety. */
3344             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3345             break;
3346         case TEMP_CONST:
3347             /* Similarly, we should have freed any allocated register. */
3348             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3349             break;
3350         default:
3351             g_assert_not_reached();
3352         }
3353     }
3354 
3355     save_globals(s, allocated_regs);
3356 }
3357 
3358 /*
3359  * At a conditional branch, we assume all temporaries are dead and
3360  * all globals and local temps are synced to their location.
3361  */
3362 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3363 {
3364     sync_globals(s, allocated_regs);
3365 
3366     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3367         TCGTemp *ts = &s->temps[i];
3368         /*
3369          * The liveness analysis already ensures that temps are dead.
3370          * Keep tcg_debug_asserts for safety.
3371          */
3372         switch (ts->kind) {
3373         case TEMP_LOCAL:
3374             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3375             break;
3376         case TEMP_NORMAL:
3377             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3378             break;
3379         case TEMP_CONST:
3380             break;
3381         default:
3382             g_assert_not_reached();
3383         }
3384     }
3385 }
3386 
3387 /*
3388  * Specialized code generation for INDEX_op_mov_* with a constant.
3389  */
3390 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3391                                   tcg_target_ulong val, TCGLifeData arg_life,
3392                                   TCGRegSet preferred_regs)
3393 {
3394     /* ENV should not be modified.  */
3395     tcg_debug_assert(!temp_readonly(ots));
3396 
3397     /* The movi is not explicitly generated here.  */
3398     if (ots->val_type == TEMP_VAL_REG) {
3399         s->reg_to_temp[ots->reg] = NULL;
3400     }
3401     ots->val_type = TEMP_VAL_CONST;
3402     ots->val = val;
3403     ots->mem_coherent = 0;
3404     if (NEED_SYNC_ARG(0)) {
3405         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3406     } else if (IS_DEAD_ARG(0)) {
3407         temp_dead(s, ots);
3408     }
3409 }
3410 
3411 /*
3412  * Specialized code generation for INDEX_op_mov_*.
3413  */
3414 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3415 {
3416     const TCGLifeData arg_life = op->life;
3417     TCGRegSet allocated_regs, preferred_regs;
3418     TCGTemp *ts, *ots;
3419     TCGType otype, itype;
3420 
3421     allocated_regs = s->reserved_regs;
3422     preferred_regs = op->output_pref[0];
3423     ots = arg_temp(op->args[0]);
3424     ts = arg_temp(op->args[1]);
3425 
3426     /* ENV should not be modified.  */
3427     tcg_debug_assert(!temp_readonly(ots));
3428 
3429     /* Note that otype != itype for no-op truncation.  */
3430     otype = ots->type;
3431     itype = ts->type;
3432 
3433     if (ts->val_type == TEMP_VAL_CONST) {
3434         /* propagate constant or generate sti */
3435         tcg_target_ulong val = ts->val;
3436         if (IS_DEAD_ARG(1)) {
3437             temp_dead(s, ts);
3438         }
3439         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3440         return;
3441     }
3442 
3443     /* If the source value is in memory, we're going to be forced
3444        to have it in a register in order to perform the copy.  Copy
3445        the SOURCE value into its own register first; that way we
3446        don't have to reload SOURCE the next time it is used. */
3447     if (ts->val_type == TEMP_VAL_MEM) {
3448         temp_load(s, ts, tcg_target_available_regs[itype],
3449                   allocated_regs, preferred_regs);
3450     }
3451 
3452     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3453     if (IS_DEAD_ARG(0)) {
3454         /* mov to a non-saved dead register makes no sense (even with
3455            liveness analysis disabled). */
3456         tcg_debug_assert(NEED_SYNC_ARG(0));
3457         if (!ots->mem_allocated) {
3458             temp_allocate_frame(s, ots);
3459         }
3460         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3461         if (IS_DEAD_ARG(1)) {
3462             temp_dead(s, ts);
3463         }
3464         temp_dead(s, ots);
3465     } else {
3466         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3467             /* the mov can be suppressed */
3468             if (ots->val_type == TEMP_VAL_REG) {
3469                 s->reg_to_temp[ots->reg] = NULL;
3470             }
3471             ots->reg = ts->reg;
3472             temp_dead(s, ts);
3473         } else {
3474             if (ots->val_type != TEMP_VAL_REG) {
3475                 /* When allocating a new register, make sure to not spill the
3476                    input one. */
3477                 tcg_regset_set_reg(allocated_regs, ts->reg);
3478                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3479                                          allocated_regs, preferred_regs,
3480                                          ots->indirect_base);
3481             }
3482             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3483                 /*
3484                  * Cross register class move not supported.
3485                  * Store the source register into the destination slot
3486                  * and leave the destination temp as TEMP_VAL_MEM.
3487                  */
3488                 assert(!temp_readonly(ots));
3489                 if (!ots->mem_allocated) {
3490                     temp_allocate_frame(s, ots);
3491                 }
3492                 tcg_out_st(s, ts->type, ts->reg,
3493                            ots->mem_base->reg, ots->mem_offset);
3494                 ots->mem_coherent = 1;
3495                 temp_free_or_dead(s, ots, -1);
3496                 return;
3497             }
3498         }
3499         ots->val_type = TEMP_VAL_REG;
3500         ots->mem_coherent = 0;
3501         s->reg_to_temp[ots->reg] = ots;
3502         if (NEED_SYNC_ARG(0)) {
3503             temp_sync(s, ots, allocated_regs, 0, 0);
3504         }
3505     }
3506 }
3507 
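/*
 * Note on the "suppressed" mov above: when the input temp dies at this
 * opcode and does not live in a fixed register, no host instruction is
 * emitted at all; the output temp simply takes over the input's
 * register.  This register renaming is one reason mov is special-cased
 * here rather than going through tcg_reg_alloc_op().
 */
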
3508 /*
3509  * Specialized code generation for INDEX_op_dup_vec.
3510  */
3511 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3512 {
3513     const TCGLifeData arg_life = op->life;
3514     TCGRegSet dup_out_regs, dup_in_regs;
3515     TCGTemp *its, *ots;
3516     TCGType itype, vtype;
3517     intptr_t endian_fixup;
3518     unsigned vece;
3519     bool ok;
3520 
3521     ots = arg_temp(op->args[0]);
3522     its = arg_temp(op->args[1]);
3523 
3524     /* ENV should not be modified.  */
3525     tcg_debug_assert(!temp_readonly(ots));
3526 
3527     itype = its->type;
3528     vece = TCGOP_VECE(op);
3529     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3530 
3531     if (its->val_type == TEMP_VAL_CONST) {
3532         /* Propagate constant via movi -> dupi.  */
3533         tcg_target_ulong val = its->val;
3534         if (IS_DEAD_ARG(1)) {
3535             temp_dead(s, its);
3536         }
3537         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3538         return;
3539     }
3540 
3541     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3542     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3543 
3544     /* Allocate the output register now.  */
3545     if (ots->val_type != TEMP_VAL_REG) {
3546         TCGRegSet allocated_regs = s->reserved_regs;
3547 
3548         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3549             /* Make sure to not spill the input register. */
3550             tcg_regset_set_reg(allocated_regs, its->reg);
3551         }
3552         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3553                                  op->output_pref[0], ots->indirect_base);
3554         ots->val_type = TEMP_VAL_REG;
3555         ots->mem_coherent = 0;
3556         s->reg_to_temp[ots->reg] = ots;
3557     }
3558 
3559     switch (its->val_type) {
3560     case TEMP_VAL_REG:
3561         /*
3562          * The dup constraints must be broad, covering all possible VECE.
3563          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3564          * to fail, indicating that extra moves are required for that case.
3565          */
3566         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3567             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3568                 goto done;
3569             }
3570             /* Try again from memory or a vector input register.  */
3571         }
3572         if (!its->mem_coherent) {
3573             /*
3574              * The input register is not synced, and so an extra store
3575              * would be required to use memory.  Attempt an integer-vector
3576              * register move first.  We do not have a TCGRegSet for this.
3577              */
3578             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3579                 break;
3580             }
3581             /* Sync the temp back to its slot and load from there.  */
3582             temp_sync(s, its, s->reserved_regs, 0, 0);
3583         }
3584         /* fall through */
3585 
3586     case TEMP_VAL_MEM:
3587 #ifdef HOST_WORDS_BIGENDIAN
3588         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3589         endian_fixup -= 1 << vece;
3590 #else
3591         endian_fixup = 0;
3592 #endif
3593         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3594                              its->mem_offset + endian_fixup)) {
3595             goto done;
3596         }
3597         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3598         break;
3599 
3600     default:
3601         g_assert_not_reached();
3602     }
3603 
3604     /* We now have a vector input register, so dup must succeed. */
3605     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3606     tcg_debug_assert(ok);
3607 
3608  done:
3609     if (IS_DEAD_ARG(1)) {
3610         temp_dead(s, its);
3611     }
3612     if (NEED_SYNC_ARG(0)) {
3613         temp_sync(s, ots, s->reserved_regs, 0, 0);
3614     }
3615     if (IS_DEAD_ARG(0)) {
3616         temp_dead(s, ots);
3617     }
3618 }
3619 
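/*
 * Worked example for the big-endian fixup in tcg_reg_alloc_dup()
 * above: for a 64-bit integer input (itype != TCG_TYPE_I32, so 8
 * bytes) with 8-bit elements (vece == MO_8, so 1 << vece == 1), the
 * fixup is 8 - 1 = 7.  On a big-endian host the least significant
 * element sits at the highest address, so dupm must load from
 * mem_offset + 7; little-endian hosts need no adjustment.
 */
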
3620 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3621 {
3622     const TCGLifeData arg_life = op->life;
3623     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3624     TCGRegSet i_allocated_regs;
3625     TCGRegSet o_allocated_regs;
3626     int i, k, nb_iargs, nb_oargs;
3627     TCGReg reg;
3628     TCGArg arg;
3629     const TCGArgConstraint *arg_ct;
3630     TCGTemp *ts;
3631     TCGArg new_args[TCG_MAX_OP_ARGS];
3632     int const_args[TCG_MAX_OP_ARGS];
3633 
3634     nb_oargs = def->nb_oargs;
3635     nb_iargs = def->nb_iargs;
3636 
3637     /* copy constants */
3638     memcpy(new_args + nb_oargs + nb_iargs,
3639            op->args + nb_oargs + nb_iargs,
3640            sizeof(TCGArg) * def->nb_cargs);
3641 
3642     i_allocated_regs = s->reserved_regs;
3643     o_allocated_regs = s->reserved_regs;
3644 
3645     /* satisfy input constraints */
3646     for (k = 0; k < nb_iargs; k++) {
3647         TCGRegSet i_preferred_regs, o_preferred_regs;
3648 
3649         i = def->args_ct[nb_oargs + k].sort_index;
3650         arg = op->args[i];
3651         arg_ct = &def->args_ct[i];
3652         ts = arg_temp(arg);
3653 
3654         if (ts->val_type == TEMP_VAL_CONST
3655             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3656             /* constant is OK for instruction */
3657             const_args[i] = 1;
3658             new_args[i] = ts->val;
3659             continue;
3660         }
3661 
3662         i_preferred_regs = o_preferred_regs = 0;
3663         if (arg_ct->ialias) {
3664             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3665 
3666             /*
3667              * If the input is readonly, then it cannot also be an
3668              * output and aliased to itself.  If the input is not
3669              * dead after the instruction, we must allocate a new
3670              * register and move it.
3671              */
3672             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3673                 goto allocate_in_reg;
3674             }
3675 
3676             /*
3677              * Check if the current register has already been allocated
3678              * for another input aliased to an output.
3679              */
3680             if (ts->val_type == TEMP_VAL_REG) {
3681                 reg = ts->reg;
3682                 for (int k2 = 0; k2 < k; k2++) {
3683                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3684                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3685                         goto allocate_in_reg;
3686                     }
3687                 }
3688             }
3689             i_preferred_regs = o_preferred_regs;
3690         }
3691 
3692         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3693         reg = ts->reg;
3694 
3695         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3696  allocate_in_reg:
3697             /*
3698              * Allocate a new register matching the constraint
3699              * and move the temporary register into it.
3700              */
3701             temp_load(s, ts, tcg_target_available_regs[ts->type],
3702                       i_allocated_regs, 0);
3703             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3704                                 o_preferred_regs, ts->indirect_base);
3705             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3706                 /*
3707                  * Cross register class move not supported.  Sync the
3708                  * temp back to its slot and load from there.
3709                  */
3710                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3711                 tcg_out_ld(s, ts->type, reg,
3712                            ts->mem_base->reg, ts->mem_offset);
3713             }
3714         }
3715         new_args[i] = reg;
3716         const_args[i] = 0;
3717         tcg_regset_set_reg(i_allocated_regs, reg);
3718     }
3719 
3720     /* mark dead temporaries and free the associated registers */
3721     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3722         if (IS_DEAD_ARG(i)) {
3723             temp_dead(s, arg_temp(op->args[i]));
3724         }
3725     }
3726 
3727     if (def->flags & TCG_OPF_COND_BRANCH) {
3728         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3729     } else if (def->flags & TCG_OPF_BB_END) {
3730         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3731     } else {
3732         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3733             /* XXX: permit generic clobber register list ? */
3734             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3735                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3736                     tcg_reg_free(s, i, i_allocated_regs);
3737                 }
3738             }
3739         }
3740         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3741             /* sync globals if the op has side effects and might trigger
3742                an exception. */
3743             sync_globals(s, i_allocated_regs);
3744         }
3745 
3746         /* satisfy the output constraints */
3747         for (k = 0; k < nb_oargs; k++) {
3748             i = def->args_ct[k].sort_index;
3749             arg = op->args[i];
3750             arg_ct = &def->args_ct[i];
3751             ts = arg_temp(arg);
3752 
3753             /* ENV should not be modified.  */
3754             tcg_debug_assert(!temp_readonly(ts));
3755 
3756             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3757                 reg = new_args[arg_ct->alias_index];
3758             } else if (arg_ct->newreg) {
3759                 reg = tcg_reg_alloc(s, arg_ct->regs,
3760                                     i_allocated_regs | o_allocated_regs,
3761                                     op->output_pref[k], ts->indirect_base);
3762             } else {
3763                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3764                                     op->output_pref[k], ts->indirect_base);
3765             }
3766             tcg_regset_set_reg(o_allocated_regs, reg);
3767             if (ts->val_type == TEMP_VAL_REG) {
3768                 s->reg_to_temp[ts->reg] = NULL;
3769             }
3770             ts->val_type = TEMP_VAL_REG;
3771             ts->reg = reg;
3772             /*
3773              * Temp value is modified, so the value kept in memory is
3774              * potentially not the same.
3775              */
3776             ts->mem_coherent = 0;
3777             s->reg_to_temp[reg] = ts;
3778             new_args[i] = reg;
3779         }
3780     }
3781 
3782     /* emit instruction */
3783     if (def->flags & TCG_OPF_VECTOR) {
3784         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3785                        new_args, const_args);
3786     } else {
3787         tcg_out_op(s, op->opc, new_args, const_args);
3788     }
3789 
3790     /* move the outputs in the correct register if needed */
3791     for (i = 0; i < nb_oargs; i++) {
3792         ts = arg_temp(op->args[i]);
3793 
3794         /* ENV should not be modified.  */
3795         tcg_debug_assert(!temp_readonly(ts));
3796 
3797         if (NEED_SYNC_ARG(i)) {
3798             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3799         } else if (IS_DEAD_ARG(i)) {
3800             temp_dead(s, ts);
3801         }
3802     }
3803 }
3804 
3805 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3806 {
3807     const TCGLifeData arg_life = op->life;
3808     TCGTemp *ots, *itsl, *itsh;
3809     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3810 
3811     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3812     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3813     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3814 
3815     ots = arg_temp(op->args[0]);
3816     itsl = arg_temp(op->args[1]);
3817     itsh = arg_temp(op->args[2]);
3818 
3819     /* ENV should not be modified.  */
3820     tcg_debug_assert(!temp_readonly(ots));
3821 
3822     /* Allocate the output register now.  */
3823     if (ots->val_type != TEMP_VAL_REG) {
3824         TCGRegSet allocated_regs = s->reserved_regs;
3825         TCGRegSet dup_out_regs =
3826             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3827 
3828         /* Make sure to not spill the input registers. */
3829         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3830             tcg_regset_set_reg(allocated_regs, itsl->reg);
3831         }
3832         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3833             tcg_regset_set_reg(allocated_regs, itsh->reg);
3834         }
3835 
3836         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3837                                  op->output_pref[0], ots->indirect_base);
3838         ots->val_type = TEMP_VAL_REG;
3839         ots->mem_coherent = 0;
3840         s->reg_to_temp[ots->reg] = ots;
3841     }
3842 
3843     /* Promote dup2 of immediates to dupi_vec. */
3844     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3845         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3846         MemOp vece = MO_64;
3847 
3848         if (val == dup_const(MO_8, val)) {
3849             vece = MO_8;
3850         } else if (val == dup_const(MO_16, val)) {
3851             vece = MO_16;
3852         } else if (val == dup_const(MO_32, val)) {
3853             vece = MO_32;
3854         }
3855 
3856         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3857         goto done;
3858     }
3859 
3860     /* If the two inputs form one 64-bit value, try dupm_vec. */
3861     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3862         if (!itsl->mem_coherent) {
3863             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3864         }
3865         if (!itsh->mem_coherent) {
3866             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3867         }
3868 #ifdef HOST_WORDS_BIGENDIAN
3869         TCGTemp *its = itsh;
3870 #else
3871         TCGTemp *its = itsl;
3872 #endif
3873         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3874                              its->mem_base->reg, its->mem_offset)) {
3875             goto done;
3876         }
3877     }
3878 
3879     /* Fall back to generic expansion. */
3880     return false;
3881 
3882  done:
3883     if (IS_DEAD_ARG(1)) {
3884         temp_dead(s, itsl);
3885     }
3886     if (IS_DEAD_ARG(2)) {
3887         temp_dead(s, itsh);
3888     }
3889     if (NEED_SYNC_ARG(0)) {
3890         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3891     } else if (IS_DEAD_ARG(0)) {
3892         temp_dead(s, ots);
3893     }
3894     return true;
3895 }
3896 
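/*
 * Worked example for the immediate promotion above: with
 * itsl->val == 0x00000001 and itsh->val == 0x00000002, deposit64()
 * forms 0x0000000200000001, which no element size smaller than MO_64
 * replicates, so it is emitted as a 64-bit dupi.  Had both halves
 * been 0x00000001, the search would settle on MO_32.
 */
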
3897 #ifdef TCG_TARGET_STACK_GROWSUP
3898 #define STACK_DIR(x) (-(x))
3899 #else
3900 #define STACK_DIR(x) (x)
3901 #endif
3902 
3903 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3904 {
3905     const int nb_oargs = TCGOP_CALLO(op);
3906     const int nb_iargs = TCGOP_CALLI(op);
3907     const TCGLifeData arg_life = op->life;
3908     const TCGHelperInfo *info;
3909     int flags, nb_regs, i;
3910     TCGReg reg;
3911     TCGArg arg;
3912     TCGTemp *ts;
3913     intptr_t stack_offset;
3914     size_t call_stack_size;
3915     tcg_insn_unit *func_addr;
3916     int allocate_args;
3917     TCGRegSet allocated_regs;
3918 
3919     func_addr = tcg_call_func(op);
3920     info = tcg_call_info(op);
3921     flags = info->flags;
3922 
3923     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3924     if (nb_regs > nb_iargs) {
3925         nb_regs = nb_iargs;
3926     }
3927 
3928     /* assign stack slots first */
3929     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3930     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3931         ~(TCG_TARGET_STACK_ALIGN - 1);
3932     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3933     if (allocate_args) {
3934         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3935            preallocate call stack */
3936         tcg_abort();
3937     }
3938 
3939     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3940     for (i = nb_regs; i < nb_iargs; i++) {
3941         arg = op->args[nb_oargs + i];
3942 #ifdef TCG_TARGET_STACK_GROWSUP
3943         stack_offset -= sizeof(tcg_target_long);
3944 #endif
3945         if (arg != TCG_CALL_DUMMY_ARG) {
3946             ts = arg_temp(arg);
3947             temp_load(s, ts, tcg_target_available_regs[ts->type],
3948                       s->reserved_regs, 0);
3949             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3950         }
3951 #ifndef TCG_TARGET_STACK_GROWSUP
3952         stack_offset += sizeof(tcg_target_long);
3953 #endif
3954     }
3955 
3956     /* assign input registers */
3957     allocated_regs = s->reserved_regs;
3958     for (i = 0; i < nb_regs; i++) {
3959         arg = op->args[nb_oargs + i];
3960         if (arg != TCG_CALL_DUMMY_ARG) {
3961             ts = arg_temp(arg);
3962             reg = tcg_target_call_iarg_regs[i];
3963 
3964             if (ts->val_type == TEMP_VAL_REG) {
3965                 if (ts->reg != reg) {
3966                     tcg_reg_free(s, reg, allocated_regs);
3967                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3968                         /*
3969                          * Cross register class move not supported.  Sync the
3970                          * temp back to its slot and load from there.
3971                          */
3972                         temp_sync(s, ts, allocated_regs, 0, 0);
3973                         tcg_out_ld(s, ts->type, reg,
3974                                    ts->mem_base->reg, ts->mem_offset);
3975                     }
3976                 }
3977             } else {
3978                 TCGRegSet arg_set = 0;
3979 
3980                 tcg_reg_free(s, reg, allocated_regs);
3981                 tcg_regset_set_reg(arg_set, reg);
3982                 temp_load(s, ts, arg_set, allocated_regs, 0);
3983             }
3984 
3985             tcg_regset_set_reg(allocated_regs, reg);
3986         }
3987     }
3988 
3989     /* mark dead temporaries and free the associated registers */
3990     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3991         if (IS_DEAD_ARG(i)) {
3992             temp_dead(s, arg_temp(op->args[i]));
3993         }
3994     }
3995 
3996     /* clobber call registers */
3997     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3998         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3999             tcg_reg_free(s, i, allocated_regs);
4000         }
4001     }
4002 
4003     /* Save globals if they might be written by the helper; sync them if
4004        they might be read. */
4005     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4006         /* Nothing to do */
4007     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4008         sync_globals(s, allocated_regs);
4009     } else {
4010         save_globals(s, allocated_regs);
4011     }
4012 
4013 #ifdef CONFIG_TCG_INTERPRETER
4014     {
4015         gpointer hash = (gpointer)(uintptr_t)info->typemask;
4016         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4017         assert(cif != NULL);
4018         tcg_out_call(s, func_addr, cif);
4019     }
4020 #else
4021     tcg_out_call(s, func_addr);
4022 #endif
4023 
4024     /* assign output registers and emit moves if needed */
4025     for (i = 0; i < nb_oargs; i++) {
4026         arg = op->args[i];
4027         ts = arg_temp(arg);
4028 
4029         /* ENV should not be modified.  */
4030         tcg_debug_assert(!temp_readonly(ts));
4031 
4032         reg = tcg_target_call_oarg_regs[i];
4033         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4034         if (ts->val_type == TEMP_VAL_REG) {
4035             s->reg_to_temp[ts->reg] = NULL;
4036         }
4037         ts->val_type = TEMP_VAL_REG;
4038         ts->reg = reg;
4039         ts->mem_coherent = 0;
4040         s->reg_to_temp[reg] = ts;
4041         if (NEED_SYNC_ARG(i)) {
4042             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4043         } else if (IS_DEAD_ARG(i)) {
4044             temp_dead(s, ts);
4045         }
4046     }
4047 }
4048 
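/*
 * Hypothetical, unused helper making the stack-size rounding in
 * tcg_reg_alloc_call() above explicit: align a byte count up to
 * TCG_TARGET_STACK_ALIGN using the usual power-of-two mask trick.
 * With 16-byte alignment, e.g., 20 bytes round up to 32.
 */
static size_t call_stack_align_example(size_t size)
    __attribute__((unused));
static size_t call_stack_align_example(size_t size)
{
    /* Adding ALIGN-1 and then clearing the low bits rounds up. */
    return (size + TCG_TARGET_STACK_ALIGN - 1)
           & ~(size_t)(TCG_TARGET_STACK_ALIGN - 1);
}
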
4049 #ifdef CONFIG_PROFILER
4050 
4051 /* avoid copy/paste errors */
4052 #define PROF_ADD(to, from, field)                       \
4053     do {                                                \
4054         (to)->field += qatomic_read(&((from)->field));  \
4055     } while (0)
4056 
4057 #define PROF_MAX(to, from, field)                                       \
4058     do {                                                                \
4059         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4060         if (val__ > (to)->field) {                                      \
4061             (to)->field = val__;                                        \
4062         }                                                               \
4063     } while (0)
4064 
4065 /* Pass in a zeroed @prof. */
4066 static inline
4067 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4068 {
4069     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4070     unsigned int i;
4071 
4072     for (i = 0; i < n_ctxs; i++) {
4073         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4074         const TCGProfile *orig = &s->prof;
4075 
4076         if (counters) {
4077             PROF_ADD(prof, orig, cpu_exec_time);
4078             PROF_ADD(prof, orig, tb_count1);
4079             PROF_ADD(prof, orig, tb_count);
4080             PROF_ADD(prof, orig, op_count);
4081             PROF_MAX(prof, orig, op_count_max);
4082             PROF_ADD(prof, orig, temp_count);
4083             PROF_MAX(prof, orig, temp_count_max);
4084             PROF_ADD(prof, orig, del_op_count);
4085             PROF_ADD(prof, orig, code_in_len);
4086             PROF_ADD(prof, orig, code_out_len);
4087             PROF_ADD(prof, orig, search_out_len);
4088             PROF_ADD(prof, orig, interm_time);
4089             PROF_ADD(prof, orig, code_time);
4090             PROF_ADD(prof, orig, la_time);
4091             PROF_ADD(prof, orig, opt_time);
4092             PROF_ADD(prof, orig, restore_count);
4093             PROF_ADD(prof, orig, restore_time);
4094         }
4095         if (table) {
4096             int i;
4097 
4098             for (i = 0; i < NB_OPS; i++) {
4099                 PROF_ADD(prof, orig, table_op_count[i]);
4100             }
4101         }
4102     }
4103 }
4104 
4105 #undef PROF_ADD
4106 #undef PROF_MAX
4107 
4108 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4109 {
4110     tcg_profile_snapshot(prof, true, false);
4111 }
4112 
4113 static void tcg_profile_snapshot_table(TCGProfile *prof)
4114 {
4115     tcg_profile_snapshot(prof, false, true);
4116 }
4117 
4118 void tcg_dump_op_count(void)
4119 {
4120     TCGProfile prof = {};
4121     int i;
4122 
4123     tcg_profile_snapshot_table(&prof);
4124     for (i = 0; i < NB_OPS; i++) {
4125         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4126                     prof.table_op_count[i]);
4127     }
4128 }
4129 
4130 int64_t tcg_cpu_exec_time(void)
4131 {
4132     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4133     unsigned int i;
4134     int64_t ret = 0;
4135 
4136     for (i = 0; i < n_ctxs; i++) {
4137         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4138         const TCGProfile *prof = &s->prof;
4139 
4140         ret += qatomic_read(&prof->cpu_exec_time);
4141     }
4142     return ret;
4143 }
4144 #else
4145 void tcg_dump_op_count(void)
4146 {
4147     qemu_printf("[TCG profiler not compiled]\n");
4148 }
4149 
4150 int64_t tcg_cpu_exec_time(void)
4151 {
4152     error_report("%s: TCG profiler not compiled", __func__);
4153     exit(EXIT_FAILURE);
4154 }
4155 #endif
4156 
4157 
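/*
 * Generate host code for @tb.  Returns the size in bytes of the
 * generated code on success.  A negative value requests a restart:
 * -1 if the code buffer high-water mark was reached, -2 if the
 * insn-offset table or a relocation overflowed; the ldst and pool
 * finalizers below may likewise return a negative value.
 */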
4158 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4159 {
4160 #ifdef CONFIG_PROFILER
4161     TCGProfile *prof = &s->prof;
4162 #endif
4163     int i, num_insns;
4164     TCGOp *op;
4165 
4166 #ifdef CONFIG_PROFILER
4167     {
4168         int n = 0;
4169 
4170         QTAILQ_FOREACH(op, &s->ops, link) {
4171             n++;
4172         }
4173         qatomic_set(&prof->op_count, prof->op_count + n);
4174         if (n > prof->op_count_max) {
4175             qatomic_set(&prof->op_count_max, n);
4176         }
4177 
4178         n = s->nb_temps;
4179         qatomic_set(&prof->temp_count, prof->temp_count + n);
4180         if (n > prof->temp_count_max) {
4181             qatomic_set(&prof->temp_count_max, n);
4182         }
4183     }
4184 #endif
4185 
4186 #ifdef DEBUG_DISAS
4187     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4188                  && qemu_log_in_addr_range(tb->pc))) {
4189         FILE *logfile = qemu_log_lock();
4190         qemu_log("OP:\n");
4191         tcg_dump_ops(s, false);
4192         qemu_log("\n");
4193         qemu_log_unlock(logfile);
4194     }
4195 #endif
4196 
4197 #ifdef CONFIG_DEBUG_TCG
4198     /* Ensure all labels referenced have been emitted.  */
4199     {
4200         TCGLabel *l;
4201         bool error = false;
4202 
4203         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4204             if (unlikely(!l->present) && l->refs) {
4205                 qemu_log_mask(CPU_LOG_TB_OP,
4206                               "$L%d referenced but not present.\n", l->id);
4207                 error = true;
4208             }
4209         }
4210         assert(!error);
4211     }
4212 #endif
4213 
4214 #ifdef CONFIG_PROFILER
4215     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4216 #endif
4217 
4218 #ifdef USE_TCG_OPTIMIZATIONS
4219     tcg_optimize(s);
4220 #endif
4221 
4222 #ifdef CONFIG_PROFILER
4223     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4224     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4225 #endif
4226 
4227     reachable_code_pass(s);
4228     liveness_pass_1(s);
4229 
4230     if (s->nb_indirects > 0) {
4231 #ifdef DEBUG_DISAS
4232         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4233                      && qemu_log_in_addr_range(tb->pc))) {
4234             FILE *logfile = qemu_log_lock();
4235             qemu_log("OP before indirect lowering:\n");
4236             tcg_dump_ops(s, false);
4237             qemu_log("\n");
4238             qemu_log_unlock(logfile);
4239         }
4240 #endif
4241         /* Replace indirect temps with direct temps.  */
4242         if (liveness_pass_2(s)) {
4243             /* If changes were made, re-run liveness.  */
4244             liveness_pass_1(s);
4245         }
4246     }
4247 
4248 #ifdef CONFIG_PROFILER
4249     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4250 #endif
4251 
4252 #ifdef DEBUG_DISAS
4253     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4254                  && qemu_log_in_addr_range(tb->pc))) {
4255         FILE *logfile = qemu_log_lock();
4256         qemu_log("OP after optimization and liveness analysis:\n");
4257         tcg_dump_ops(s, true);
4258         qemu_log("\n");
4259         qemu_log_unlock(logfile);
4260     }
4261 #endif
4262 
4263     tcg_reg_alloc_start(s);
4264 
4265     /*
4266      * Reset the buffer pointers when restarting after overflow.
4267      * TODO: Move this into translate-all.c with the rest of the
4268      * buffer management.  Having only this done here is confusing.
4269      */
4270     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4271     s->code_ptr = s->code_buf;
4272 
4273 #ifdef TCG_TARGET_NEED_LDST_LABELS
4274     QSIMPLEQ_INIT(&s->ldst_labels);
4275 #endif
4276 #ifdef TCG_TARGET_NEED_POOL_LABELS
4277     s->pool_labels = NULL;
4278 #endif
4279 
4280     num_insns = -1;
4281     QTAILQ_FOREACH(op, &s->ops, link) {
4282         TCGOpcode opc = op->opc;
4283 
4284 #ifdef CONFIG_PROFILER
4285         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4286 #endif
4287 
4288         switch (opc) {
4289         case INDEX_op_mov_i32:
4290         case INDEX_op_mov_i64:
4291         case INDEX_op_mov_vec:
4292             tcg_reg_alloc_mov(s, op);
4293             break;
4294         case INDEX_op_dup_vec:
4295             tcg_reg_alloc_dup(s, op);
4296             break;
4297         case INDEX_op_insn_start:
4298             if (num_insns >= 0) {
4299                 size_t off = tcg_current_code_size(s);
4300                 s->gen_insn_end_off[num_insns] = off;
4301                 /* Assert that we do not overflow our stored offset.  */
4302                 assert(s->gen_insn_end_off[num_insns] == off);
4303             }
4304             num_insns++;
4305             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4306                 target_ulong a;
4307 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4308                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4309 #else
4310                 a = op->args[i];
4311 #endif
4312                 s->gen_insn_data[num_insns][i] = a;
4313             }
4314             break;
4315         case INDEX_op_discard:
4316             temp_dead(s, arg_temp(op->args[0]));
4317             break;
4318         case INDEX_op_set_label:
4319             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4320             tcg_out_label(s, arg_label(op->args[0]));
4321             break;
4322         case INDEX_op_call:
4323             tcg_reg_alloc_call(s, op);
4324             break;
4325         case INDEX_op_dup2_vec:
4326             if (tcg_reg_alloc_dup2(s, op)) {
4327                 break;
4328             }
4329             /* fall through */
4330         default:
4331             /* Sanity check that we've not introduced any unhandled opcodes. */
4332             tcg_debug_assert(tcg_op_supported(opc));
4333             /* Note: it would be much faster to have specialized
4334                register allocator functions for some common argument
4335                patterns. */
4336             tcg_reg_alloc_op(s, op);
4337             break;
4338         }
4339 #ifdef CONFIG_DEBUG_TCG
4340         check_regs(s);
4341 #endif
4342         /* Test for (pending) buffer overflow.  The assumption is that any
4343            one operation beginning below the high water mark cannot overrun
4344            the buffer completely.  Thus we can test for overflow after
4345            generating code without having to check during generation.  */
4346         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4347             return -1;
4348         }
4349         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4350         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4351             return -2;
4352         }
4353     }
4354     tcg_debug_assert(num_insns >= 0);
4355     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4356 
4357     /* Generate TB finalization at the end of block */
4358 #ifdef TCG_TARGET_NEED_LDST_LABELS
4359     i = tcg_out_ldst_finalize(s);
4360     if (i < 0) {
4361         return i;
4362     }
4363 #endif
4364 #ifdef TCG_TARGET_NEED_POOL_LABELS
4365     i = tcg_out_pool_finalize(s);
4366     if (i < 0) {
4367         return i;
4368     }
4369 #endif
4370     if (!tcg_resolve_relocs(s)) {
4371         return -2;
4372     }
4373 
4374 #ifndef CONFIG_TCG_INTERPRETER
4375     /* flush instruction cache */
4376     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4377                         (uintptr_t)s->code_buf,
4378                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4379 #endif
4380 
4381     return tcg_current_code_size(s);
4382 }
4383 
4384 #ifdef CONFIG_PROFILER
4385 void tcg_dump_info(void)
4386 {
4387     TCGProfile prof = {};
4388     const TCGProfile *s;
4389     int64_t tb_count;
4390     int64_t tb_div_count;
4391     int64_t tot;
4392 
4393     tcg_profile_snapshot_counters(&prof);
4394     s = &prof;
4395     tb_count = s->tb_count;
4396     tb_div_count = tb_count ? tb_count : 1;
4397     tot = s->interm_time + s->code_time;
4398 
4399     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4400                 tot, tot / 2.4e9);
4401     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4402                 " %0.1f%%)\n",
4403                 tb_count, s->tb_count1 - tb_count,
4404                 (double)(s->tb_count1 - s->tb_count)
4405                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4406     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4407                 (double)s->op_count / tb_div_count, s->op_count_max);
4408     qemu_printf("deleted ops/TB      %0.2f\n",
4409                 (double)s->del_op_count / tb_div_count);
4410     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4411                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4412     qemu_printf("avg host code/TB    %0.1f\n",
4413                 (double)s->code_out_len / tb_div_count);
4414     qemu_printf("avg search data/TB  %0.1f\n",
4415                 (double)s->search_out_len / tb_div_count);
4416 
4417     qemu_printf("cycles/op           %0.1f\n",
4418                 s->op_count ? (double)tot / s->op_count : 0);
4419     qemu_printf("cycles/in byte      %0.1f\n",
4420                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4421     qemu_printf("cycles/out byte     %0.1f\n",
4422                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4423     qemu_printf("cycles/search byte     %0.1f\n",
4424                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4425     if (tot == 0) {
4426         tot = 1;
4427     }
4428     qemu_printf("  gen_interm time   %0.1f%%\n",
4429                 (double)s->interm_time / tot * 100.0);
4430     qemu_printf("  gen_code time     %0.1f%%\n",
4431                 (double)s->code_time / tot * 100.0);
4432     qemu_printf("optim./code time    %0.1f%%\n",
4433                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4434                 * 100.0);
4435     qemu_printf("liveness/code time  %0.1f%%\n",
4436                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4437     qemu_printf("cpu_restore count   %" PRId64 "\n",
4438                 s->restore_count);
4439     qemu_printf("  avg cycles        %0.1f\n",
4440                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4441 }
4442 #else
4443 void tcg_dump_info(void)
4444 {
4445     qemu_printf("[TCG profiler not compiled]\n");
4446 }
4447 #endif
4448 
4449 #ifdef ELF_HOST_MACHINE
4450 /* In order to use this feature, the backend needs to do three things:
4451 
4452    (1) Define ELF_HOST_MACHINE, both to supply the value placed
4453        into the ELF image and to indicate support for the feature.
4454 
4455    (2) Define tcg_register_jit.  This should create a buffer containing
4456        the contents of a .debug_frame section that describes the post-
4457        prologue unwind info for the tcg machine.
4458 
4459    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4460 */
4461 
4462 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4463 typedef enum {
4464     JIT_NOACTION = 0,
4465     JIT_REGISTER_FN,
4466     JIT_UNREGISTER_FN
4467 } jit_actions_t;
4468 
4469 struct jit_code_entry {
4470     struct jit_code_entry *next_entry;
4471     struct jit_code_entry *prev_entry;
4472     const void *symfile_addr;
4473     uint64_t symfile_size;
4474 };
4475 
4476 struct jit_descriptor {
4477     uint32_t version;
4478     uint32_t action_flag;
4479     struct jit_code_entry *relevant_entry;
4480     struct jit_code_entry *first_entry;
4481 };
4482 
4483 void __jit_debug_register_code(void) __attribute__((noinline));
4484 void __jit_debug_register_code(void)
4485 {
4486     asm("");
4487 }
4488 
4489 /* Must statically initialize the version, because GDB may check
4490    the version before we can set it.  */
4491 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4492 
4493 /* End GDB interface.  */
4494 
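/*
 * Return the offset of @str within @strtab.  The caller must pass a
 * string that is actually present (here, one baked into the image
 * template below); the loop does not otherwise terminate.
 */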
4495 static int find_string(const char *strtab, const char *str)
4496 {
4497     const char *p = strtab + 1;
4498 
4499     while (1) {
4500         if (strcmp(p, str) == 0) {
4501             return p - strtab;
4502         }
4503         p += strlen(p) + 1;
4504     }
4505 }
4506 
4507 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4508                                  const void *debug_frame,
4509                                  size_t debug_frame_size)
4510 {
4511     struct __attribute__((packed)) DebugInfo {
4512         uint32_t  len;
4513         uint16_t  version;
4514         uint32_t  abbrev;
4515         uint8_t   ptr_size;
4516         uint8_t   cu_die;
4517         uint16_t  cu_lang;
4518         uintptr_t cu_low_pc;
4519         uintptr_t cu_high_pc;
4520         uint8_t   fn_die;
4521         char      fn_name[16];
4522         uintptr_t fn_low_pc;
4523         uintptr_t fn_high_pc;
4524         uint8_t   cu_eoc;
4525     };
4526 
4527     struct ElfImage {
4528         ElfW(Ehdr) ehdr;
4529         ElfW(Phdr) phdr;
4530         ElfW(Shdr) shdr[7];
4531         ElfW(Sym)  sym[2];
4532         struct DebugInfo di;
4533         uint8_t    da[24];
4534         char       str[80];
4535     };
4536 
4537     struct ElfImage *img;
4538 
4539     static const struct ElfImage img_template = {
4540         .ehdr = {
4541             .e_ident[EI_MAG0] = ELFMAG0,
4542             .e_ident[EI_MAG1] = ELFMAG1,
4543             .e_ident[EI_MAG2] = ELFMAG2,
4544             .e_ident[EI_MAG3] = ELFMAG3,
4545             .e_ident[EI_CLASS] = ELF_CLASS,
4546             .e_ident[EI_DATA] = ELF_DATA,
4547             .e_ident[EI_VERSION] = EV_CURRENT,
4548             .e_type = ET_EXEC,
4549             .e_machine = ELF_HOST_MACHINE,
4550             .e_version = EV_CURRENT,
4551             .e_phoff = offsetof(struct ElfImage, phdr),
4552             .e_shoff = offsetof(struct ElfImage, shdr),
4553             .e_ehsize = sizeof(ElfW(Ehdr)),
4554             .e_phentsize = sizeof(ElfW(Phdr)),
4555             .e_phnum = 1,
4556             .e_shentsize = sizeof(ElfW(Shdr)),
4557             .e_shnum = ARRAY_SIZE(img->shdr),
4558             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4559 #ifdef ELF_HOST_FLAGS
4560             .e_flags = ELF_HOST_FLAGS,
4561 #endif
4562 #ifdef ELF_OSABI
4563             .e_ident[EI_OSABI] = ELF_OSABI,
4564 #endif
4565         },
4566         .phdr = {
4567             .p_type = PT_LOAD,
4568             .p_flags = PF_X,
4569         },
4570         .shdr = {
4571             [0] = { .sh_type = SHT_NULL },
4572             /* Trick: The contents of code_gen_buffer are not present in
4573                this fake ELF file; that got allocated elsewhere.  Therefore
4574                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4575                will not look for contents.  We can record any address.  */
4576             [1] = { /* .text */
4577                 .sh_type = SHT_NOBITS,
4578                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4579             },
4580             [2] = { /* .debug_info */
4581                 .sh_type = SHT_PROGBITS,
4582                 .sh_offset = offsetof(struct ElfImage, di),
4583                 .sh_size = sizeof(struct DebugInfo),
4584             },
4585             [3] = { /* .debug_abbrev */
4586                 .sh_type = SHT_PROGBITS,
4587                 .sh_offset = offsetof(struct ElfImage, da),
4588                 .sh_size = sizeof(img->da),
4589             },
4590             [4] = { /* .debug_frame */
4591                 .sh_type = SHT_PROGBITS,
4592                 .sh_offset = sizeof(struct ElfImage),
4593             },
4594             [5] = { /* .symtab */
4595                 .sh_type = SHT_SYMTAB,
4596                 .sh_offset = offsetof(struct ElfImage, sym),
4597                 .sh_size = sizeof(img->sym),
4598                 .sh_info = 1,
4599                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4600                 .sh_entsize = sizeof(ElfW(Sym)),
4601             },
4602             [6] = { /* .strtab */
4603                 .sh_type = SHT_STRTAB,
4604                 .sh_offset = offsetof(struct ElfImage, str),
4605                 .sh_size = sizeof(img->str),
4606             }
4607         },
4608         .sym = {
4609             [1] = { /* code_gen_buffer */
4610                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4611                 .st_shndx = 1,
4612             }
4613         },
4614         .di = {
4615             .len = sizeof(struct DebugInfo) - 4,
4616             .version = 2,
4617             .ptr_size = sizeof(void *),
4618             .cu_die = 1,
4619             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4620             .fn_die = 2,
4621             .fn_name = "code_gen_buffer"
4622         },
4623         .da = {
4624             1,          /* abbrev number (the cu) */
4625             0x11, 1,    /* DW_TAG_compile_unit, has children */
4626             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4627             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4628             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4629             0, 0,       /* end of abbrev */
4630             2,          /* abbrev number (the fn) */
4631             0x2e, 0,    /* DW_TAG_subprogram, no children */
4632             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4633             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4634             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4635             0, 0,       /* end of abbrev */
4636             0           /* no more abbrev */
4637         },
4638         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4639                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4640     };
4641 
4642     /* We only need a single jit entry; statically allocate it.  */
4643     static struct jit_code_entry one_entry;
4644 
4645     uintptr_t buf = (uintptr_t)buf_ptr;
4646     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4647     DebugFrameHeader *dfh;
4648 
4649     img = g_malloc(img_size);
4650     *img = img_template;
4651 
4652     img->phdr.p_vaddr = buf;
4653     img->phdr.p_paddr = buf;
4654     img->phdr.p_memsz = buf_size;
4655 
4656     img->shdr[1].sh_name = find_string(img->str, ".text");
4657     img->shdr[1].sh_addr = buf;
4658     img->shdr[1].sh_size = buf_size;
4659 
4660     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4661     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4662 
4663     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4664     img->shdr[4].sh_size = debug_frame_size;
4665 
4666     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4667     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4668 
4669     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4670     img->sym[1].st_value = buf;
4671     img->sym[1].st_size = buf_size;
4672 
4673     img->di.cu_low_pc = buf;
4674     img->di.cu_high_pc = buf + buf_size;
4675     img->di.fn_low_pc = buf;
4676     img->di.fn_high_pc = buf + buf_size;
4677 
4678     dfh = (DebugFrameHeader *)(img + 1);
4679     memcpy(dfh, debug_frame, debug_frame_size);
4680     dfh->fde.func_start = buf;
4681     dfh->fde.func_len = buf_size;
4682 
4683 #ifdef DEBUG_JIT
4684     /* Enable this block to be able to debug the ELF image file creation.
4685        One can use readelf, objdump, or other inspection utilities.  */
4686     {
4687         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4688         if (f) {
4689             if (fwrite(img, img_size, 1, f) != 1) {
4690                 /* Avoid stupid unused return value warning for fwrite.  */
4691             }
4692             fclose(f);
4693         }
4694     }
4695 #endif
4696 
4697     one_entry.symfile_addr = img;
4698     one_entry.symfile_size = img_size;
4699 
4700     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4701     __jit_debug_descriptor.relevant_entry = &one_entry;
4702     __jit_debug_descriptor.first_entry = &one_entry;
4703     __jit_debug_register_code();
4704 }
4705 #else
4706 /* No support for the feature.  Provide the entry point expected by exec.c,
4707    and implement the internal function we declared earlier.  */
4708 
4709 static void tcg_register_jit_int(const void *buf, size_t size,
4710                                  const void *debug_frame,
4711                                  size_t debug_frame_size)
4712 {
4713 }
4714 
4715 void tcg_register_jit(const void *buf, size_t buf_size)
4716 {
4717 }
4718 #endif /* ELF_HOST_MACHINE */
4719 
4720 #if !TCG_TARGET_MAYBE_vec
4721 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4722 {
4723     g_assert_not_reached();
4724 }
4725 #endif
4726