xref: /openbmc/qemu/tcg/tcg.c (revision 3a8233dc)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 
40 /* Note: the long-term plan is to reduce the dependencies on the QEMU
41    CPU definitions.  Currently they are used for qemu_ld/st
42    instructions.  */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "tcg-internal.h"
62 
63 #ifdef CONFIG_TCG_INTERPRETER
64 #include <ffi.h>
65 #endif
66 
67 /* Forward declarations for functions declared in tcg-target.c.inc and
68    used here. */
69 static void tcg_target_init(TCGContext *s);
70 static void tcg_target_qemu_prologue(TCGContext *s);
71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
72                         intptr_t value, intptr_t addend);
73 
74 /* The CIE and FDE header definitions will be common to all hosts.  */
75 typedef struct {
76     uint32_t len __attribute__((aligned((sizeof(void *)))));
77     uint32_t id;
78     uint8_t version;
79     char augmentation[1];
80     uint8_t code_align;
81     uint8_t data_align;
82     uint8_t return_column;
83 } DebugFrameCIE;
84 
85 typedef struct QEMU_PACKED {
86     uint32_t len __attribute__((aligned((sizeof(void *)))));
87     uint32_t cie_offset;
88     uintptr_t func_start;
89     uintptr_t func_len;
90 } DebugFrameFDEHeader;
91 
92 typedef struct QEMU_PACKED {
93     DebugFrameCIE cie;
94     DebugFrameFDEHeader fde;
95 } DebugFrameHeader;
96 
97 static void tcg_register_jit_int(const void *buf, size_t size,
98                                  const void *debug_frame,
99                                  size_t debug_frame_size)
100     __attribute__((unused));
101 
102 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
104                        intptr_t arg2);
105 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
106 static void tcg_out_movi(TCGContext *s, TCGType type,
107                          TCGReg ret, tcg_target_long arg);
108 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
109                        const TCGArg args[TCG_MAX_OP_ARGS],
110                        const int const_args[TCG_MAX_OP_ARGS]);
111 #if TCG_TARGET_MAYBE_vec
112 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
113                             TCGReg dst, TCGReg src);
114 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
115                              TCGReg dst, TCGReg base, intptr_t offset);
116 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, int64_t arg);
118 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
119                            unsigned vecl, unsigned vece,
120                            const TCGArg args[TCG_MAX_OP_ARGS],
121                            const int const_args[TCG_MAX_OP_ARGS]);
122 #else
123 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
124                                    TCGReg dst, TCGReg src)
125 {
126     g_assert_not_reached();
127 }
128 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
129                                     TCGReg dst, TCGReg base, intptr_t offset)
130 {
131     g_assert_not_reached();
132 }
133 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
134                                     TCGReg dst, int64_t arg)
135 {
136     g_assert_not_reached();
137 }
138 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
139                                   unsigned vecl, unsigned vece,
140                                   const TCGArg args[TCG_MAX_OP_ARGS],
141                                   const int const_args[TCG_MAX_OP_ARGS])
142 {
143     g_assert_not_reached();
144 }
145 #endif
146 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
147                        intptr_t arg2);
148 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
149                         TCGReg base, intptr_t ofs);
150 #ifdef CONFIG_TCG_INTERPRETER
151 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
152                          ffi_cif *cif);
153 #else
154 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
155 #endif
156 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
157 #ifdef TCG_TARGET_NEED_LDST_LABELS
158 static int tcg_out_ldst_finalize(TCGContext *s);
159 #endif
160 
161 TCGContext tcg_init_ctx;
162 __thread TCGContext *tcg_ctx;
163 
164 TCGContext **tcg_ctxs;
165 unsigned int tcg_cur_ctxs;
166 unsigned int tcg_max_ctxs;
167 TCGv_env cpu_env = 0;
168 const void *tcg_code_gen_epilogue;
169 uintptr_t tcg_splitwx_diff;
170 
171 #ifndef CONFIG_TCG_INTERPRETER
172 tcg_prologue_fn *tcg_qemu_tb_exec;
173 #endif
174 
175 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
176 static TCGRegSet tcg_target_call_clobber_regs;
177 
178 #if TCG_TARGET_INSN_UNIT_SIZE == 1
179 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
180 {
181     *s->code_ptr++ = v;
182 }
183 
184 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
185                                                       uint8_t v)
186 {
187     *p = v;
188 }
189 #endif
190 
191 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
192 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
193 {
194     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
195         *s->code_ptr++ = v;
196     } else {
197         tcg_insn_unit *p = s->code_ptr;
198         memcpy(p, &v, sizeof(v));
199         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
200     }
201 }
202 
203 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
204                                                        uint16_t v)
205 {
206     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
207         *p = v;
208     } else {
209         memcpy(p, &v, sizeof(v));
210     }
211 }
212 #endif
213 
214 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
215 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
216 {
217     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
218         *s->code_ptr++ = v;
219     } else {
220         tcg_insn_unit *p = s->code_ptr;
221         memcpy(p, &v, sizeof(v));
222         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
223     }
224 }
225 
226 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
227                                                        uint32_t v)
228 {
229     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
230         *p = v;
231     } else {
232         memcpy(p, &v, sizeof(v));
233     }
234 }
235 #endif
236 
237 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
238 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
239 {
240     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
241         *s->code_ptr++ = v;
242     } else {
243         tcg_insn_unit *p = s->code_ptr;
244         memcpy(p, &v, sizeof(v));
245         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
246     }
247 }
248 
249 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
250                                                        uint64_t v)
251 {
252     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
253         *p = v;
254     } else {
255         memcpy(p, &v, sizeof(v));
256     }
257 }
258 #endif
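/*
 * A minimal sketch of how a backend composes the emitters above, assuming
 * a host with TCG_TARGET_INSN_UNIT_SIZE == 4 and a made-up instruction
 * format (opc[31:24] rd[23:16] rn[15:8] rm[7:0] is not any real encoding):
 *
 *     static void emit_add_rrr(TCGContext *s, int rd, int rn, int rm)
 *     {
 *         tcg_out32(s, (0x2a << 24) | (rd << 16) | (rn << 8) | rm);
 *     }
 *
 * The tcg_patch* variants are the write-after-the-fact twins: they rewrite
 * an already-emitted unit in place, which is how relocations get resolved.
 */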
259 
260 /* label relocation processing */
261 
262 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
263                           TCGLabel *l, intptr_t addend)
264 {
265     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
266 
267     r->type = type;
268     r->ptr = code_ptr;
269     r->addend = addend;
270     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
271 }
272 
273 static void tcg_out_label(TCGContext *s, TCGLabel *l)
274 {
275     tcg_debug_assert(!l->has_value);
276     l->has_value = 1;
277     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
278 }
279 
280 TCGLabel *gen_new_label(void)
281 {
282     TCGContext *s = tcg_ctx;
283     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
284 
285     memset(l, 0, sizeof(TCGLabel));
286     l->id = s->nb_labels++;
287     QSIMPLEQ_INIT(&l->relocs);
288 
289     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
290 
291     return l;
292 }
293 
294 static bool tcg_resolve_relocs(TCGContext *s)
295 {
296     TCGLabel *l;
297 
298     QSIMPLEQ_FOREACH(l, &s->labels, next) {
299         TCGRelocation *r;
300         uintptr_t value = l->u.value;
301 
302         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
303             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
304                 return false;
305             }
306         }
307     }
308     return true;
309 }
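/*
 * Sketch of the label lifecycle as seen from a backend (R_MY_BRANCH and
 * OPC_MY_BRANCH are hypothetical stand-ins for the per-host relocation
 * types and encodings):
 *
 *     TCGLabel *l = gen_new_label();
 *     // forward branch: record a relocation against the not-yet-bound label
 *     tcg_out_reloc(s, s->code_ptr, R_MY_BRANCH, l, 0);
 *     tcg_out32(s, OPC_MY_BRANCH);          // displacement patched later
 *     ...
 *     tcg_out_label(s, l);                  // bind label to current code_ptr
 *
 * Once all code is emitted, tcg_resolve_relocs() walks every label's list
 * and lets patch_reloc() fix up the recorded instruction words.
 */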
310 
311 static void set_jmp_reset_offset(TCGContext *s, int which)
312 {
313     /*
314      * We will check for overflow at the end of the opcode loop in
315      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
316      */
317     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
318 }
319 
320 /* Signal overflow, starting over with fewer guest insns. */
321 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
322 {
323     siglongjmp(s->jmp_trans, -2);
324 }
325 
326 #define C_PFX1(P, A)                    P##A
327 #define C_PFX2(P, A, B)                 P##A##_##B
328 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
329 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
330 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
331 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
332 
333 /* Define an enumeration for the various combinations. */
334 
335 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
336 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
337 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
338 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
339 
340 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
341 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
342 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
343 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
344 
345 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
346 
347 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
348 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
349 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
350 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
351 
352 typedef enum {
353 #include "tcg-target-con-set.h"
354 } TCGConstraintSetIndex;
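/*
 * For example, a line such as
 *     C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h expands here to the enumerator c_o1_i2_r_r_ri,
 * and below (after the macros are redefined) to the matching table entry
 *     { .args_ct_str = { "r", "r", "ri" } },
 * so a single header keeps the index and the table it indexes in sync.
 */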
355 
356 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
357 
358 #undef C_O0_I1
359 #undef C_O0_I2
360 #undef C_O0_I3
361 #undef C_O0_I4
362 #undef C_O1_I1
363 #undef C_O1_I2
364 #undef C_O1_I3
365 #undef C_O1_I4
366 #undef C_N1_I2
367 #undef C_O2_I1
368 #undef C_O2_I2
369 #undef C_O2_I3
370 #undef C_O2_I4
371 
372 /* Put all of the constraint sets into an array, indexed by the enum. */
373 
374 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
375 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
376 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
377 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
378 
379 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
380 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
381 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
382 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
383 
384 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
385 
386 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
387 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
388 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
389 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
390 
391 static const TCGTargetOpDef constraint_sets[] = {
392 #include "tcg-target-con-set.h"
393 };
394 
395 
396 #undef C_O0_I1
397 #undef C_O0_I2
398 #undef C_O0_I3
399 #undef C_O0_I4
400 #undef C_O1_I1
401 #undef C_O1_I2
402 #undef C_O1_I3
403 #undef C_O1_I4
404 #undef C_N1_I2
405 #undef C_O2_I1
406 #undef C_O2_I2
407 #undef C_O2_I3
408 #undef C_O2_I4
409 
410 /* Expand the enumerator to be returned from tcg_target_op_def(). */
411 
412 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
413 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
414 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
415 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
416 
417 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
418 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
419 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
420 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
421 
422 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
423 
424 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
425 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
426 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
427 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
428 
429 #include "tcg-target.c.inc"
430 
431 static void alloc_tcg_plugin_context(TCGContext *s)
432 {
433 #ifdef CONFIG_PLUGIN
434     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
435     s->plugin_tb->insns =
436         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
437 #endif
438 }
439 
440 /*
441  * All TCG threads except the parent (i.e. the one that called tcg_context_init
442  * and registered the target's TCG globals) must register with this function
443  * before initiating translation.
444  *
445  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
446  * of tcg_region_init() for the reasoning behind this.
447  *
448  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
449  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
450  * is not used anymore for translation once this function is called.
451  *
452  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
453  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
454  */
455 #ifdef CONFIG_USER_ONLY
456 void tcg_register_thread(void)
457 {
458     tcg_ctx = &tcg_init_ctx;
459 }
460 #else
461 void tcg_register_thread(void)
462 {
463     TCGContext *s = g_malloc(sizeof(*s));
464     unsigned int i, n;
465 
466     *s = tcg_init_ctx;
467 
468     /* Relink mem_base.  */
469     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
470         if (tcg_init_ctx.temps[i].mem_base) {
471             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
472             tcg_debug_assert(b >= 0 && b < n);
473             s->temps[i].mem_base = &s->temps[b];
474         }
475     }
476 
477     /* Claim an entry in tcg_ctxs */
478     n = qatomic_fetch_inc(&tcg_cur_ctxs);
479     g_assert(n < tcg_max_ctxs);
480     qatomic_set(&tcg_ctxs[n], s);
481 
482     if (n > 0) {
483         alloc_tcg_plugin_context(s);
484         tcg_region_initial_alloc(s);
485     }
486 
487     tcg_ctx = s;
488 }
489 #endif /* !CONFIG_USER_ONLY */
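/*
 * Usage sketch: in softmmu each new vCPU thread registers itself once,
 * before its first translation (the function name here is illustrative;
 * see the per-accelerator vCPU thread loops for the real call sites):
 *
 *     static void *cpu_thread_fn(void *arg)
 *     {
 *         rcu_register_thread();
 *         tcg_register_thread();      // claim a slot in tcg_ctxs[]
 *         ...translate and run...
 *     }
 *
 * Afterwards the thread-local tcg_ctx can be used without locking, since
 * each context generates code into its own region.
 */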
490 
491 /* pool based memory allocation */
492 void *tcg_malloc_internal(TCGContext *s, int size)
493 {
494     TCGPool *p;
495     int pool_size;
496 
497     if (size > TCG_POOL_CHUNK_SIZE) {
498         /* big malloc: insert a new pool (XXX: could optimize) */
499         p = g_malloc(sizeof(TCGPool) + size);
500         p->size = size;
501         p->next = s->pool_first_large;
502         s->pool_first_large = p;
503         return p->data;
504     } else {
505         p = s->pool_current;
506         if (!p) {
507             p = s->pool_first;
508             if (!p)
509                 goto new_pool;
510         } else {
511             if (!p->next) {
512             new_pool:
513                 pool_size = TCG_POOL_CHUNK_SIZE;
514                 p = g_malloc(sizeof(TCGPool) + pool_size);
515                 p->size = pool_size;
516                 p->next = NULL;
517                 if (s->pool_current)
518                     s->pool_current->next = p;
519                 else
520                     s->pool_first = p;
521             } else {
522                 p = p->next;
523             }
524         }
525     }
526     s->pool_current = p;
527     s->pool_cur = p->data + size;
528     s->pool_end = p->data + p->size;
529     return p->data;
530 }
531 
532 void tcg_pool_reset(TCGContext *s)
533 {
534     TCGPool *p, *t;
535     for (p = s->pool_first_large; p; p = t) {
536         t = p->next;
537         g_free(p);
538     }
539     s->pool_first_large = NULL;
540     s->pool_cur = s->pool_end = NULL;
541     s->pool_current = NULL;
542 }
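/*
 * The pool is a bump allocator for data that lives only as long as one
 * translation: the TCGRelocation and TCGLabel records above come from it.
 * A sketch of the lifetime contract:
 *
 *     tcg_func_start(s);                           // calls tcg_pool_reset()
 *     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));  // valid until next reset
 *
 * Small chunks are recycled wholesale at the next reset; only allocations
 * larger than TCG_POOL_CHUNK_SIZE are individually g_free()d.
 */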
543 
544 #include "exec/helper-proto.h"
545 
546 static const TCGHelperInfo all_helpers[] = {
547 #include "exec/helper-tcg.h"
548 };
549 static GHashTable *helper_table;
550 
551 #ifdef CONFIG_TCG_INTERPRETER
552 static GHashTable *ffi_table;
553 
554 static ffi_type * const typecode_to_ffi[8] = {
555     [dh_typecode_void] = &ffi_type_void,
556     [dh_typecode_i32]  = &ffi_type_uint32,
557     [dh_typecode_s32]  = &ffi_type_sint32,
558     [dh_typecode_i64]  = &ffi_type_uint64,
559     [dh_typecode_s64]  = &ffi_type_sint64,
560     [dh_typecode_ptr]  = &ffi_type_pointer,
561 };
562 #endif
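/*
 * The typemask packs one 3-bit dh_typecode_* field per slot: bits [2:0]
 * describe the return value and bits [3n+2:3n] describe argument n
 * (1-based, matching extract32(typemask, (i + 1) * 3, 3) below).  As a
 * worked example, a hypothetical helper declared as
 *     DEF_HELPER_FLAGS_2(foo, TCG_CALL_NO_RWG, i32, env, i64)
 * yields dh_typecode_i32 in bits [2:0], dh_typecode_ptr in bits [5:3]
 * for env, and dh_typecode_i64 in bits [8:6]; "32 - clz32(typemask >> 3)"
 * then locates the last non-zero field, giving nargs = 2.
 */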
563 
564 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
565 static void process_op_defs(TCGContext *s);
566 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
567                                             TCGReg reg, const char *name);
568 
569 static void tcg_context_init(unsigned max_cpus)
570 {
571     TCGContext *s = &tcg_init_ctx;
572     int op, total_args, n, i;
573     TCGOpDef *def;
574     TCGArgConstraint *args_ct;
575     TCGTemp *ts;
576 
577     memset(s, 0, sizeof(*s));
578     s->nb_globals = 0;
579 
580     /* Count total number of arguments and allocate the corresponding
581        space */
582     total_args = 0;
583     for (op = 0; op < NB_OPS; op++) {
584         def = &tcg_op_defs[op];
585         n = def->nb_iargs + def->nb_oargs;
586         total_args += n;
587     }
588 
589     args_ct = g_new0(TCGArgConstraint, total_args);
590 
591     for (op = 0; op < NB_OPS; op++) {
592         def = &tcg_op_defs[op];
593         def->args_ct = args_ct;
594         n = def->nb_iargs + def->nb_oargs;
595         args_ct += n;
596     }
597 
598     /* Register helpers.  */
599     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
600     helper_table = g_hash_table_new(NULL, NULL);
601 
602     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
603         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
604                             (gpointer)&all_helpers[i]);
605     }
606 
607 #ifdef CONFIG_TCG_INTERPRETER
608     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
609     ffi_table = g_hash_table_new(NULL, NULL);
610     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
611         struct {
612             ffi_cif cif;
613             ffi_type *args[];
614         } *ca;
615         uint32_t typemask = all_helpers[i].typemask;
616         gpointer hash = (gpointer)(uintptr_t)typemask;
617         ffi_status status;
618         int nargs;
619 
620         if (g_hash_table_lookup(ffi_table, hash)) {
621             continue;
622         }
623 
624         /* Ignoring the return type, find the last non-zero field. */
625         nargs = 32 - clz32(typemask >> 3);
626         nargs = DIV_ROUND_UP(nargs, 3);
627 
628         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
629         ca->cif.rtype = typecode_to_ffi[typemask & 7];
630         ca->cif.nargs = nargs;
631 
632         if (nargs != 0) {
633             ca->cif.arg_types = ca->args;
634             for (int j = 0; j < nargs; ++j) {  /* do not clobber outer 'i' */
635                 int typecode = extract32(typemask, (j + 1) * 3, 3);
636                 ca->args[j] = typecode_to_ffi[typecode];
637             }
638         }
639 
640         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
641                               ca->cif.rtype, ca->cif.arg_types);
642         assert(status == FFI_OK);
643 
644         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
645     }
646 #endif
647 
648     tcg_target_init(s);
649     process_op_defs(s);
650 
651     /* Reverse the order of the saved registers, assuming they're all at
652        the start of tcg_target_reg_alloc_order.  */
653     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
654         int r = tcg_target_reg_alloc_order[n];
655         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
656             break;
657         }
658     }
659     for (i = 0; i < n; ++i) {
660         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
661     }
662     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
663         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
664     }
665 
666     alloc_tcg_plugin_context(s);
667 
668     tcg_ctx = s;
669     /*
670      * In user-mode we simply share the init context among threads, since we
671      * use a single region. See the documentation of tcg_region_init() for the
672      * reasoning behind this.
673      * In softmmu we will have at most max_cpus TCG threads.
674      */
675 #ifdef CONFIG_USER_ONLY
676     tcg_ctxs = &tcg_ctx;
677     tcg_cur_ctxs = 1;
678     tcg_max_ctxs = 1;
679 #else
680     tcg_max_ctxs = max_cpus;
681     tcg_ctxs = g_new0(TCGContext *, max_cpus);
682 #endif
683 
684     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
685     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
686     cpu_env = temp_tcgv_ptr(ts);
687 }
688 
689 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
690 {
691     tcg_context_init(max_cpus);
692     tcg_region_init(tb_size, splitwx, max_cpus);
693 }
694 
695 /*
696  * Allocate TBs right before their corresponding translated code, making
697  * sure that TBs and code are on different cache lines.
698  */
699 TranslationBlock *tcg_tb_alloc(TCGContext *s)
700 {
701     uintptr_t align = qemu_icache_linesize;
702     TranslationBlock *tb;
703     void *next;
704 
705  retry:
706     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
707     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
708 
709     if (unlikely(next > s->code_gen_highwater)) {
710         if (tcg_region_alloc(s)) {
711             return NULL;
712         }
713         goto retry;
714     }
715     qatomic_set(&s->code_gen_ptr, next);
716     s->data_gen_ptr = NULL;
717     return tb;
718 }
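/*
 * Worked example, assuming qemu_icache_linesize == 64: with code_gen_ptr
 * at 0x1000 the TB descriptor is placed at 0x1000, and for a 0x128-byte
 * TranslationBlock "next" rounds 0x1128 up to 0x1140, so the translated
 * code that follows never shares a cache line with the descriptor.
 */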
719 
720 void tcg_prologue_init(TCGContext *s)
721 {
722     size_t prologue_size;
723 
724     s->code_ptr = s->code_gen_ptr;
725     s->code_buf = s->code_gen_ptr;
726     s->data_gen_ptr = NULL;
727 
728 #ifndef CONFIG_TCG_INTERPRETER
729     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
730 #endif
731 
732 #ifdef TCG_TARGET_NEED_POOL_LABELS
733     s->pool_labels = NULL;
734 #endif
735 
736     qemu_thread_jit_write();
737     /* Generate the prologue.  */
738     tcg_target_qemu_prologue(s);
739 
740 #ifdef TCG_TARGET_NEED_POOL_LABELS
741     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
742     {
743         int result = tcg_out_pool_finalize(s);
744         tcg_debug_assert(result == 0);
745     }
746 #endif
747 
748     prologue_size = tcg_current_code_size(s);
749 
750 #ifndef CONFIG_TCG_INTERPRETER
751     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
752                         (uintptr_t)s->code_buf, prologue_size);
753 #endif
754 
755     tcg_region_prologue_set(s);
756 
757 #ifdef DEBUG_DISAS
758     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
759         FILE *logfile = qemu_log_lock();
760         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
761         if (s->data_gen_ptr) {
762             size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
763             size_t data_size = prologue_size - code_size;
764             size_t i;
765 
766             log_disas(s->code_gen_ptr, code_size);
767 
768             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
769                 if (sizeof(tcg_target_ulong) == 8) {
770                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
771                              (uintptr_t)s->data_gen_ptr + i,
772                              *(uint64_t *)(s->data_gen_ptr + i));
773                 } else {
774                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
775                              (uintptr_t)s->data_gen_ptr + i,
776                              *(uint32_t *)(s->data_gen_ptr + i));
777                 }
778             }
779         } else {
780             log_disas(s->code_gen_ptr, prologue_size);
781         }
782         qemu_log("\n");
783         qemu_log_flush();
784         qemu_log_unlock(logfile);
785     }
786 #endif
787 
788 #ifndef CONFIG_TCG_INTERPRETER
789     /*
790      * Assert that goto_ptr is implemented completely, setting an epilogue.
791      * For tci, we use NULL as the signal to return from the interpreter,
792      * so skip this check.
793      */
794     if (TCG_TARGET_HAS_goto_ptr) {
795         tcg_debug_assert(tcg_code_gen_epilogue != NULL);
796     }
797 #endif
798 }
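/*
 * With the prologue in place, entering a TB is a single indirect call
 * (sketch; cpu_tb_exec() in accel/tcg/cpu-exec.c is the real caller):
 *
 *     uintptr_t ret = tcg_qemu_tb_exec(env, tb->tc.ptr);
 *
 * The prologue saves the host's callee-saved registers, loads env into
 * TCG_AREG0 and jumps to the TB; returning through the epilogue (stored
 * in tcg_code_gen_epilogue for goto_ptr) restores them.
 */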
799 
800 void tcg_func_start(TCGContext *s)
801 {
802     tcg_pool_reset(s);
803     s->nb_temps = s->nb_globals;
804 
805     /* No temps have been previously allocated for size or locality.  */
806     memset(s->free_temps, 0, sizeof(s->free_temps));
807 
808     /* No constant temps have been previously allocated. */
809     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
810         if (s->const_table[i]) {
811             g_hash_table_remove_all(s->const_table[i]);
812         }
813     }
814 
815     s->nb_ops = 0;
816     s->nb_labels = 0;
817     s->current_frame_offset = s->frame_start;
818 
819 #ifdef CONFIG_DEBUG_TCG
820     s->goto_tb_issue_mask = 0;
821 #endif
822 
823     QTAILQ_INIT(&s->ops);
824     QTAILQ_INIT(&s->free_ops);
825     QSIMPLEQ_INIT(&s->labels);
826 }
827 
828 static TCGTemp *tcg_temp_alloc(TCGContext *s)
829 {
830     int n = s->nb_temps++;
831 
832     if (n >= TCG_MAX_TEMPS) {
833         tcg_raise_tb_overflow(s);
834     }
835     return memset(&s->temps[n], 0, sizeof(TCGTemp));
836 }
837 
838 static TCGTemp *tcg_global_alloc(TCGContext *s)
839 {
840     TCGTemp *ts;
841 
842     tcg_debug_assert(s->nb_globals == s->nb_temps);
843     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
844     s->nb_globals++;
845     ts = tcg_temp_alloc(s);
846     ts->kind = TEMP_GLOBAL;
847 
848     return ts;
849 }
850 
851 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
852                                             TCGReg reg, const char *name)
853 {
854     TCGTemp *ts;
855 
856     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
857         tcg_abort();
858     }
859 
860     ts = tcg_global_alloc(s);
861     ts->base_type = type;
862     ts->type = type;
863     ts->kind = TEMP_FIXED;
864     ts->reg = reg;
865     ts->name = name;
866     tcg_regset_set_reg(s->reserved_regs, reg);
867 
868     return ts;
869 }
870 
871 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
872 {
873     s->frame_start = start;
874     s->frame_end = start + size;
875     s->frame_temp
876         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
877 }
878 
879 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
880                                      intptr_t offset, const char *name)
881 {
882     TCGContext *s = tcg_ctx;
883     TCGTemp *base_ts = tcgv_ptr_temp(base);
884     TCGTemp *ts = tcg_global_alloc(s);
885     int indirect_reg = 0, bigendian = 0;
886 #ifdef HOST_WORDS_BIGENDIAN
887     bigendian = 1;
888 #endif
889 
890     switch (base_ts->kind) {
891     case TEMP_FIXED:
892         break;
893     case TEMP_GLOBAL:
894         /* We do not support double-indirect registers.  */
895         tcg_debug_assert(!base_ts->indirect_reg);
896         base_ts->indirect_base = 1;
897         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
898                             ? 2 : 1);
899         indirect_reg = 1;
900         break;
901     default:
902         g_assert_not_reached();
903     }
904 
905     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
906         TCGTemp *ts2 = tcg_global_alloc(s);
907         char buf[64];
908 
909         ts->base_type = TCG_TYPE_I64;
910         ts->type = TCG_TYPE_I32;
911         ts->indirect_reg = indirect_reg;
912         ts->mem_allocated = 1;
913         ts->mem_base = base_ts;
914         ts->mem_offset = offset + bigendian * 4;
915         pstrcpy(buf, sizeof(buf), name);
916         pstrcat(buf, sizeof(buf), "_0");
917         ts->name = strdup(buf);
918 
919         tcg_debug_assert(ts2 == ts + 1);
920         ts2->base_type = TCG_TYPE_I64;
921         ts2->type = TCG_TYPE_I32;
922         ts2->indirect_reg = indirect_reg;
923         ts2->mem_allocated = 1;
924         ts2->mem_base = base_ts;
925         ts2->mem_offset = offset + (1 - bigendian) * 4;
926         pstrcpy(buf, sizeof(buf), name);
927         pstrcat(buf, sizeof(buf), "_1");
928         ts2->name = strdup(buf);
929     } else {
930         ts->base_type = type;
931         ts->type = type;
932         ts->indirect_reg = indirect_reg;
933         ts->mem_allocated = 1;
934         ts->mem_base = base_ts;
935         ts->mem_offset = offset;
936         ts->name = name;
937     }
938     return ts;
939 }
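/*
 * Usage sketch: front ends reach this through the tcg_global_mem_new_*
 * wrappers when they register their CPU state, e.g. (the "pc" field name
 * is illustrative):
 *
 *     TCGv_i64 cpu_pc = tcg_global_mem_new_i64(cpu_env,
 *                           offsetof(CPUArchState, pc), "pc");
 *
 * On a 32-bit host an i64 global materializes as the "_0"/"_1" half-pair
 * built above.
 */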
940 
941 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
942 {
943     TCGContext *s = tcg_ctx;
944     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
945     TCGTemp *ts;
946     int idx, k;
947 
948     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
949     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
950     if (idx < TCG_MAX_TEMPS) {
951         /* There is already an available temp with the right type.  */
952         clear_bit(idx, s->free_temps[k].l);
953 
954         ts = &s->temps[idx];
955         ts->temp_allocated = 1;
956         tcg_debug_assert(ts->base_type == type);
957         tcg_debug_assert(ts->kind == kind);
958     } else {
959         ts = tcg_temp_alloc(s);
960         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
961             TCGTemp *ts2 = tcg_temp_alloc(s);
962 
963             ts->base_type = type;
964             ts->type = TCG_TYPE_I32;
965             ts->temp_allocated = 1;
966             ts->kind = kind;
967 
968             tcg_debug_assert(ts2 == ts + 1);
969             ts2->base_type = TCG_TYPE_I64;
970             ts2->type = TCG_TYPE_I32;
971             ts2->temp_allocated = 1;
972             ts2->kind = kind;
973         } else {
974             ts->base_type = type;
975             ts->type = type;
976             ts->temp_allocated = 1;
977             ts->kind = kind;
978         }
979     }
980 
981 #if defined(CONFIG_DEBUG_TCG)
982     s->temps_in_use++;
983 #endif
984     return ts;
985 }
986 
987 TCGv_vec tcg_temp_new_vec(TCGType type)
988 {
989     TCGTemp *t;
990 
991 #ifdef CONFIG_DEBUG_TCG
992     switch (type) {
993     case TCG_TYPE_V64:
994         assert(TCG_TARGET_HAS_v64);
995         break;
996     case TCG_TYPE_V128:
997         assert(TCG_TARGET_HAS_v128);
998         break;
999     case TCG_TYPE_V256:
1000         assert(TCG_TARGET_HAS_v256);
1001         break;
1002     default:
1003         g_assert_not_reached();
1004     }
1005 #endif
1006 
1007     t = tcg_temp_new_internal(type, 0);
1008     return temp_tcgv_vec(t);
1009 }
1010 
1011 /* Create a new temp of the same type as an existing temp.  */
1012 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1013 {
1014     TCGTemp *t = tcgv_vec_temp(match);
1015 
1016     tcg_debug_assert(t->temp_allocated != 0);
1017 
1018     t = tcg_temp_new_internal(t->base_type, 0);
1019     return temp_tcgv_vec(t);
1020 }
1021 
1022 void tcg_temp_free_internal(TCGTemp *ts)
1023 {
1024     TCGContext *s = tcg_ctx;
1025     int k, idx;
1026 
1027     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1028     if (ts->kind == TEMP_CONST) {
1029         return;
1030     }
1031 
1032 #if defined(CONFIG_DEBUG_TCG)
1033     s->temps_in_use--;
1034     if (s->temps_in_use < 0) {
1035         fprintf(stderr, "More temporaries freed than allocated!\n");
1036     }
1037 #endif
1038 
1039     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1040     tcg_debug_assert(ts->temp_allocated != 0);
1041     ts->temp_allocated = 0;
1042 
1043     idx = temp_idx(ts);
1044     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1045     set_bit(idx, s->free_temps[k].l);
1046 }
1047 
1048 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1049 {
1050     TCGContext *s = tcg_ctx;
1051     GHashTable *h = s->const_table[type];
1052     TCGTemp *ts;
1053 
1054     if (h == NULL) {
1055         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1056         s->const_table[type] = h;
1057     }
1058 
1059     ts = g_hash_table_lookup(h, &val);
1060     if (ts == NULL) {
1061         ts = tcg_temp_alloc(s);
1062 
1063         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1064             TCGTemp *ts2 = tcg_temp_alloc(s);
1065 
1066             ts->base_type = TCG_TYPE_I64;
1067             ts->type = TCG_TYPE_I32;
1068             ts->kind = TEMP_CONST;
1069             ts->temp_allocated = 1;
1070             /*
1071              * Retain the full value of the 64-bit constant in the low
1072              * part, so that the hash table works.  Actual uses will
1073              * truncate the value to the low part.
1074              */
1075             ts->val = val;
1076 
1077             tcg_debug_assert(ts2 == ts + 1);
1078             ts2->base_type = TCG_TYPE_I64;
1079             ts2->type = TCG_TYPE_I32;
1080             ts2->kind = TEMP_CONST;
1081             ts2->temp_allocated = 1;
1082             ts2->val = val >> 32;
1083         } else {
1084             ts->base_type = type;
1085             ts->type = type;
1086             ts->kind = TEMP_CONST;
1087             ts->temp_allocated = 1;
1088             ts->val = val;
1089         }
1090         g_hash_table_insert(h, &ts->val, ts);
1091     }
1092 
1093     return ts;
1094 }
1095 
1096 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1097 {
1098     val = dup_const(vece, val);
1099     return temp_tcgv_vec(tcg_constant_internal(type, val));
1100 }
1101 
1102 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1103 {
1104     TCGTemp *t = tcgv_vec_temp(match);
1105 
1106     tcg_debug_assert(t->temp_allocated != 0);
1107     return tcg_constant_vec(t->base_type, vece, val);
1108 }
1109 
1110 TCGv_i32 tcg_const_i32(int32_t val)
1111 {
1112     TCGv_i32 t0;
1113     t0 = tcg_temp_new_i32();
1114     tcg_gen_movi_i32(t0, val);
1115     return t0;
1116 }
1117 
1118 TCGv_i64 tcg_const_i64(int64_t val)
1119 {
1120     TCGv_i64 t0;
1121     t0 = tcg_temp_new_i64();
1122     tcg_gen_movi_i64(t0, val);
1123     return t0;
1124 }
1125 
1126 TCGv_i32 tcg_const_local_i32(int32_t val)
1127 {
1128     TCGv_i32 t0;
1129     t0 = tcg_temp_local_new_i32();
1130     tcg_gen_movi_i32(t0, val);
1131     return t0;
1132 }
1133 
1134 TCGv_i64 tcg_const_local_i64(int64_t val)
1135 {
1136     TCGv_i64 t0;
1137     t0 = tcg_temp_local_new_i64();
1138     tcg_gen_movi_i64(t0, val);
1139     return t0;
1140 }
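/*
 * Note the two flavors: tcg_constant_*() above returns a shared,
 * hash-consed TEMP_CONST that must never be written, while the legacy
 * tcg_const_*() returns a fresh mutable temp initialized to the value:
 *
 *     TCGv_i32 c = tcg_constant_i32(42);   // shared, freeing is a no-op
 *     TCGv_i32 t = tcg_const_i32(42);      // private copy...
 *     tcg_temp_free_i32(t);                // ...that the caller releases
 */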
1141 
1142 #if defined(CONFIG_DEBUG_TCG)
1143 void tcg_clear_temp_count(void)
1144 {
1145     TCGContext *s = tcg_ctx;
1146     s->temps_in_use = 0;
1147 }
1148 
1149 int tcg_check_temp_count(void)
1150 {
1151     TCGContext *s = tcg_ctx;
1152     if (s->temps_in_use) {
1153         /* Clear the count so that we don't give another
1154          * warning immediately next time around.
1155          */
1156         s->temps_in_use = 0;
1157         return 1;
1158     }
1159     return 0;
1160 }
1161 #endif
1162 
1163 /* Return true if OP may appear in the opcode stream.
1164    Test the runtime variable that controls each opcode.  */
1165 bool tcg_op_supported(TCGOpcode op)
1166 {
1167     const bool have_vec
1168         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1169 
1170     switch (op) {
1171     case INDEX_op_discard:
1172     case INDEX_op_set_label:
1173     case INDEX_op_call:
1174     case INDEX_op_br:
1175     case INDEX_op_mb:
1176     case INDEX_op_insn_start:
1177     case INDEX_op_exit_tb:
1178     case INDEX_op_goto_tb:
1179     case INDEX_op_qemu_ld_i32:
1180     case INDEX_op_qemu_st_i32:
1181     case INDEX_op_qemu_ld_i64:
1182     case INDEX_op_qemu_st_i64:
1183         return true;
1184 
1185     case INDEX_op_qemu_st8_i32:
1186         return TCG_TARGET_HAS_qemu_st8_i32;
1187 
1188     case INDEX_op_goto_ptr:
1189         return TCG_TARGET_HAS_goto_ptr;
1190 
1191     case INDEX_op_mov_i32:
1192     case INDEX_op_setcond_i32:
1193     case INDEX_op_brcond_i32:
1194     case INDEX_op_ld8u_i32:
1195     case INDEX_op_ld8s_i32:
1196     case INDEX_op_ld16u_i32:
1197     case INDEX_op_ld16s_i32:
1198     case INDEX_op_ld_i32:
1199     case INDEX_op_st8_i32:
1200     case INDEX_op_st16_i32:
1201     case INDEX_op_st_i32:
1202     case INDEX_op_add_i32:
1203     case INDEX_op_sub_i32:
1204     case INDEX_op_mul_i32:
1205     case INDEX_op_and_i32:
1206     case INDEX_op_or_i32:
1207     case INDEX_op_xor_i32:
1208     case INDEX_op_shl_i32:
1209     case INDEX_op_shr_i32:
1210     case INDEX_op_sar_i32:
1211         return true;
1212 
1213     case INDEX_op_movcond_i32:
1214         return TCG_TARGET_HAS_movcond_i32;
1215     case INDEX_op_div_i32:
1216     case INDEX_op_divu_i32:
1217         return TCG_TARGET_HAS_div_i32;
1218     case INDEX_op_rem_i32:
1219     case INDEX_op_remu_i32:
1220         return TCG_TARGET_HAS_rem_i32;
1221     case INDEX_op_div2_i32:
1222     case INDEX_op_divu2_i32:
1223         return TCG_TARGET_HAS_div2_i32;
1224     case INDEX_op_rotl_i32:
1225     case INDEX_op_rotr_i32:
1226         return TCG_TARGET_HAS_rot_i32;
1227     case INDEX_op_deposit_i32:
1228         return TCG_TARGET_HAS_deposit_i32;
1229     case INDEX_op_extract_i32:
1230         return TCG_TARGET_HAS_extract_i32;
1231     case INDEX_op_sextract_i32:
1232         return TCG_TARGET_HAS_sextract_i32;
1233     case INDEX_op_extract2_i32:
1234         return TCG_TARGET_HAS_extract2_i32;
1235     case INDEX_op_add2_i32:
1236         return TCG_TARGET_HAS_add2_i32;
1237     case INDEX_op_sub2_i32:
1238         return TCG_TARGET_HAS_sub2_i32;
1239     case INDEX_op_mulu2_i32:
1240         return TCG_TARGET_HAS_mulu2_i32;
1241     case INDEX_op_muls2_i32:
1242         return TCG_TARGET_HAS_muls2_i32;
1243     case INDEX_op_muluh_i32:
1244         return TCG_TARGET_HAS_muluh_i32;
1245     case INDEX_op_mulsh_i32:
1246         return TCG_TARGET_HAS_mulsh_i32;
1247     case INDEX_op_ext8s_i32:
1248         return TCG_TARGET_HAS_ext8s_i32;
1249     case INDEX_op_ext16s_i32:
1250         return TCG_TARGET_HAS_ext16s_i32;
1251     case INDEX_op_ext8u_i32:
1252         return TCG_TARGET_HAS_ext8u_i32;
1253     case INDEX_op_ext16u_i32:
1254         return TCG_TARGET_HAS_ext16u_i32;
1255     case INDEX_op_bswap16_i32:
1256         return TCG_TARGET_HAS_bswap16_i32;
1257     case INDEX_op_bswap32_i32:
1258         return TCG_TARGET_HAS_bswap32_i32;
1259     case INDEX_op_not_i32:
1260         return TCG_TARGET_HAS_not_i32;
1261     case INDEX_op_neg_i32:
1262         return TCG_TARGET_HAS_neg_i32;
1263     case INDEX_op_andc_i32:
1264         return TCG_TARGET_HAS_andc_i32;
1265     case INDEX_op_orc_i32:
1266         return TCG_TARGET_HAS_orc_i32;
1267     case INDEX_op_eqv_i32:
1268         return TCG_TARGET_HAS_eqv_i32;
1269     case INDEX_op_nand_i32:
1270         return TCG_TARGET_HAS_nand_i32;
1271     case INDEX_op_nor_i32:
1272         return TCG_TARGET_HAS_nor_i32;
1273     case INDEX_op_clz_i32:
1274         return TCG_TARGET_HAS_clz_i32;
1275     case INDEX_op_ctz_i32:
1276         return TCG_TARGET_HAS_ctz_i32;
1277     case INDEX_op_ctpop_i32:
1278         return TCG_TARGET_HAS_ctpop_i32;
1279 
1280     case INDEX_op_brcond2_i32:
1281     case INDEX_op_setcond2_i32:
1282         return TCG_TARGET_REG_BITS == 32;
1283 
1284     case INDEX_op_mov_i64:
1285     case INDEX_op_setcond_i64:
1286     case INDEX_op_brcond_i64:
1287     case INDEX_op_ld8u_i64:
1288     case INDEX_op_ld8s_i64:
1289     case INDEX_op_ld16u_i64:
1290     case INDEX_op_ld16s_i64:
1291     case INDEX_op_ld32u_i64:
1292     case INDEX_op_ld32s_i64:
1293     case INDEX_op_ld_i64:
1294     case INDEX_op_st8_i64:
1295     case INDEX_op_st16_i64:
1296     case INDEX_op_st32_i64:
1297     case INDEX_op_st_i64:
1298     case INDEX_op_add_i64:
1299     case INDEX_op_sub_i64:
1300     case INDEX_op_mul_i64:
1301     case INDEX_op_and_i64:
1302     case INDEX_op_or_i64:
1303     case INDEX_op_xor_i64:
1304     case INDEX_op_shl_i64:
1305     case INDEX_op_shr_i64:
1306     case INDEX_op_sar_i64:
1307     case INDEX_op_ext_i32_i64:
1308     case INDEX_op_extu_i32_i64:
1309         return TCG_TARGET_REG_BITS == 64;
1310 
1311     case INDEX_op_movcond_i64:
1312         return TCG_TARGET_HAS_movcond_i64;
1313     case INDEX_op_div_i64:
1314     case INDEX_op_divu_i64:
1315         return TCG_TARGET_HAS_div_i64;
1316     case INDEX_op_rem_i64:
1317     case INDEX_op_remu_i64:
1318         return TCG_TARGET_HAS_rem_i64;
1319     case INDEX_op_div2_i64:
1320     case INDEX_op_divu2_i64:
1321         return TCG_TARGET_HAS_div2_i64;
1322     case INDEX_op_rotl_i64:
1323     case INDEX_op_rotr_i64:
1324         return TCG_TARGET_HAS_rot_i64;
1325     case INDEX_op_deposit_i64:
1326         return TCG_TARGET_HAS_deposit_i64;
1327     case INDEX_op_extract_i64:
1328         return TCG_TARGET_HAS_extract_i64;
1329     case INDEX_op_sextract_i64:
1330         return TCG_TARGET_HAS_sextract_i64;
1331     case INDEX_op_extract2_i64:
1332         return TCG_TARGET_HAS_extract2_i64;
1333     case INDEX_op_extrl_i64_i32:
1334         return TCG_TARGET_HAS_extrl_i64_i32;
1335     case INDEX_op_extrh_i64_i32:
1336         return TCG_TARGET_HAS_extrh_i64_i32;
1337     case INDEX_op_ext8s_i64:
1338         return TCG_TARGET_HAS_ext8s_i64;
1339     case INDEX_op_ext16s_i64:
1340         return TCG_TARGET_HAS_ext16s_i64;
1341     case INDEX_op_ext32s_i64:
1342         return TCG_TARGET_HAS_ext32s_i64;
1343     case INDEX_op_ext8u_i64:
1344         return TCG_TARGET_HAS_ext8u_i64;
1345     case INDEX_op_ext16u_i64:
1346         return TCG_TARGET_HAS_ext16u_i64;
1347     case INDEX_op_ext32u_i64:
1348         return TCG_TARGET_HAS_ext32u_i64;
1349     case INDEX_op_bswap16_i64:
1350         return TCG_TARGET_HAS_bswap16_i64;
1351     case INDEX_op_bswap32_i64:
1352         return TCG_TARGET_HAS_bswap32_i64;
1353     case INDEX_op_bswap64_i64:
1354         return TCG_TARGET_HAS_bswap64_i64;
1355     case INDEX_op_not_i64:
1356         return TCG_TARGET_HAS_not_i64;
1357     case INDEX_op_neg_i64:
1358         return TCG_TARGET_HAS_neg_i64;
1359     case INDEX_op_andc_i64:
1360         return TCG_TARGET_HAS_andc_i64;
1361     case INDEX_op_orc_i64:
1362         return TCG_TARGET_HAS_orc_i64;
1363     case INDEX_op_eqv_i64:
1364         return TCG_TARGET_HAS_eqv_i64;
1365     case INDEX_op_nand_i64:
1366         return TCG_TARGET_HAS_nand_i64;
1367     case INDEX_op_nor_i64:
1368         return TCG_TARGET_HAS_nor_i64;
1369     case INDEX_op_clz_i64:
1370         return TCG_TARGET_HAS_clz_i64;
1371     case INDEX_op_ctz_i64:
1372         return TCG_TARGET_HAS_ctz_i64;
1373     case INDEX_op_ctpop_i64:
1374         return TCG_TARGET_HAS_ctpop_i64;
1375     case INDEX_op_add2_i64:
1376         return TCG_TARGET_HAS_add2_i64;
1377     case INDEX_op_sub2_i64:
1378         return TCG_TARGET_HAS_sub2_i64;
1379     case INDEX_op_mulu2_i64:
1380         return TCG_TARGET_HAS_mulu2_i64;
1381     case INDEX_op_muls2_i64:
1382         return TCG_TARGET_HAS_muls2_i64;
1383     case INDEX_op_muluh_i64:
1384         return TCG_TARGET_HAS_muluh_i64;
1385     case INDEX_op_mulsh_i64:
1386         return TCG_TARGET_HAS_mulsh_i64;
1387 
1388     case INDEX_op_mov_vec:
1389     case INDEX_op_dup_vec:
1390     case INDEX_op_dupm_vec:
1391     case INDEX_op_ld_vec:
1392     case INDEX_op_st_vec:
1393     case INDEX_op_add_vec:
1394     case INDEX_op_sub_vec:
1395     case INDEX_op_and_vec:
1396     case INDEX_op_or_vec:
1397     case INDEX_op_xor_vec:
1398     case INDEX_op_cmp_vec:
1399         return have_vec;
1400     case INDEX_op_dup2_vec:
1401         return have_vec && TCG_TARGET_REG_BITS == 32;
1402     case INDEX_op_not_vec:
1403         return have_vec && TCG_TARGET_HAS_not_vec;
1404     case INDEX_op_neg_vec:
1405         return have_vec && TCG_TARGET_HAS_neg_vec;
1406     case INDEX_op_abs_vec:
1407         return have_vec && TCG_TARGET_HAS_abs_vec;
1408     case INDEX_op_andc_vec:
1409         return have_vec && TCG_TARGET_HAS_andc_vec;
1410     case INDEX_op_orc_vec:
1411         return have_vec && TCG_TARGET_HAS_orc_vec;
1412     case INDEX_op_mul_vec:
1413         return have_vec && TCG_TARGET_HAS_mul_vec;
1414     case INDEX_op_shli_vec:
1415     case INDEX_op_shri_vec:
1416     case INDEX_op_sari_vec:
1417         return have_vec && TCG_TARGET_HAS_shi_vec;
1418     case INDEX_op_shls_vec:
1419     case INDEX_op_shrs_vec:
1420     case INDEX_op_sars_vec:
1421         return have_vec && TCG_TARGET_HAS_shs_vec;
1422     case INDEX_op_shlv_vec:
1423     case INDEX_op_shrv_vec:
1424     case INDEX_op_sarv_vec:
1425         return have_vec && TCG_TARGET_HAS_shv_vec;
1426     case INDEX_op_rotli_vec:
1427         return have_vec && TCG_TARGET_HAS_roti_vec;
1428     case INDEX_op_rotls_vec:
1429         return have_vec && TCG_TARGET_HAS_rots_vec;
1430     case INDEX_op_rotlv_vec:
1431     case INDEX_op_rotrv_vec:
1432         return have_vec && TCG_TARGET_HAS_rotv_vec;
1433     case INDEX_op_ssadd_vec:
1434     case INDEX_op_usadd_vec:
1435     case INDEX_op_sssub_vec:
1436     case INDEX_op_ussub_vec:
1437         return have_vec && TCG_TARGET_HAS_sat_vec;
1438     case INDEX_op_smin_vec:
1439     case INDEX_op_umin_vec:
1440     case INDEX_op_smax_vec:
1441     case INDEX_op_umax_vec:
1442         return have_vec && TCG_TARGET_HAS_minmax_vec;
1443     case INDEX_op_bitsel_vec:
1444         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1445     case INDEX_op_cmpsel_vec:
1446         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1447 
1448     default:
1449         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1450         return true;
1451     }
1452 }
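/*
 * Sketch of how this predicate gates expansion (the expanders in tcg-op.c
 * mostly test the TCG_TARGET_HAS_* macros directly; this function is the
 * runtime-checkable equivalent):
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit the native op
 *     } else {
 *         // expand via a helper call or a bit-twiddling sequence
 *     }
 */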
1453 
1454 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1455    and endian swap.  Maybe it would be better to do the alignment
1456    and endian swap in tcg_reg_alloc_call(). */
1457 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1458 {
1459     int i, real_args, nb_rets, pi;
1460     unsigned typemask;
1461     const TCGHelperInfo *info;
1462     TCGOp *op;
1463 
1464     info = g_hash_table_lookup(helper_table, (gpointer)func);
1465     typemask = info->typemask;
1466 
1467 #ifdef CONFIG_PLUGIN
1468     /* detect non-plugin helpers */
1469     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1470         tcg_ctx->plugin_insn->calls_helpers = true;
1471     }
1472 #endif
1473 
1474 #if defined(__sparc__) && !defined(__arch64__) \
1475     && !defined(CONFIG_TCG_INTERPRETER)
1476     /* We have 64-bit values in one register, but need to pass as two
1477        separate parameters.  Split them.  */
1478     int orig_typemask = typemask;
1479     int orig_nargs = nargs;
1480     TCGv_i64 retl, reth;
1481     TCGTemp *split_args[MAX_OPC_PARAM];
1482 
1483     retl = NULL;
1484     reth = NULL;
1485     typemask = 0;
1486     for (i = real_args = 0; i < nargs; ++i) {
1487         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1488         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1489 
1490         if (is_64bit) {
1491             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1492             TCGv_i32 h = tcg_temp_new_i32();
1493             TCGv_i32 l = tcg_temp_new_i32();
1494             tcg_gen_extr_i64_i32(l, h, orig);
1495             split_args[real_args++] = tcgv_i32_temp(h);
1496             typemask |= dh_typecode_i32 << (real_args * 3);
1497             split_args[real_args++] = tcgv_i32_temp(l);
1498             typemask |= dh_typecode_i32 << (real_args * 3);
1499         } else {
1500             split_args[real_args++] = args[i];
1501             typemask |= argtype << (real_args * 3);
1502         }
1503     }
1504     nargs = real_args;
1505     args = split_args;
1506 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1507     for (i = 0; i < nargs; ++i) {
1508         int argtype = extract32(typemask, (i + 1) * 3, 3);
1509         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1510         bool is_signed = argtype & 1;
1511 
1512         if (is_32bit) {
1513             TCGv_i64 temp = tcg_temp_new_i64();
1514             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1515             if (is_signed) {
1516                 tcg_gen_ext32s_i64(temp, orig);
1517             } else {
1518                 tcg_gen_ext32u_i64(temp, orig);
1519             }
1520             args[i] = tcgv_i64_temp(temp);
1521         }
1522     }
1523 #endif /* TCG_TARGET_EXTEND_ARGS */
1524 
1525     op = tcg_emit_op(INDEX_op_call);
1526 
1527     pi = 0;
1528     if (ret != NULL) {
1529 #if defined(__sparc__) && !defined(__arch64__) \
1530     && !defined(CONFIG_TCG_INTERPRETER)
1531         if ((typemask & 6) == dh_typecode_i64) {
1532             /* The 32-bit ABI is going to return the 64-bit value in
1533                the %o0/%o1 register pair.  Prepare for this by using
1534                two return temporaries, and reassemble below.  */
1535             retl = tcg_temp_new_i64();
1536             reth = tcg_temp_new_i64();
1537             op->args[pi++] = tcgv_i64_arg(reth);
1538             op->args[pi++] = tcgv_i64_arg(retl);
1539             nb_rets = 2;
1540         } else {
1541             op->args[pi++] = temp_arg(ret);
1542             nb_rets = 1;
1543         }
1544 #else
1545         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1546 #ifdef HOST_WORDS_BIGENDIAN
1547             op->args[pi++] = temp_arg(ret + 1);
1548             op->args[pi++] = temp_arg(ret);
1549 #else
1550             op->args[pi++] = temp_arg(ret);
1551             op->args[pi++] = temp_arg(ret + 1);
1552 #endif
1553             nb_rets = 2;
1554         } else {
1555             op->args[pi++] = temp_arg(ret);
1556             nb_rets = 1;
1557         }
1558 #endif
1559     } else {
1560         nb_rets = 0;
1561     }
1562     TCGOP_CALLO(op) = nb_rets;
1563 
1564     real_args = 0;
1565     for (i = 0; i < nargs; i++) {
1566         int argtype = extract32(typemask, (i + 1) * 3, 3);
1567         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1568         bool want_align = false;
1569 
1570 #if defined(CONFIG_TCG_INTERPRETER)
1571         /*
1572          * Align all arguments, so that they land in predictable places
1573          * for passing off to ffi_call.
1574          */
1575         want_align = true;
1576 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1577         /* Some targets want aligned 64-bit args */
1578         want_align = is_64bit;
1579 #endif
1580 
1581         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1582             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1583             real_args++;
1584         }
1585 
1586         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1587             /*
1588              * If stack grows up, then we will be placing successive
1589              * arguments at lower addresses, which means we need to
1590              * reverse the order compared to how we would normally
1591              * treat either big or little-endian.  For those arguments
1592              * that will wind up in registers, this still works for
1593              * HPPA (the only current STACK_GROWSUP target) since the
1594              * argument registers are *also* allocated in decreasing
1595              * order.  If another such target is added, this logic may
1596              * have to get more complicated to differentiate between
1597              * stack arguments and register arguments.
1598              */
1599 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1600             op->args[pi++] = temp_arg(args[i] + 1);
1601             op->args[pi++] = temp_arg(args[i]);
1602 #else
1603             op->args[pi++] = temp_arg(args[i]);
1604             op->args[pi++] = temp_arg(args[i] + 1);
1605 #endif
1606             real_args += 2;
1607             continue;
1608         }
1609 
1610         op->args[pi++] = temp_arg(args[i]);
1611         real_args++;
1612     }
1613     op->args[pi++] = (uintptr_t)func;
1614     op->args[pi++] = (uintptr_t)info;
1615     TCGOP_CALLI(op) = real_args;
1616 
1617     /* Make sure the fields didn't overflow.  */
1618     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1619     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1620 
1621 #if defined(__sparc__) && !defined(__arch64__) \
1622     && !defined(CONFIG_TCG_INTERPRETER)
1623     /* Free all of the parts we allocated above.  */
1624     for (i = real_args = 0; i < orig_nargs; ++i) {
1625         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1626         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1627 
1628         if (is_64bit) {
1629             tcg_temp_free_internal(args[real_args++]);
1630             tcg_temp_free_internal(args[real_args++]);
1631         } else {
1632             real_args++;
1633         }
1634     }
1635     if ((orig_typemask & 6) == dh_typecode_i64) {
1636         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1637            Note that describing these as TCGv_i64 eliminates an unnecessary
1638            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1639         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1640         tcg_temp_free_i64(retl);
1641         tcg_temp_free_i64(reth);
1642     }
1643 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1644     for (i = 0; i < nargs; ++i) {
1645         int argtype = extract32(typemask, (i + 1) * 3, 3);
1646         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1647 
1648         if (is_32bit) {
1649             tcg_temp_free_internal(args[i]);
1650         }
1651     }
1652 #endif /* TCG_TARGET_EXTEND_ARGS */
1653 }
1654 
1655 static void tcg_reg_alloc_start(TCGContext *s)
1656 {
1657     int i, n;
1658 
1659     for (i = 0, n = s->nb_temps; i < n; i++) {
1660         TCGTemp *ts = &s->temps[i];
1661         TCGTempVal val = TEMP_VAL_MEM;
1662 
1663         switch (ts->kind) {
1664         case TEMP_CONST:
1665             val = TEMP_VAL_CONST;
1666             break;
1667         case TEMP_FIXED:
1668             val = TEMP_VAL_REG;
1669             break;
1670         case TEMP_GLOBAL:
1671             break;
1672         case TEMP_NORMAL:
1673             val = TEMP_VAL_DEAD;
1674             /* fall through */
1675         case TEMP_LOCAL:
1676             ts->mem_allocated = 0;
1677             break;
1678         default:
1679             g_assert_not_reached();
1680         }
1681         ts->val_type = val;
1682     }
1683 
1684     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1685 }
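/*
 * For reference, the initial mapping established by tcg_reg_alloc_start:
 *   TEMP_CONST  -> TEMP_VAL_CONST
 *   TEMP_FIXED  -> TEMP_VAL_REG  (permanently lives in its register)
 *   TEMP_GLOBAL -> TEMP_VAL_MEM  (starts synced with its memory slot)
 *   TEMP_LOCAL  -> TEMP_VAL_MEM, frame slot not yet allocated
 *   TEMP_NORMAL -> TEMP_VAL_DEAD, frame slot not yet allocated
 */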
1686 
1687 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1688                                  TCGTemp *ts)
1689 {
1690     int idx = temp_idx(ts);
1691 
1692     switch (ts->kind) {
1693     case TEMP_FIXED:
1694     case TEMP_GLOBAL:
1695         pstrcpy(buf, buf_size, ts->name);
1696         break;
1697     case TEMP_LOCAL:
1698         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1699         break;
1700     case TEMP_NORMAL:
1701         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1702         break;
1703     case TEMP_CONST:
1704         switch (ts->type) {
1705         case TCG_TYPE_I32:
1706             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1707             break;
1708 #if TCG_TARGET_REG_BITS > 32
1709         case TCG_TYPE_I64:
1710             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1711             break;
1712 #endif
1713         case TCG_TYPE_V64:
1714         case TCG_TYPE_V128:
1715         case TCG_TYPE_V256:
1716             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1717                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1718             break;
1719         default:
1720             g_assert_not_reached();
1721         }
1722         break;
1723     }
1724     return buf;
1725 }
1726 
1727 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1728                              int buf_size, TCGArg arg)
1729 {
1730     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1731 }
1732 
1733 static const char * const cond_name[] =
1734 {
1735     [TCG_COND_NEVER] = "never",
1736     [TCG_COND_ALWAYS] = "always",
1737     [TCG_COND_EQ] = "eq",
1738     [TCG_COND_NE] = "ne",
1739     [TCG_COND_LT] = "lt",
1740     [TCG_COND_GE] = "ge",
1741     [TCG_COND_LE] = "le",
1742     [TCG_COND_GT] = "gt",
1743     [TCG_COND_LTU] = "ltu",
1744     [TCG_COND_GEU] = "geu",
1745     [TCG_COND_LEU] = "leu",
1746     [TCG_COND_GTU] = "gtu"
1747 };
1748 
1749 static const char * const ldst_name[] =
1750 {
1751     [MO_UB]   = "ub",
1752     [MO_SB]   = "sb",
1753     [MO_LEUW] = "leuw",
1754     [MO_LESW] = "lesw",
1755     [MO_LEUL] = "leul",
1756     [MO_LESL] = "lesl",
1757     [MO_LEQ]  = "leq",
1758     [MO_BEUW] = "beuw",
1759     [MO_BESW] = "besw",
1760     [MO_BEUL] = "beul",
1761     [MO_BESL] = "besl",
1762     [MO_BEQ]  = "beq",
1763 };
1764 
1765 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1766 #ifdef TARGET_ALIGNED_ONLY
1767     [MO_UNALN >> MO_ASHIFT]    = "un+",
1768     [MO_ALIGN >> MO_ASHIFT]    = "",
1769 #else
1770     [MO_UNALN >> MO_ASHIFT]    = "",
1771     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1772 #endif
1773     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1774     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1775     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1776     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1777     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1778     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1779 };
1780 
1781 static inline bool tcg_regset_single(TCGRegSet d)
1782 {
1783     return (d & (d - 1)) == 0;
1784 }
1785 
1786 static inline TCGReg tcg_regset_first(TCGRegSet d)
1787 {
1788     if (TCG_TARGET_NB_REGS <= 32) {
1789         return ctz32(d);
1790     } else {
1791         return ctz64(d);
1792     }
1793 }
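/*
 * Example: tcg_regset_single(0x100) is true, and tcg_regset_first(0x100)
 * returns register 8.  Callers must pass a non-empty set: for d == 0,
 * tcg_regset_single is also true and ctz32/ctz64 return 32/64.
 */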
1794 
1795 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1796 {
1797     char buf[128];
1798     TCGOp *op;
1799 
1800     QTAILQ_FOREACH(op, &s->ops, link) {
1801         int i, k, nb_oargs, nb_iargs, nb_cargs;
1802         const TCGOpDef *def;
1803         TCGOpcode c;
1804         int col = 0;
1805 
1806         c = op->opc;
1807         def = &tcg_op_defs[c];
1808 
1809         if (c == INDEX_op_insn_start) {
1810             nb_oargs = 0;
1811             col += qemu_log("\n ----");
1812 
1813             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1814                 target_ulong a;
1815 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1816                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1817 #else
1818                 a = op->args[i];
1819 #endif
1820                 col += qemu_log(" " TARGET_FMT_lx, a);
1821             }
1822         } else if (c == INDEX_op_call) {
1823             const TCGHelperInfo *info = tcg_call_info(op);
1824             void *func = tcg_call_func(op);
1825 
1826             /* variable number of arguments */
1827             nb_oargs = TCGOP_CALLO(op);
1828             nb_iargs = TCGOP_CALLI(op);
1829             nb_cargs = def->nb_cargs;
1830 
1831             col += qemu_log(" %s ", def->name);
1832 
1833             /*
1834              * Print the function name from TCGHelperInfo, if available.
1835              * Note that plugins have a template function for the info,
1836              * but the actual function pointer comes from the plugin.
1837              */
1838             if (func == info->func) {
1839                 col += qemu_log("%s", info->name);
1840             } else {
1841                 col += qemu_log("plugin(%p)", func);
1842             }
1843 
1844             col += qemu_log("$0x%x,$%d", info->flags, nb_oargs);
1845             for (i = 0; i < nb_oargs; i++) {
1846                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1847                                                        op->args[i]));
1848             }
1849             for (i = 0; i < nb_iargs; i++) {
1850                 TCGArg arg = op->args[nb_oargs + i];
1851                 const char *t = "<dummy>";
1852                 if (arg != TCG_CALL_DUMMY_ARG) {
1853                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1854                 }
1855                 col += qemu_log(",%s", t);
1856             }
1857         } else {
1858             col += qemu_log(" %s ", def->name);
1859 
1860             nb_oargs = def->nb_oargs;
1861             nb_iargs = def->nb_iargs;
1862             nb_cargs = def->nb_cargs;
1863 
1864             if (def->flags & TCG_OPF_VECTOR) {
1865                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1866                                 8 << TCGOP_VECE(op));
1867             }
1868 
1869             k = 0;
1870             for (i = 0; i < nb_oargs; i++) {
1871                 if (k != 0) {
1872                     col += qemu_log(",");
1873                 }
1874                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1875                                                       op->args[k++]));
1876             }
1877             for (i = 0; i < nb_iargs; i++) {
1878                 if (k != 0) {
1879                     col += qemu_log(",");
1880                 }
1881                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1882                                                       op->args[k++]));
1883             }
1884             switch (c) {
1885             case INDEX_op_brcond_i32:
1886             case INDEX_op_setcond_i32:
1887             case INDEX_op_movcond_i32:
1888             case INDEX_op_brcond2_i32:
1889             case INDEX_op_setcond2_i32:
1890             case INDEX_op_brcond_i64:
1891             case INDEX_op_setcond_i64:
1892             case INDEX_op_movcond_i64:
1893             case INDEX_op_cmp_vec:
1894             case INDEX_op_cmpsel_vec:
1895                 if (op->args[k] < ARRAY_SIZE(cond_name)
1896                     && cond_name[op->args[k]]) {
1897                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1898                 } else {
1899                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1900                 }
1901                 i = 1;
1902                 break;
1903             case INDEX_op_qemu_ld_i32:
1904             case INDEX_op_qemu_st_i32:
1905             case INDEX_op_qemu_st8_i32:
1906             case INDEX_op_qemu_ld_i64:
1907             case INDEX_op_qemu_st_i64:
1908                 {
1909                     TCGMemOpIdx oi = op->args[k++];
1910                     MemOp mop = get_memop(oi);
1911                     unsigned ix = get_mmuidx(oi);
1912 
1913                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1914                         col += qemu_log(",$0x%x,%u", mop, ix);
1915                     } else {
1916                         const char *s_al, *s_op;
1917                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
1918                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
1919                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1920                     }
1921                     i = 1;
1922                 }
1923                 break;
1924             default:
1925                 i = 0;
1926                 break;
1927             }
1928             switch (c) {
1929             case INDEX_op_set_label:
1930             case INDEX_op_br:
1931             case INDEX_op_brcond_i32:
1932             case INDEX_op_brcond_i64:
1933             case INDEX_op_brcond2_i32:
1934                 col += qemu_log("%s$L%d", k ? "," : "",
1935                                 arg_label(op->args[k])->id);
1936                 i++, k++;
1937                 break;
1938             default:
1939                 break;
1940             }
1941             for (; i < nb_cargs; i++, k++) {
1942                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1943             }
1944         }
1945 
1946         if (have_prefs || op->life) {
1947 
1948             QemuLogFile *logfile;
1949 
1950             rcu_read_lock();
1951             logfile = qatomic_rcu_read(&qemu_logfile);
1952             if (logfile) {
1953                 for (; col < 40; ++col) {
1954                     putc(' ', logfile->fd);
1955                 }
1956             }
1957             rcu_read_unlock();
1958         }
1959 
1960         if (op->life) {
1961             unsigned life = op->life;
1962 
1963             if (life & (SYNC_ARG * 3)) {
1964                 qemu_log("  sync:");
1965                 for (i = 0; i < 2; ++i) {
1966                     if (life & (SYNC_ARG << i)) {
1967                         qemu_log(" %d", i);
1968                     }
1969                 }
1970             }
1971             life /= DEAD_ARG;
1972             if (life) {
1973                 qemu_log("  dead:");
1974                 for (i = 0; life; ++i, life >>= 1) {
1975                     if (life & 1) {
1976                         qemu_log(" %d", i);
1977                     }
1978                 }
1979             }
1980         }
1981 
1982         if (have_prefs) {
1983             for (i = 0; i < nb_oargs; ++i) {
1984                 TCGRegSet set = op->output_pref[i];
1985 
1986                 if (i == 0) {
1987                     qemu_log("  pref=");
1988                 } else {
1989                     qemu_log(",");
1990                 }
1991                 if (set == 0) {
1992                     qemu_log("none");
1993                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
1994                     qemu_log("all");
1995 #ifdef CONFIG_DEBUG_TCG
1996                 } else if (tcg_regset_single(set)) {
1997                     TCGReg reg = tcg_regset_first(set);
1998                     qemu_log("%s", tcg_target_reg_names[reg]);
1999 #endif
2000                 } else if (TCG_TARGET_NB_REGS <= 32) {
2001                     qemu_log("%#x", (uint32_t)set);
2002                 } else {
2003                     qemu_log("%#" PRIx64, (uint64_t)set);
2004                 }
2005             }
2006         }
2007 
2008         qemu_log("\n");
2009     }
2010 }
2011 
2012 /* we give more priority to constraints with fewer registers */
2013 static int get_constraint_priority(const TCGOpDef *def, int k)
2014 {
2015     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2016     int n;
2017 
2018     if (arg_ct->oalias) {
2019         /* an alias is equivalent to a single register */
2020         n = 1;
2021     } else {
2022         n = ctpop64(arg_ct->regs);
2023     }
2024     return TCG_TARGET_NB_REGS - n + 1;
2025 }
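/*
 * Worked example: with TCG_TARGET_NB_REGS == 16, a constraint accepting
 * any of the 16 registers gets priority 16 - 16 + 1 = 1, while a
 * single-register constraint (or an alias) gets 16 - 1 + 1 = 16, so the
 * most restrictive constraints are visited first after sorting below.
 */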
2026 
2027 /* sort from highest priority to lowest */
2028 static void sort_constraints(TCGOpDef *def, int start, int n)
2029 {
2030     int i, j;
2031     TCGArgConstraint *a = def->args_ct;
2032 
2033     for (i = 0; i < n; i++) {
2034         a[start + i].sort_index = start + i;
2035     }
2036     if (n <= 1) {
2037         return;
2038     }
2039     for (i = 0; i < n - 1; i++) {
2040         for (j = i + 1; j < n; j++) {
2041             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2042             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2043             if (p1 < p2) {
2044                 int tmp = a[start + i].sort_index;
2045                 a[start + i].sort_index = a[start + j].sort_index;
2046                 a[start + j].sort_index = tmp;
2047             }
2048         }
2049     }
2050 }
2051 
2052 static void process_op_defs(TCGContext *s)
2053 {
2054     TCGOpcode op;
2055 
2056     for (op = 0; op < NB_OPS; op++) {
2057         TCGOpDef *def = &tcg_op_defs[op];
2058         const TCGTargetOpDef *tdefs;
2059         int i, nb_args;
2060 
2061         if (def->flags & TCG_OPF_NOT_PRESENT) {
2062             continue;
2063         }
2064 
2065         nb_args = def->nb_iargs + def->nb_oargs;
2066         if (nb_args == 0) {
2067             continue;
2068         }
2069 
2070         /*
2071          * Macro magic should make it impossible, but double-check that
2072          * the array index is in range.  Since the signedness of an enum
2073          * is implementation-defined, force the result to unsigned.
2074          */
2075         unsigned con_set = tcg_target_op_def(op);
2076         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2077         tdefs = &constraint_sets[con_set];
2078 
2079         for (i = 0; i < nb_args; i++) {
2080             const char *ct_str = tdefs->args_ct_str[i];
2081             /* Incomplete TCGTargetOpDef entry. */
2082             tcg_debug_assert(ct_str != NULL);
2083 
2084             while (*ct_str != '\0') {
2085                 switch(*ct_str) {
2086                 case '0' ... '9':
2087                     {
2088                         int oarg = *ct_str - '0';
2089                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2090                         tcg_debug_assert(oarg < def->nb_oargs);
2091                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2092                         def->args_ct[i] = def->args_ct[oarg];
2093                         /* The output sets oalias.  */
2094                         def->args_ct[oarg].oalias = true;
2095                         def->args_ct[oarg].alias_index = i;
2096                         /* The input sets ialias. */
2097                         def->args_ct[i].ialias = true;
2098                         def->args_ct[i].alias_index = oarg;
2099                     }
2100                     ct_str++;
2101                     break;
2102                 case '&':
2103                     def->args_ct[i].newreg = true;
2104                     ct_str++;
2105                     break;
2106                 case 'i':
2107                     def->args_ct[i].ct |= TCG_CT_CONST;
2108                     ct_str++;
2109                     break;
2110 
2111                 /* Include all of the target-specific constraints. */
2112 
2113 #undef CONST
2114 #define CONST(CASE, MASK) \
2115     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2116 #define REGS(CASE, MASK) \
2117     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2118 
2119 #include "tcg-target-con-str.h"
2120 
2121 #undef REGS
2122 #undef CONST
2123                 default:
2124                     /* Typo in TCGTargetOpDef constraint. */
2125                     g_assert_not_reached();
2126                 }
2127             }
2128         }
2129 
2130         /* TCGTargetOpDef entry with too much information? */
2131         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2132 
2133         /* sort the constraints (XXX: this is just a heuristic) */
2134         sort_constraints(def, 0, def->nb_oargs);
2135         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2136     }
2137 }
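/*
 * Example of the parse above, assuming a typical backend where 'r'
 * names the general registers via REGS in tcg-target-con-str.h: the
 * constraint strings { "r", "0", "ri" } for a one-output, two-input op
 * allow any register for output 0, tie input 1 to output 0 (setting
 * oalias/ialias and alias_index on the pair), and allow input 2 to be
 * a register or a TCG_CT_CONST immediate.
 */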
2138 
2139 void tcg_op_remove(TCGContext *s, TCGOp *op)
2140 {
2141     TCGLabel *label;
2142 
2143     switch (op->opc) {
2144     case INDEX_op_br:
2145         label = arg_label(op->args[0]);
2146         label->refs--;
2147         break;
2148     case INDEX_op_brcond_i32:
2149     case INDEX_op_brcond_i64:
2150         label = arg_label(op->args[3]);
2151         label->refs--;
2152         break;
2153     case INDEX_op_brcond2_i32:
2154         label = arg_label(op->args[5]);
2155         label->refs--;
2156         break;
2157     default:
2158         break;
2159     }
2160 
2161     QTAILQ_REMOVE(&s->ops, op, link);
2162     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2163     s->nb_ops--;
2164 
2165 #ifdef CONFIG_PROFILER
2166     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2167 #endif
2168 }
2169 
2170 void tcg_remove_ops_after(TCGOp *op)
2171 {
2172     TCGContext *s = tcg_ctx;
2173 
2174     while (true) {
2175         TCGOp *last = tcg_last_op();
2176         if (last == op) {
2177             return;
2178         }
2179         tcg_op_remove(s, last);
2180     }
2181 }
2182 
2183 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2184 {
2185     TCGContext *s = tcg_ctx;
2186     TCGOp *op;
2187 
2188     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2189         op = tcg_malloc(sizeof(TCGOp));
2190     } else {
2191         op = QTAILQ_FIRST(&s->free_ops);
2192         QTAILQ_REMOVE(&s->free_ops, op, link);
2193     }
2194     memset(op, 0, offsetof(TCGOp, link));
2195     op->opc = opc;
2196     s->nb_ops++;
2197 
2198     return op;
2199 }
2200 
2201 TCGOp *tcg_emit_op(TCGOpcode opc)
2202 {
2203     TCGOp *op = tcg_op_alloc(opc);
2204     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2205     return op;
2206 }
2207 
2208 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2209 {
2210     TCGOp *new_op = tcg_op_alloc(opc);
2211     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2212     return new_op;
2213 }
2214 
2215 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2216 {
2217     TCGOp *new_op = tcg_op_alloc(opc);
2218     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2219     return new_op;
2220 }
2221 
2222 /* Reachability analysis: remove unreachable code.  */
2223 static void reachable_code_pass(TCGContext *s)
2224 {
2225     TCGOp *op, *op_next;
2226     bool dead = false;
2227 
2228     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2229         bool remove = dead;
2230         TCGLabel *label;
2231 
2232         switch (op->opc) {
2233         case INDEX_op_set_label:
2234             label = arg_label(op->args[0]);
2235             if (label->refs == 0) {
2236                 /*
2237                  * While there is an occasional backward branch, virtually
2238                  * all branches generated by the translators are forward,
2239                  * which means that generally we will have already removed
2240                  * all references to the label, and there is little to be
2241                  * gained by iterating.
2242                  */
2243                 remove = true;
2244             } else {
2245                 /* Once we see a label, insns become live again.  */
2246                 dead = false;
2247                 remove = false;
2248 
2249                 /*
2250                  * Optimization can fold conditional branches to unconditional.
2251                  * If we find a label with one reference which is preceded by
2252                  * an unconditional branch to it, remove both.  This has to
2253                  * wait until the dead code in between them has been removed.
2254                  */
2255                 if (label->refs == 1) {
2256                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2257                     if (op_prev->opc == INDEX_op_br &&
2258                         label == arg_label(op_prev->args[0])) {
2259                         tcg_op_remove(s, op_prev);
2260                         remove = true;
2261                     }
2262                 }
2263             }
2264             break;
2265 
2266         case INDEX_op_br:
2267         case INDEX_op_exit_tb:
2268         case INDEX_op_goto_ptr:
2269             /* Unconditional branches; everything following is dead.  */
2270             dead = true;
2271             break;
2272 
2273         case INDEX_op_call:
2274             /* Notice noreturn helper calls, raising exceptions.  */
2275             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2276                 dead = true;
2277             }
2278             break;
2279 
2280         case INDEX_op_insn_start:
2281             /* Never remove -- we need to keep these for unwind.  */
2282             remove = false;
2283             break;
2284 
2285         default:
2286             break;
2287         }
2288 
2289         if (remove) {
2290             tcg_op_remove(s, op);
2291         }
2292     }
2293 }
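/*
 * Example of the single-reference case above: once optimization has
 * folded a conditional branch and the dead ops in between are gone,
 *     br $L1
 *     set_label $L1
 * leaves label->refs == 1, and both opcodes are removed.
 */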
2294 
2295 #define TS_DEAD  1
2296 #define TS_MEM   2
2297 
2298 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2299 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
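/*
 * Roughly, during the backward scan of liveness_pass_1: TS_DEAD set in
 * ts->state means no later opcode reads the value, and TS_MEM set means
 * the value must also be valid in its memory slot at that point.
 */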
2300 
2301 /* For liveness_pass_1, the register preferences for a given temp.  */
2302 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2303 {
2304     return ts->state_ptr;
2305 }
2306 
2307 /* For liveness_pass_1, reset the preferences for a given temp to the
2308  * maximal regset for its type.
2309  */
2310 static inline void la_reset_pref(TCGTemp *ts)
2311 {
2312     *la_temp_pref(ts)
2313         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2314 }
2315 
2316 /* liveness analysis: end of function: all temps are dead, and globals
2317    should be in memory. */
2318 static void la_func_end(TCGContext *s, int ng, int nt)
2319 {
2320     int i;
2321 
2322     for (i = 0; i < ng; ++i) {
2323         s->temps[i].state = TS_DEAD | TS_MEM;
2324         la_reset_pref(&s->temps[i]);
2325     }
2326     for (i = ng; i < nt; ++i) {
2327         s->temps[i].state = TS_DEAD;
2328         la_reset_pref(&s->temps[i]);
2329     }
2330 }
2331 
2332 /* liveness analysis: end of basic block: all temps are dead, globals
2333    and local temps should be in memory. */
2334 static void la_bb_end(TCGContext *s, int ng, int nt)
2335 {
2336     int i;
2337 
2338     for (i = 0; i < nt; ++i) {
2339         TCGTemp *ts = &s->temps[i];
2340         int state;
2341 
2342         switch (ts->kind) {
2343         case TEMP_FIXED:
2344         case TEMP_GLOBAL:
2345         case TEMP_LOCAL:
2346             state = TS_DEAD | TS_MEM;
2347             break;
2348         case TEMP_NORMAL:
2349         case TEMP_CONST:
2350             state = TS_DEAD;
2351             break;
2352         default:
2353             g_assert_not_reached();
2354         }
2355         ts->state = state;
2356         la_reset_pref(ts);
2357     }
2358 }
2359 
2360 /* liveness analysis: sync globals back to memory.  */
2361 static void la_global_sync(TCGContext *s, int ng)
2362 {
2363     int i;
2364 
2365     for (i = 0; i < ng; ++i) {
2366         int state = s->temps[i].state;
2367         s->temps[i].state = state | TS_MEM;
2368         if (state == TS_DEAD) {
2369             /* If the global was previously dead, reset prefs.  */
2370             la_reset_pref(&s->temps[i]);
2371         }
2372     }
2373 }
2374 
2375 /*
2376  * liveness analysis: conditional branch: all temps are dead,
2377  * globals and local temps should be synced.
2378  */
2379 static void la_bb_sync(TCGContext *s, int ng, int nt)
2380 {
2381     la_global_sync(s, ng);
2382 
2383     for (int i = ng; i < nt; ++i) {
2384         TCGTemp *ts = &s->temps[i];
2385         int state;
2386 
2387         switch (ts->kind) {
2388         case TEMP_LOCAL:
2389             state = ts->state;
2390             ts->state = state | TS_MEM;
2391             if (state != TS_DEAD) {
2392                 continue;
2393             }
2394             break;
2395         case TEMP_NORMAL:
2396             s->temps[i].state = TS_DEAD;
2397             break;
2398         case TEMP_CONST:
2399             continue;
2400         default:
2401             g_assert_not_reached();
2402         }
2403         la_reset_pref(&s->temps[i]);
2404     }
2405 }
2406 
2407 /* liveness analysis: sync globals back to memory and kill.  */
2408 static void la_global_kill(TCGContext *s, int ng)
2409 {
2410     int i;
2411 
2412     for (i = 0; i < ng; i++) {
2413         s->temps[i].state = TS_DEAD | TS_MEM;
2414         la_reset_pref(&s->temps[i]);
2415     }
2416 }
2417 
2418 /* liveness analysis: note live temps crossing calls.  */
2419 static void la_cross_call(TCGContext *s, int nt)
2420 {
2421     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2422     int i;
2423 
2424     for (i = 0; i < nt; i++) {
2425         TCGTemp *ts = &s->temps[i];
2426         if (!(ts->state & TS_DEAD)) {
2427             TCGRegSet *pset = la_temp_pref(ts);
2428             TCGRegSet set = *pset;
2429 
2430             set &= mask;
2431             /* If the combination is not possible, restart.  */
2432             if (set == 0) {
2433                 set = tcg_target_available_regs[ts->type] & mask;
2434             }
2435             *pset = set;
2436         }
2437     }
2438 }
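/*
 * Summary: liveness_pass_1 below applies the helpers above keyed on
 * the opcode flags:
 *   TCG_OPF_BB_EXIT      -> la_func_end
 *   TCG_OPF_COND_BRANCH  -> la_bb_sync
 *   TCG_OPF_BB_END       -> la_bb_end
 *   TCG_OPF_SIDE_EFFECTS -> la_global_sync, plus la_cross_call when
 *                           TCG_OPF_CALL_CLOBBER is also set
 */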
2439 
2440 /* Liveness analysis: update the opc_arg_life array to tell whether a
2441    given input argument is dead. Instructions updating dead
2442    temporaries are removed. */
2443 static void liveness_pass_1(TCGContext *s)
2444 {
2445     int nb_globals = s->nb_globals;
2446     int nb_temps = s->nb_temps;
2447     TCGOp *op, *op_prev;
2448     TCGRegSet *prefs;
2449     int i;
2450 
2451     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2452     for (i = 0; i < nb_temps; ++i) {
2453         s->temps[i].state_ptr = prefs + i;
2454     }
2455 
2456     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2457     la_func_end(s, nb_globals, nb_temps);
2458 
2459     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2460         int nb_iargs, nb_oargs;
2461         TCGOpcode opc_new, opc_new2;
2462         bool have_opc_new2;
2463         TCGLifeData arg_life = 0;
2464         TCGTemp *ts;
2465         TCGOpcode opc = op->opc;
2466         const TCGOpDef *def = &tcg_op_defs[opc];
2467 
2468         switch (opc) {
2469         case INDEX_op_call:
2470             {
2471                 int call_flags;
2472                 int nb_call_regs;
2473 
2474                 nb_oargs = TCGOP_CALLO(op);
2475                 nb_iargs = TCGOP_CALLI(op);
2476                 call_flags = tcg_call_flags(op);
2477 
2478                 /* pure functions can be removed if their result is unused */
2479                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2480                     for (i = 0; i < nb_oargs; i++) {
2481                         ts = arg_temp(op->args[i]);
2482                         if (ts->state != TS_DEAD) {
2483                             goto do_not_remove_call;
2484                         }
2485                     }
2486                     goto do_remove;
2487                 }
2488             do_not_remove_call:
2489 
2490                 /* Output args are dead.  */
2491                 for (i = 0; i < nb_oargs; i++) {
2492                     ts = arg_temp(op->args[i]);
2493                     if (ts->state & TS_DEAD) {
2494                         arg_life |= DEAD_ARG << i;
2495                     }
2496                     if (ts->state & TS_MEM) {
2497                         arg_life |= SYNC_ARG << i;
2498                     }
2499                     ts->state = TS_DEAD;
2500                     la_reset_pref(ts);
2501 
2502                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2503                     op->output_pref[i] = 0;
2504                 }
2505 
2506                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2507                                     TCG_CALL_NO_READ_GLOBALS))) {
2508                     la_global_kill(s, nb_globals);
2509                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2510                     la_global_sync(s, nb_globals);
2511                 }
2512 
2513                 /* Record arguments that die in this helper.  */
2514                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2515                     ts = arg_temp(op->args[i]);
2516                     if (ts && ts->state & TS_DEAD) {
2517                         arg_life |= DEAD_ARG << i;
2518                     }
2519                 }
2520 
2521                 /* For all live registers, remove call-clobbered prefs.  */
2522                 la_cross_call(s, nb_temps);
2523 
2524                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2525 
2526                 /* Input arguments are live for preceding opcodes.  */
2527                 for (i = 0; i < nb_iargs; i++) {
2528                     ts = arg_temp(op->args[i + nb_oargs]);
2529                     if (ts && ts->state & TS_DEAD) {
2530                         /* For those arguments that die, and will be allocated
2531                          * in registers, clear the register set for that arg,
2532                          * to be filled in below.  For args that will be on
2533                          * the stack, reset to any available reg.
2534                          */
2535                         *la_temp_pref(ts)
2536                             = (i < nb_call_regs ? 0 :
2537                                tcg_target_available_regs[ts->type]);
2538                         ts->state &= ~TS_DEAD;
2539                     }
2540                 }
2541 
2542                 /* For each input argument, add its input register to prefs.
2543                    If a temp is used once, this produces a single set bit.  */
2544                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2545                     ts = arg_temp(op->args[i + nb_oargs]);
2546                     if (ts) {
2547                         tcg_regset_set_reg(*la_temp_pref(ts),
2548                                            tcg_target_call_iarg_regs[i]);
2549                     }
2550                 }
2551             }
2552             break;
2553         case INDEX_op_insn_start:
2554             break;
2555         case INDEX_op_discard:
2556             /* mark the temporary as dead */
2557             ts = arg_temp(op->args[0]);
2558             ts->state = TS_DEAD;
2559             la_reset_pref(ts);
2560             break;
2561 
2562         case INDEX_op_add2_i32:
2563             opc_new = INDEX_op_add_i32;
2564             goto do_addsub2;
2565         case INDEX_op_sub2_i32:
2566             opc_new = INDEX_op_sub_i32;
2567             goto do_addsub2;
2568         case INDEX_op_add2_i64:
2569             opc_new = INDEX_op_add_i64;
2570             goto do_addsub2;
2571         case INDEX_op_sub2_i64:
2572             opc_new = INDEX_op_sub_i64;
2573         do_addsub2:
2574             nb_iargs = 4;
2575             nb_oargs = 2;
2576             /* Test if the high part of the operation is dead, but not
2577                the low part.  The result can be optimized to a simple
2578                add or sub.  This happens often for an x86_64 guest when
2579                the cpu mode is set to 32 bit.  */
2580             if (arg_temp(op->args[1])->state == TS_DEAD) {
2581                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2582                     goto do_remove;
2583                 }
2584                 /* Replace the opcode and adjust the args in place,
2585                    leaving 3 unused args at the end.  */
2586                 op->opc = opc = opc_new;
2587                 op->args[1] = op->args[2];
2588                 op->args[2] = op->args[4];
2589                 /* Fall through and mark the single-word operation live.  */
2590                 nb_iargs = 2;
2591                 nb_oargs = 1;
2592             }
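            /*
             * For illustration: add2_i32 rl,rh,al,ah,bl,bh with rh dead
             * has just been rewritten to add_i32 rl,al,bl (args 2 and 4
             * shifted down into slots 1 and 2).
             */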
2593             goto do_not_remove;
2594 
2595         case INDEX_op_mulu2_i32:
2596             opc_new = INDEX_op_mul_i32;
2597             opc_new2 = INDEX_op_muluh_i32;
2598             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2599             goto do_mul2;
2600         case INDEX_op_muls2_i32:
2601             opc_new = INDEX_op_mul_i32;
2602             opc_new2 = INDEX_op_mulsh_i32;
2603             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2604             goto do_mul2;
2605         case INDEX_op_mulu2_i64:
2606             opc_new = INDEX_op_mul_i64;
2607             opc_new2 = INDEX_op_muluh_i64;
2608             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2609             goto do_mul2;
2610         case INDEX_op_muls2_i64:
2611             opc_new = INDEX_op_mul_i64;
2612             opc_new2 = INDEX_op_mulsh_i64;
2613             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2614             goto do_mul2;
2615         do_mul2:
2616             nb_iargs = 2;
2617             nb_oargs = 2;
2618             if (arg_temp(op->args[1])->state == TS_DEAD) {
2619                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2620                     /* Both parts of the operation are dead.  */
2621                     goto do_remove;
2622                 }
2623                 /* The high part of the operation is dead; generate the low. */
2624                 op->opc = opc = opc_new;
2625                 op->args[1] = op->args[2];
2626                 op->args[2] = op->args[3];
2627             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2628                 /* The low part of the operation is dead; generate the high. */
2629                 op->opc = opc = opc_new2;
2630                 op->args[0] = op->args[1];
2631                 op->args[1] = op->args[2];
2632                 op->args[2] = op->args[3];
2633             } else {
2634                 goto do_not_remove;
2635             }
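            /*
             * For illustration: mulu2_i32 rl,rh,a,b becomes mul_i32 rl,a,b
             * when rh is dead, or muluh_i32 rh,a,b when rl is dead and the
             * target provides the high-part multiply.
             */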
2636             /* Mark the single-word operation live.  */
2637             nb_oargs = 1;
2638             goto do_not_remove;
2639 
2640         default:
2641             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2642             nb_iargs = def->nb_iargs;
2643             nb_oargs = def->nb_oargs;
2644 
2645             /* Test if the operation can be removed because all
2646                its outputs are dead. We assume that nb_oargs == 0
2647                implies side effects.  */
2648             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2649                 for (i = 0; i < nb_oargs; i++) {
2650                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2651                         goto do_not_remove;
2652                     }
2653                 }
2654                 goto do_remove;
2655             }
2656             goto do_not_remove;
2657 
2658         do_remove:
2659             tcg_op_remove(s, op);
2660             break;
2661 
2662         do_not_remove:
2663             for (i = 0; i < nb_oargs; i++) {
2664                 ts = arg_temp(op->args[i]);
2665 
2666                 /* Remember the preference of the uses that followed.  */
2667                 op->output_pref[i] = *la_temp_pref(ts);
2668 
2669                 /* Output args are dead.  */
2670                 if (ts->state & TS_DEAD) {
2671                     arg_life |= DEAD_ARG << i;
2672                 }
2673                 if (ts->state & TS_MEM) {
2674                     arg_life |= SYNC_ARG << i;
2675                 }
2676                 ts->state = TS_DEAD;
2677                 la_reset_pref(ts);
2678             }
2679 
2680             /* If end of basic block, update.  */
2681             if (def->flags & TCG_OPF_BB_EXIT) {
2682                 la_func_end(s, nb_globals, nb_temps);
2683             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2684                 la_bb_sync(s, nb_globals, nb_temps);
2685             } else if (def->flags & TCG_OPF_BB_END) {
2686                 la_bb_end(s, nb_globals, nb_temps);
2687             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2688                 la_global_sync(s, nb_globals);
2689                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2690                     la_cross_call(s, nb_temps);
2691                 }
2692             }
2693 
2694             /* Record arguments that die in this opcode.  */
2695             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2696                 ts = arg_temp(op->args[i]);
2697                 if (ts->state & TS_DEAD) {
2698                     arg_life |= DEAD_ARG << i;
2699                 }
2700             }
2701 
2702             /* Input arguments are live for preceding opcodes.  */
2703             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2704                 ts = arg_temp(op->args[i]);
2705                 if (ts->state & TS_DEAD) {
2706                     /* For operands that were dead, initially allow
2707                        all regs for the type.  */
2708                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2709                     ts->state &= ~TS_DEAD;
2710                 }
2711             }
2712 
2713             /* Incorporate constraints for this operand.  */
2714             switch (opc) {
2715             case INDEX_op_mov_i32:
2716             case INDEX_op_mov_i64:
2717                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2718                    have proper constraints.  That said, we special-case
2719                    moves to propagate preferences backward.  */
2720                 if (IS_DEAD_ARG(1)) {
2721                     *la_temp_pref(arg_temp(op->args[0]))
2722                         = *la_temp_pref(arg_temp(op->args[1]));
2723                 }
2724                 break;
2725 
2726             default:
2727                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2728                     const TCGArgConstraint *ct = &def->args_ct[i];
2729                     TCGRegSet set, *pset;
2730 
2731                     ts = arg_temp(op->args[i]);
2732                     pset = la_temp_pref(ts);
2733                     set = *pset;
2734 
2735                     set &= ct->regs;
2736                     if (ct->ialias) {
2737                         set &= op->output_pref[ct->alias_index];
2738                     }
2739                     /* If the combination is not possible, restart.  */
2740                     if (set == 0) {
2741                         set = ct->regs;
2742                     }
2743                     *pset = set;
2744                 }
2745                 break;
2746             }
2747             break;
2748         }
2749         op->life = arg_life;
2750     }
2751 }
2752 
2753 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2754 static bool liveness_pass_2(TCGContext *s)
2755 {
2756     int nb_globals = s->nb_globals;
2757     int nb_temps, i;
2758     bool changes = false;
2759     TCGOp *op, *op_next;
2760 
2761     /* Create a temporary for each indirect global.  */
2762     for (i = 0; i < nb_globals; ++i) {
2763         TCGTemp *its = &s->temps[i];
2764         if (its->indirect_reg) {
2765             TCGTemp *dts = tcg_temp_alloc(s);
2766             dts->type = its->type;
2767             dts->base_type = its->base_type;
2768             its->state_ptr = dts;
2769         } else {
2770             its->state_ptr = NULL;
2771         }
2772         /* All globals begin dead.  */
2773         its->state = TS_DEAD;
2774     }
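    /* The loop above left i == nb_globals; mark the remaining temps dead. */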
2775     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2776         TCGTemp *its = &s->temps[i];
2777         its->state_ptr = NULL;
2778         its->state = TS_DEAD;
2779     }
2780 
2781     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2782         TCGOpcode opc = op->opc;
2783         const TCGOpDef *def = &tcg_op_defs[opc];
2784         TCGLifeData arg_life = op->life;
2785         int nb_iargs, nb_oargs, call_flags;
2786         TCGTemp *arg_ts, *dir_ts;
2787 
2788         if (opc == INDEX_op_call) {
2789             nb_oargs = TCGOP_CALLO(op);
2790             nb_iargs = TCGOP_CALLI(op);
2791             call_flags = tcg_call_flags(op);
2792         } else {
2793             nb_iargs = def->nb_iargs;
2794             nb_oargs = def->nb_oargs;
2795 
2796             /* Set flags analogous to those required for calls.  */
2797             if (def->flags & TCG_OPF_COND_BRANCH) {
2798                 /* Like reading globals: sync_globals */
2799                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2800             } else if (def->flags & TCG_OPF_BB_END) {
2801                 /* Like writing globals: save_globals */
2802                 call_flags = 0;
2803             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2804                 /* Like reading globals: sync_globals */
2805                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2806             } else {
2807                 /* No effect on globals.  */
2808                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2809                               TCG_CALL_NO_WRITE_GLOBALS);
2810             }
2811         }
2812 
2813         /* Make sure that input arguments are available.  */
2814         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2815             arg_ts = arg_temp(op->args[i]);
2816             if (arg_ts) {
2817                 dir_ts = arg_ts->state_ptr;
2818                 if (dir_ts && arg_ts->state == TS_DEAD) {
2819                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2820                                       ? INDEX_op_ld_i32
2821                                       : INDEX_op_ld_i64);
2822                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2823 
2824                     lop->args[0] = temp_arg(dir_ts);
2825                     lop->args[1] = temp_arg(arg_ts->mem_base);
2826                     lop->args[2] = arg_ts->mem_offset;
2827 
2828                     /* Loaded, but synced with memory.  */
2829                     arg_ts->state = TS_MEM;
2830                 }
2831             }
2832         }
2833 
2834         /* Perform input replacement, and mark inputs that became dead.
2835            No action is required except keeping temp_state up to date
2836            so that we reload when needed.  */
2837         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2838             arg_ts = arg_temp(op->args[i]);
2839             if (arg_ts) {
2840                 dir_ts = arg_ts->state_ptr;
2841                 if (dir_ts) {
2842                     op->args[i] = temp_arg(dir_ts);
2843                     changes = true;
2844                     if (IS_DEAD_ARG(i)) {
2845                         arg_ts->state = TS_DEAD;
2846                     }
2847                 }
2848             }
2849         }
2850 
2851         /* Liveness analysis should ensure that the following are
2852            all correct, for call sites and basic block end points.  */
2853         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2854             /* Nothing to do */
2855         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2856             for (i = 0; i < nb_globals; ++i) {
2857                 /* Liveness should see that globals are synced back,
2858                    that is, either TS_DEAD or TS_MEM.  */
2859                 arg_ts = &s->temps[i];
2860                 tcg_debug_assert(arg_ts->state_ptr == 0
2861                                  || arg_ts->state != 0);
2862             }
2863         } else {
2864             for (i = 0; i < nb_globals; ++i) {
2865                 /* Liveness should see that globals are saved back,
2866                    that is, TS_DEAD, waiting to be reloaded.  */
2867                 arg_ts = &s->temps[i];
2868                 tcg_debug_assert(arg_ts->state_ptr == 0
2869                                  || arg_ts->state == TS_DEAD);
2870             }
2871         }
2872 
2873         /* Outputs become available.  */
2874         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2875             arg_ts = arg_temp(op->args[0]);
2876             dir_ts = arg_ts->state_ptr;
2877             if (dir_ts) {
2878                 op->args[0] = temp_arg(dir_ts);
2879                 changes = true;
2880 
2881                 /* The output is now live and modified.  */
2882                 arg_ts->state = 0;
2883 
2884                 if (NEED_SYNC_ARG(0)) {
2885                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2886                                       ? INDEX_op_st_i32
2887                                       : INDEX_op_st_i64);
2888                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2889                     TCGTemp *out_ts = dir_ts;
2890 
2891                     if (IS_DEAD_ARG(0)) {
2892                         out_ts = arg_temp(op->args[1]);
2893                         arg_ts->state = TS_DEAD;
2894                         tcg_op_remove(s, op);
2895                     } else {
2896                         arg_ts->state = TS_MEM;
2897                     }
2898 
2899                     sop->args[0] = temp_arg(out_ts);
2900                     sop->args[1] = temp_arg(arg_ts->mem_base);
2901                     sop->args[2] = arg_ts->mem_offset;
2902                 } else {
2903                     tcg_debug_assert(!IS_DEAD_ARG(0));
2904                 }
2905             }
2906         } else {
2907             for (i = 0; i < nb_oargs; i++) {
2908                 arg_ts = arg_temp(op->args[i]);
2909                 dir_ts = arg_ts->state_ptr;
2910                 if (!dir_ts) {
2911                     continue;
2912                 }
2913                 op->args[i] = temp_arg(dir_ts);
2914                 changes = true;
2915 
2916                 /* The output is now live and modified.  */
2917                 arg_ts->state = 0;
2918 
2919                 /* Sync outputs upon their last write.  */
2920                 if (NEED_SYNC_ARG(i)) {
2921                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2922                                       ? INDEX_op_st_i32
2923                                       : INDEX_op_st_i64);
2924                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2925 
2926                     sop->args[0] = temp_arg(dir_ts);
2927                     sop->args[1] = temp_arg(arg_ts->mem_base);
2928                     sop->args[2] = arg_ts->mem_offset;
2929 
2930                     arg_ts->state = TS_MEM;
2931                 }
2932                 /* Drop outputs that are dead.  */
2933                 if (IS_DEAD_ARG(i)) {
2934                     arg_ts->state = TS_DEAD;
2935                 }
2936             }
2937         }
2938     }
2939 
2940     return changes;
2941 }
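/*
 * For illustration, given an indirect global g with direct temp d:
 * a read of g while d is TS_DEAD gets "ld d, mem_base, mem_offset"
 * inserted before the op, a write to g that must be synced gets
 * "st d, mem_base, mem_offset" inserted after it, and the op itself
 * is rewritten to reference d.
 */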
2942 
2943 #ifdef CONFIG_DEBUG_TCG
2944 static void dump_regs(TCGContext *s)
2945 {
2946     TCGTemp *ts;
2947     int i;
2948     char buf[64];
2949 
2950     for(i = 0; i < s->nb_temps; i++) {
2951         ts = &s->temps[i];
2952         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2953         switch(ts->val_type) {
2954         case TEMP_VAL_REG:
2955             printf("%s", tcg_target_reg_names[ts->reg]);
2956             break;
2957         case TEMP_VAL_MEM:
2958             printf("%d(%s)", (int)ts->mem_offset,
2959                    tcg_target_reg_names[ts->mem_base->reg]);
2960             break;
2961         case TEMP_VAL_CONST:
2962             printf("$0x%" PRIx64, ts->val);
2963             break;
2964         case TEMP_VAL_DEAD:
2965             printf("D");
2966             break;
2967         default:
2968             printf("???");
2969             break;
2970         }
2971         printf("\n");
2972     }
2973 
2974     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2975         if (s->reg_to_temp[i] != NULL) {
2976             printf("%s: %s\n",
2977                    tcg_target_reg_names[i],
2978                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2979         }
2980     }
2981 }
2982 
2983 static void check_regs(TCGContext *s)
2984 {
2985     int reg;
2986     int k;
2987     TCGTemp *ts;
2988     char buf[64];
2989 
2990     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2991         ts = s->reg_to_temp[reg];
2992         if (ts != NULL) {
2993             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2994                 printf("Inconsistency for register %s:\n",
2995                        tcg_target_reg_names[reg]);
2996                 goto fail;
2997             }
2998         }
2999     }
3000     for (k = 0; k < s->nb_temps; k++) {
3001         ts = &s->temps[k];
3002         if (ts->val_type == TEMP_VAL_REG
3003             && ts->kind != TEMP_FIXED
3004             && s->reg_to_temp[ts->reg] != ts) {
3005             printf("Inconsistency for temp %s:\n",
3006                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3007         fail:
3008             printf("reg state:\n");
3009             dump_regs(s);
3010             tcg_abort();
3011         }
3012     }
3013 }
3014 #endif
3015 
3016 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3017 {
3018     intptr_t off, size, align;
3019 
3020     switch (ts->type) {
3021     case TCG_TYPE_I32:
3022         size = align = 4;
3023         break;
3024     case TCG_TYPE_I64:
3025     case TCG_TYPE_V64:
3026         size = align = 8;
3027         break;
3028     case TCG_TYPE_V128:
3029         size = align = 16;
3030         break;
3031     case TCG_TYPE_V256:
3032         /* Note that we do not require aligned storage for V256. */
3033         size = 32, align = 16;
3034         break;
3035     default:
3036         g_assert_not_reached();
3037     }
3038 
3039     assert(align <= TCG_TARGET_STACK_ALIGN);
3040     off = ROUND_UP(s->current_frame_offset, align);
3041 
3042     /* If we've exhausted the stack frame, restart with a smaller TB. */
3043     if (off + size > s->frame_end) {
3044         tcg_raise_tb_overflow(s);
3045     }
3046     s->current_frame_offset = off + size;
3047 
3048     ts->mem_offset = off;
3049 #if defined(__sparc__)
3050     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3051 #endif
3052     ts->mem_base = s->frame_temp;
3053     ts->mem_allocated = 1;
3054 }
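/*
 * Worked example: with current_frame_offset == 4, allocating a
 * TCG_TYPE_I64 rounds the offset up to 8, assigns mem_offset = 8
 * (plus TCG_TARGET_STACK_BIAS on sparc) and advances
 * current_frame_offset to 16.
 */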
3055 
3056 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3057 
3058 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3059    mark it free; otherwise mark it dead.  */
3060 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3061 {
3062     TCGTempVal new_type;
3063 
3064     switch (ts->kind) {
3065     case TEMP_FIXED:
3066         return;
3067     case TEMP_GLOBAL:
3068     case TEMP_LOCAL:
3069         new_type = TEMP_VAL_MEM;
3070         break;
3071     case TEMP_NORMAL:
3072         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3073         break;
3074     case TEMP_CONST:
3075         new_type = TEMP_VAL_CONST;
3076         break;
3077     default:
3078         g_assert_not_reached();
3079     }
3080     if (ts->val_type == TEMP_VAL_REG) {
3081         s->reg_to_temp[ts->reg] = NULL;
3082     }
3083     ts->val_type = new_type;
3084 }
3085 
3086 /* Mark a temporary as dead.  */
3087 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3088 {
3089     temp_free_or_dead(s, ts, 1);
3090 }
3091 
3092 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3093    register needs to be allocated to store a constant.  If 'free_or_dead'
3094    is non-zero, subsequently release the temporary; if it is positive, the
3095    temp is dead; if it is negative, the temp is free.  */
3096 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3097                       TCGRegSet preferred_regs, int free_or_dead)
3098 {
3099     if (!temp_readonly(ts) && !ts->mem_coherent) {
3100         if (!ts->mem_allocated) {
3101             temp_allocate_frame(s, ts);
3102         }
3103         switch (ts->val_type) {
3104         case TEMP_VAL_CONST:
3105             /* If we're going to free the temp immediately, then we won't
3106                require it later in a register, so attempt to store the
3107                constant to memory directly.  */
3108             if (free_or_dead
3109                 && tcg_out_sti(s, ts->type, ts->val,
3110                                ts->mem_base->reg, ts->mem_offset)) {
3111                 break;
3112             }
3113             temp_load(s, ts, tcg_target_available_regs[ts->type],
3114                       allocated_regs, preferred_regs);
3115             /* fallthrough */
3116 
3117         case TEMP_VAL_REG:
3118             tcg_out_st(s, ts->type, ts->reg,
3119                        ts->mem_base->reg, ts->mem_offset);
3120             break;
3121 
3122         case TEMP_VAL_MEM:
3123             break;
3124 
3125         case TEMP_VAL_DEAD:
3126         default:
3127             tcg_abort();
3128         }
3129         ts->mem_coherent = 1;
3130     }
3131     if (free_or_dead) {
3132         temp_free_or_dead(s, ts, free_or_dead);
3133     }
3134 }
3135 
3136 /* free register 'reg' by spilling the corresponding temporary if necessary */
3137 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3138 {
3139     TCGTemp *ts = s->reg_to_temp[reg];
3140     if (ts != NULL) {
3141         temp_sync(s, ts, allocated_regs, 0, -1);
3142     }
3143 }
3144 
3145 /**
3146  * tcg_reg_alloc:
3147  * @required_regs: Set of registers in which we must allocate.
3148  * @allocated_regs: Set of registers which must be avoided.
3149  * @preferred_regs: Set of registers we should prefer.
3150  * @rev: True if we search the registers in "indirect" order.
3151  *
3152  * The allocated register must be in @required_regs & ~@allocated_regs,
3153  * but if we can put it in @preferred_regs we may save a move later.
3154  */
3155 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3156                             TCGRegSet allocated_regs,
3157                             TCGRegSet preferred_regs, bool rev)
3158 {
3159     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3160     TCGRegSet reg_ct[2];
3161     const int *order;
3162 
3163     reg_ct[1] = required_regs & ~allocated_regs;
3164     tcg_debug_assert(reg_ct[1] != 0);
3165     reg_ct[0] = reg_ct[1] & preferred_regs;
3166 
3167     /* Skip the preferred_regs option if it cannot be satisfied,
3168        or if the preference made no difference.  */
3169     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3170 
3171     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3172 
3173     /* Try free registers, preferences first.  */
3174     for (j = f; j < 2; j++) {
3175         TCGRegSet set = reg_ct[j];
3176 
3177         if (tcg_regset_single(set)) {
3178             /* One register in the set.  */
3179             TCGReg reg = tcg_regset_first(set);
3180             if (s->reg_to_temp[reg] == NULL) {
3181                 return reg;
3182             }
3183         } else {
3184             for (i = 0; i < n; i++) {
3185                 TCGReg reg = order[i];
3186                 if (s->reg_to_temp[reg] == NULL &&
3187                     tcg_regset_test_reg(set, reg)) {
3188                     return reg;
3189                 }
3190             }
3191         }
3192     }
3193 
3194     /* We must spill something.  */
3195     for (j = f; j < 2; j++) {
3196         TCGRegSet set = reg_ct[j];
3197 
3198         if (tcg_regset_single(set)) {
3199             /* One register in the set.  */
3200             TCGReg reg = tcg_regset_first(set);
3201             tcg_reg_free(s, reg, allocated_regs);
3202             return reg;
3203         } else {
3204             for (i = 0; i < n; i++) {
3205                 TCGReg reg = order[i];
3206                 if (tcg_regset_test_reg(set, reg)) {
3207                     tcg_reg_free(s, reg, allocated_regs);
3208                     return reg;
3209                 }
3210             }
3211         }
3212     }
3213 
3214     tcg_abort();
3215 }
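/*
 * A worked example of the two-pass search above, for a hypothetical
 * three-register host: with required = {r0,r1,r2}, allocated = {r0}
 * and preferred = {r2}, we get reg_ct[1] = {r1,r2} and reg_ct[0] = {r2}.
 * The first pass hands out r2 if it is free, then r1; only when both
 * hold live temps does the second pass spill, again trying the
 * preferred set first.
 */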
3216 
3217 /* Make sure the temporary is in a register.  If needed, allocate the register
3218    from DESIRED while avoiding ALLOCATED.  */
3219 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3220                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3221 {
3222     TCGReg reg;
3223 
3224     switch (ts->val_type) {
3225     case TEMP_VAL_REG:
3226         return;
3227     case TEMP_VAL_CONST:
3228         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3229                             preferred_regs, ts->indirect_base);
3230         if (ts->type <= TCG_TYPE_I64) {
3231             tcg_out_movi(s, ts->type, reg, ts->val);
3232         } else {
3233             uint64_t val = ts->val;
3234             MemOp vece = MO_64;
3235 
3236             /*
3237              * Find the minimal vector element that matches the constant.
3238              * The targets will, in general, have to do this search anyway,
3239              * so do it generically here.
3240              */
3241             if (val == dup_const(MO_8, val)) {
3242                 vece = MO_8;
3243             } else if (val == dup_const(MO_16, val)) {
3244                 vece = MO_16;
3245             } else if (val == dup_const(MO_32, val)) {
3246                 vece = MO_32;
3247             }
3248 
3249             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3250         }
3251         ts->mem_coherent = 0;
3252         break;
3253     case TEMP_VAL_MEM:
3254         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3255                             preferred_regs, ts->indirect_base);
3256         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3257         ts->mem_coherent = 1;
3258         break;
3259     case TEMP_VAL_DEAD:
3260     default:
3261         tcg_abort();
3262     }
3263     ts->reg = reg;
3264     ts->val_type = TEMP_VAL_REG;
3265     s->reg_to_temp[reg] = ts;
3266 }
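/*
 * Example of the minimal-vece search in the constant case above:
 * val = 0xffffffffffffffff already matches dup_const(MO_8, 0xff), so
 * a byte dup suffices; val = 0x00ff00ff00ff00ff fails the MO_8 test
 * but matches dup_const(MO_16, 0x00ff), giving vece = MO_16.
 */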
3267 
3268 /* Save a temporary to memory. 'allocated_regs' is used in case a
3269    temporary register needs to be allocated to store a constant.  */
3270 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3271 {
3272     /* The liveness analysis already ensures that globals are back
3273    in memory. Keep a tcg_debug_assert for safety. */
3274     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3275 }
3276 
3277 /* save globals to their canonical location and assume they can be
3278    modified by the following code. 'allocated_regs' is used in case a
3279    temporary register needs to be allocated to store a constant. */
3280 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3281 {
3282     int i, n;
3283 
3284     for (i = 0, n = s->nb_globals; i < n; i++) {
3285         temp_save(s, &s->temps[i], allocated_regs);
3286     }
3287 }
3288 
3289 /* sync globals to their canonical location and assume they can be
3290    read by the following code. 'allocated_regs' is used in case a
3291    temporary register needs to be allocated to store a constant. */
3292 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3293 {
3294     int i, n;
3295 
3296     for (i = 0, n = s->nb_globals; i < n; i++) {
3297         TCGTemp *ts = &s->temps[i];
3298         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3299                          || ts->kind == TEMP_FIXED
3300                          || ts->mem_coherent);
3301     }
3302 }
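/*
 * Note the save/sync distinction: temp_save expects the value to live
 * only in memory, while sync_globals merely requires any register copy
 * to be coherent with memory.  tcg_reg_alloc_call below selects between
 * the two based on the helper's TCG_CALL_NO_READ_GLOBALS and
 * TCG_CALL_NO_WRITE_GLOBALS flags.
 */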
3303 
3304 /* at the end of a basic block, we assume all temporaries are dead and
3305    all globals are stored at their canonical location. */
3306 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3307 {
3308     int i;
3309 
3310     for (i = s->nb_globals; i < s->nb_temps; i++) {
3311         TCGTemp *ts = &s->temps[i];
3312 
3313         switch (ts->kind) {
3314         case TEMP_LOCAL:
3315             temp_save(s, ts, allocated_regs);
3316             break;
3317         case TEMP_NORMAL:
3318             /* The liveness analysis already ensures that temps are dead.
3319                Keep a tcg_debug_assert for safety. */
3320             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3321             break;
3322         case TEMP_CONST:
3323             /* Similarly, we should have freed any allocated register. */
3324             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3325             break;
3326         default:
3327             g_assert_not_reached();
3328         }
3329     }
3330 
3331     save_globals(s, allocated_regs);
3332 }
3333 
3334 /*
3335  * At a conditional branch, we assume all temporaries are dead and
3336  * all globals and local temps are synced to their location.
3337  */
3338 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3339 {
3340     sync_globals(s, allocated_regs);
3341 
3342     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3343         TCGTemp *ts = &s->temps[i];
3344         /*
3345          * The liveness analysis already ensures that temps are dead.
3346          * Keep tcg_debug_asserts for safety.
3347          */
3348         switch (ts->kind) {
3349         case TEMP_LOCAL:
3350             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3351             break;
3352         case TEMP_NORMAL:
3353             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3354             break;
3355         case TEMP_CONST:
3356             break;
3357         default:
3358             g_assert_not_reached();
3359         }
3360     }
3361 }
3362 
3363 /*
3364  * Specialized code generation for INDEX_op_mov_* with a constant.
3365  */
3366 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3367                                   tcg_target_ulong val, TCGLifeData arg_life,
3368                                   TCGRegSet preferred_regs)
3369 {
3370     /* ENV should not be modified.  */
3371     tcg_debug_assert(!temp_readonly(ots));
3372 
3373     /* The movi is not explicitly generated here.  */
3374     if (ots->val_type == TEMP_VAL_REG) {
3375         s->reg_to_temp[ots->reg] = NULL;
3376     }
3377     ots->val_type = TEMP_VAL_CONST;
3378     ots->val = val;
3379     ots->mem_coherent = 0;
3380     if (NEED_SYNC_ARG(0)) {
3381         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3382     } else if (IS_DEAD_ARG(0)) {
3383         temp_dead(s, ots);
3384     }
3385 }
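/*
 * Consequently a "mov_i32 t0, $c" with a constant source emits no host
 * code by itself: t0 merely becomes TEMP_VAL_CONST, and the value is
 * materialized later, either by the temp_sync above when
 * NEED_SYNC_ARG(0) holds or by temp_load at the next use.
 */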
3386 
3387 /*
3388  * Specialized code generation for INDEX_op_mov_*.
3389  */
3390 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3391 {
3392     const TCGLifeData arg_life = op->life;
3393     TCGRegSet allocated_regs, preferred_regs;
3394     TCGTemp *ts, *ots;
3395     TCGType otype, itype;
3396 
3397     allocated_regs = s->reserved_regs;
3398     preferred_regs = op->output_pref[0];
3399     ots = arg_temp(op->args[0]);
3400     ts = arg_temp(op->args[1]);
3401 
3402     /* ENV should not be modified.  */
3403     tcg_debug_assert(!temp_readonly(ots));
3404 
3405     /* Note that otype != itype for no-op truncation.  */
3406     otype = ots->type;
3407     itype = ts->type;
3408 
3409     if (ts->val_type == TEMP_VAL_CONST) {
3410         /* propagate constant or generate sti */
3411         tcg_target_ulong val = ts->val;
3412         if (IS_DEAD_ARG(1)) {
3413             temp_dead(s, ts);
3414         }
3415         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3416         return;
3417     }
3418 
3419     /* If the source value is in memory we're going to be forced
3420        to have it in a register in order to perform the copy.  Copy
3421        the SOURCE value into its own register first; that way we
3422        don't have to reload SOURCE the next time it is used. */
3423     if (ts->val_type == TEMP_VAL_MEM) {
3424         temp_load(s, ts, tcg_target_available_regs[itype],
3425                   allocated_regs, preferred_regs);
3426     }
3427 
3428     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3429     if (IS_DEAD_ARG(0)) {
3430         /* mov to a non-saved dead register makes no sense (even with
3431            liveness analysis disabled). */
3432         tcg_debug_assert(NEED_SYNC_ARG(0));
3433         if (!ots->mem_allocated) {
3434             temp_allocate_frame(s, ots);
3435         }
3436         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3437         if (IS_DEAD_ARG(1)) {
3438             temp_dead(s, ts);
3439         }
3440         temp_dead(s, ots);
3441     } else {
3442         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3443             /* the mov can be suppressed */
3444             if (ots->val_type == TEMP_VAL_REG) {
3445                 s->reg_to_temp[ots->reg] = NULL;
3446             }
3447             ots->reg = ts->reg;
3448             temp_dead(s, ts);
3449         } else {
3450             if (ots->val_type != TEMP_VAL_REG) {
3451                 /* When allocating a new register, make sure to not spill the
3452                    input one. */
3453                 tcg_regset_set_reg(allocated_regs, ts->reg);
3454                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3455                                          allocated_regs, preferred_regs,
3456                                          ots->indirect_base);
3457             }
3458             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3459                 /*
3460                  * Cross register class move not supported.
3461                  * Store the source register into the destination slot
3462                  * and leave the destination temp as TEMP_VAL_MEM.
3463                  */
3464                 assert(!temp_readonly(ots));
3465                 if (!ots->mem_allocated) {
3466                     temp_allocate_frame(s, ots);
3467                 }
3468                 tcg_out_st(s, ts->type, ts->reg,
3469                            ots->mem_base->reg, ots->mem_offset);
3470                 ots->mem_coherent = 1;
3471                 temp_free_or_dead(s, ots, -1);
3472                 return;
3473             }
3474         }
3475         ots->val_type = TEMP_VAL_REG;
3476         ots->mem_coherent = 0;
3477         s->reg_to_temp[ots->reg] = ots;
3478         if (NEED_SYNC_ARG(0)) {
3479             temp_sync(s, ots, allocated_regs, 0, 0);
3480         }
3481     }
3482 }
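/*
 * Summarizing the cases above for "mov t1, t0" with t0 in a register:
 * if t1 is dead and only needs a sync, t0 is stored directly into t1's
 * slot; if t0 dies here and is not TEMP_FIXED, t1 takes over t0's
 * register and no host instruction is emitted at all; otherwise a real
 * register move is generated, falling back to a store into t1's slot
 * when the move would cross register classes.
 */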
3483 
3484 /*
3485  * Specialized code generation for INDEX_op_dup_vec.
3486  */
3487 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3488 {
3489     const TCGLifeData arg_life = op->life;
3490     TCGRegSet dup_out_regs, dup_in_regs;
3491     TCGTemp *its, *ots;
3492     TCGType itype, vtype;
3493     intptr_t endian_fixup;
3494     unsigned vece;
3495     bool ok;
3496 
3497     ots = arg_temp(op->args[0]);
3498     its = arg_temp(op->args[1]);
3499 
3500     /* ENV should not be modified.  */
3501     tcg_debug_assert(!temp_readonly(ots));
3502 
3503     itype = its->type;
3504     vece = TCGOP_VECE(op);
3505     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3506 
3507     if (its->val_type == TEMP_VAL_CONST) {
3508         /* Propagate constant via movi -> dupi.  */
3509         tcg_target_ulong val = its->val;
3510         if (IS_DEAD_ARG(1)) {
3511             temp_dead(s, its);
3512         }
3513         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3514         return;
3515     }
3516 
3517     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3518     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3519 
3520     /* Allocate the output register now.  */
3521     if (ots->val_type != TEMP_VAL_REG) {
3522         TCGRegSet allocated_regs = s->reserved_regs;
3523 
3524         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3525             /* Make sure to not spill the input register. */
3526             tcg_regset_set_reg(allocated_regs, its->reg);
3527         }
3528         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3529                                  op->output_pref[0], ots->indirect_base);
3530         ots->val_type = TEMP_VAL_REG;
3531         ots->mem_coherent = 0;
3532         s->reg_to_temp[ots->reg] = ots;
3533     }
3534 
3535     switch (its->val_type) {
3536     case TEMP_VAL_REG:
3537         /*
3538          * The dup constraints must be broad, covering all possible VECE.
3539          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3540          * to fail, indicating that extra moves are required for that case.
3541          */
3542         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3543             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3544                 goto done;
3545             }
3546             /* Try again from memory or a vector input register.  */
3547         }
3548         if (!its->mem_coherent) {
3549             /*
3550              * The input register is not synced, and so an extra store
3551              * would be required to use memory.  Attempt an integer-vector
3552              * register move first.  We do not have a TCGRegSet for this.
3553              */
3554             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3555                 break;
3556             }
3557             /* Sync the temp back to its slot and load from there.  */
3558             temp_sync(s, its, s->reserved_regs, 0, 0);
3559         }
3560         /* fall through */
3561 
3562     case TEMP_VAL_MEM:
3563 #ifdef HOST_WORDS_BIGENDIAN
3564         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3565         endian_fixup -= 1 << vece;
3566 #else
3567         endian_fixup = 0;
3568 #endif
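        /*
         * Example: on a big-endian host with itype == TCG_TYPE_I32 and
         * vece == MO_8, endian_fixup is 4 - 1 = 3, pointing the dupm
         * at the least significant byte of the stored value.
         */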
3569         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3570                              its->mem_offset + endian_fixup)) {
3571             goto done;
3572         }
3573         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3574         break;
3575 
3576     default:
3577         g_assert_not_reached();
3578     }
3579 
3580     /* We now have a vector input register, so dup must succeed. */
3581     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3582     tcg_debug_assert(ok);
3583 
3584  done:
3585     if (IS_DEAD_ARG(1)) {
3586         temp_dead(s, its);
3587     }
3588     if (NEED_SYNC_ARG(0)) {
3589         temp_sync(s, ots, s->reserved_regs, 0, 0);
3590     }
3591     if (IS_DEAD_ARG(0)) {
3592         temp_dead(s, ots);
3593     }
3594 }
3595 
3596 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3597 {
3598     const TCGLifeData arg_life = op->life;
3599     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3600     TCGRegSet i_allocated_regs;
3601     TCGRegSet o_allocated_regs;
3602     int i, k, nb_iargs, nb_oargs;
3603     TCGReg reg;
3604     TCGArg arg;
3605     const TCGArgConstraint *arg_ct;
3606     TCGTemp *ts;
3607     TCGArg new_args[TCG_MAX_OP_ARGS];
3608     int const_args[TCG_MAX_OP_ARGS];
3609 
3610     nb_oargs = def->nb_oargs;
3611     nb_iargs = def->nb_iargs;
3612 
3613     /* copy constants */
3614     memcpy(new_args + nb_oargs + nb_iargs,
3615            op->args + nb_oargs + nb_iargs,
3616            sizeof(TCGArg) * def->nb_cargs);
3617 
3618     i_allocated_regs = s->reserved_regs;
3619     o_allocated_regs = s->reserved_regs;
3620 
3621     /* satisfy input constraints */
3622     for (k = 0; k < nb_iargs; k++) {
3623         TCGRegSet i_preferred_regs, o_preferred_regs;
3624 
3625         i = def->args_ct[nb_oargs + k].sort_index;
3626         arg = op->args[i];
3627         arg_ct = &def->args_ct[i];
3628         ts = arg_temp(arg);
3629 
3630         if (ts->val_type == TEMP_VAL_CONST
3631             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3632             /* constant is OK for instruction */
3633             const_args[i] = 1;
3634             new_args[i] = ts->val;
3635             continue;
3636         }
3637 
3638         i_preferred_regs = o_preferred_regs = 0;
3639         if (arg_ct->ialias) {
3640             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3641 
3642             /*
3643              * If the input is readonly, then it cannot also be an
3644              * output and aliased to itself.  If the input is not
3645              * dead after the instruction, we must allocate a new
3646              * register and move it.
3647              */
3648             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3649                 goto allocate_in_reg;
3650             }
3651 
3652             /*
3653              * Check if the current register has already been allocated
3654              * for another input aliased to an output.
3655              */
3656             if (ts->val_type == TEMP_VAL_REG) {
3657                 reg = ts->reg;
3658                 for (int k2 = 0; k2 < k; k2++) {
3659                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3660                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3661                         goto allocate_in_reg;
3662                     }
3663                 }
3664             }
3665             i_preferred_regs = o_preferred_regs;
3666         }
3667 
3668         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3669         reg = ts->reg;
3670 
3671         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3672  allocate_in_reg:
3673             /*
3674              * Allocate a new register matching the constraint
3675              * and move the temporary register into it.
3676              */
3677             temp_load(s, ts, tcg_target_available_regs[ts->type],
3678                       i_allocated_regs, 0);
3679             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3680                                 o_preferred_regs, ts->indirect_base);
3681             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3682                 /*
3683                  * Cross register class move not supported.  Sync the
3684                  * temp back to its slot and load from there.
3685                  */
3686                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3687                 tcg_out_ld(s, ts->type, reg,
3688                            ts->mem_base->reg, ts->mem_offset);
3689             }
3690         }
3691         new_args[i] = reg;
3692         const_args[i] = 0;
3693         tcg_regset_set_reg(i_allocated_regs, reg);
3694     }
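    /*
     * Example of the ialias handling above: for a two-address op whose
     * output constraint aliases input 0 (as on hosts such as x86), an
     * input that is still live after the op must not be clobbered in
     * place, so it is copied via allocate_in_reg into a fresh register
     * that satisfies the constraint.
     */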
3695 
3696     /* mark dead temporaries and free the associated registers */
3697     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3698         if (IS_DEAD_ARG(i)) {
3699             temp_dead(s, arg_temp(op->args[i]));
3700         }
3701     }
3702 
3703     if (def->flags & TCG_OPF_COND_BRANCH) {
3704         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3705     } else if (def->flags & TCG_OPF_BB_END) {
3706         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3707     } else {
3708         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3709             /* XXX: permit generic clobber register list ? */
3710             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3711                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3712                     tcg_reg_free(s, i, i_allocated_regs);
3713                 }
3714             }
3715         }
3716         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3717             /* sync globals if the op has side effects and might trigger
3718                an exception. */
3719             sync_globals(s, i_allocated_regs);
3720         }
3721 
3722         /* satisfy the output constraints */
3723         for (k = 0; k < nb_oargs; k++) {
3724             i = def->args_ct[k].sort_index;
3725             arg = op->args[i];
3726             arg_ct = &def->args_ct[i];
3727             ts = arg_temp(arg);
3728 
3729             /* ENV should not be modified.  */
3730             tcg_debug_assert(!temp_readonly(ts));
3731 
3732             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3733                 reg = new_args[arg_ct->alias_index];
3734             } else if (arg_ct->newreg) {
3735                 reg = tcg_reg_alloc(s, arg_ct->regs,
3736                                     i_allocated_regs | o_allocated_regs,
3737                                     op->output_pref[k], ts->indirect_base);
3738             } else {
3739                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3740                                     op->output_pref[k], ts->indirect_base);
3741             }
3742             tcg_regset_set_reg(o_allocated_regs, reg);
3743             if (ts->val_type == TEMP_VAL_REG) {
3744                 s->reg_to_temp[ts->reg] = NULL;
3745             }
3746             ts->val_type = TEMP_VAL_REG;
3747             ts->reg = reg;
3748             /*
3749              * Temp value is modified, so the value kept in memory is
3750              * potentially not the same.
3751              */
3752             ts->mem_coherent = 0;
3753             s->reg_to_temp[reg] = ts;
3754             new_args[i] = reg;
3755         }
3756     }
3757 
3758     /* emit instruction */
3759     if (def->flags & TCG_OPF_VECTOR) {
3760         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3761                        new_args, const_args);
3762     } else {
3763         tcg_out_op(s, op->opc, new_args, const_args);
3764     }
3765 
3766     /* move the outputs in the correct register if needed */
3767     for (i = 0; i < nb_oargs; i++) {
3768         ts = arg_temp(op->args[i]);
3769 
3770         /* ENV should not be modified.  */
3771         tcg_debug_assert(!temp_readonly(ts));
3772 
3773         if (NEED_SYNC_ARG(i)) {
3774             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3775         } else if (IS_DEAD_ARG(i)) {
3776             temp_dead(s, ts);
3777         }
3778     }
3779 }
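/*
 * To recap the structure of tcg_reg_alloc_op: satisfy the input
 * constraints, release dead inputs, honor the branch/call-clobber/
 * side-effect flags, allocate the outputs, emit the host instruction,
 * and finally sync or release each output as the liveness data directs.
 */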
3780 
3781 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3782 {
3783     const TCGLifeData arg_life = op->life;
3784     TCGTemp *ots, *itsl, *itsh;
3785     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3786 
3787     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3788     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3789     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3790 
3791     ots = arg_temp(op->args[0]);
3792     itsl = arg_temp(op->args[1]);
3793     itsh = arg_temp(op->args[2]);
3794 
3795     /* ENV should not be modified.  */
3796     tcg_debug_assert(!temp_readonly(ots));
3797 
3798     /* Allocate the output register now.  */
3799     if (ots->val_type != TEMP_VAL_REG) {
3800         TCGRegSet allocated_regs = s->reserved_regs;
3801         TCGRegSet dup_out_regs =
3802             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3803 
3804         /* Make sure to not spill the input registers. */
3805         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3806             tcg_regset_set_reg(allocated_regs, itsl->reg);
3807         }
3808         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3809             tcg_regset_set_reg(allocated_regs, itsh->reg);
3810         }
3811 
3812         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3813                                  op->output_pref[0], ots->indirect_base);
3814         ots->val_type = TEMP_VAL_REG;
3815         ots->mem_coherent = 0;
3816         s->reg_to_temp[ots->reg] = ots;
3817     }
3818 
3819     /* Promote dup2 of immediates to dupi_vec. */
3820     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3821         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3822         MemOp vece = MO_64;
3823 
3824         if (val == dup_const(MO_8, val)) {
3825             vece = MO_8;
3826         } else if (val == dup_const(MO_16, val)) {
3827             vece = MO_16;
3828         } else if (val == dup_const(MO_32, val)) {
3829             vece = MO_32;
3830         }
3831 
3832         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3833         goto done;
3834     }
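    /*
     * Example of the promotion above: itsl->val = 1, itsh->val = 2
     * yields val = deposit64(1, 32, 32, 2) = 0x0000000200000001, which
     * matches no narrower dup_const pattern and is therefore emitted
     * as an MO_64 dupi.
     */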
3835 
3836     /* If the two inputs form one 64-bit value, try dupm_vec. */
3837     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3838         if (!itsl->mem_coherent) {
3839             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3840         }
3841         if (!itsh->mem_coherent) {
3842             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3843         }
3844 #ifdef HOST_WORDS_BIGENDIAN
3845         TCGTemp *its = itsh;
3846 #else
3847         TCGTemp *its = itsl;
3848 #endif
3849         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3850                              its->mem_base->reg, its->mem_offset)) {
3851             goto done;
3852         }
3853     }
3854 
3855     /* Fall back to generic expansion. */
3856     return false;
3857 
3858  done:
3859     if (IS_DEAD_ARG(1)) {
3860         temp_dead(s, itsl);
3861     }
3862     if (IS_DEAD_ARG(2)) {
3863         temp_dead(s, itsh);
3864     }
3865     if (NEED_SYNC_ARG(0)) {
3866         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3867     } else if (IS_DEAD_ARG(0)) {
3868         temp_dead(s, ots);
3869     }
3870     return true;
3871 }
3872 
3873 #ifdef TCG_TARGET_STACK_GROWSUP
3874 #define STACK_DIR(x) (-(x))
3875 #else
3876 #define STACK_DIR(x) (x)
3877 #endif
3878 
3879 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3880 {
3881     const int nb_oargs = TCGOP_CALLO(op);
3882     const int nb_iargs = TCGOP_CALLI(op);
3883     const TCGLifeData arg_life = op->life;
3884     const TCGHelperInfo *info;
3885     int flags, nb_regs, i;
3886     TCGReg reg;
3887     TCGArg arg;
3888     TCGTemp *ts;
3889     intptr_t stack_offset;
3890     size_t call_stack_size;
3891     tcg_insn_unit *func_addr;
3892     int allocate_args;
3893     TCGRegSet allocated_regs;
3894 
3895     func_addr = tcg_call_func(op);
3896     info = tcg_call_info(op);
3897     flags = info->flags;
3898 
3899     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3900     if (nb_regs > nb_iargs) {
3901         nb_regs = nb_iargs;
3902     }
3903 
3904     /* assign stack slots first */
3905     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3906     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3907         ~(TCG_TARGET_STACK_ALIGN - 1);
3908     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3909     if (allocate_args) {
3910         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3911            preallocate call stack */
3912         tcg_abort();
3913     }
3914 
3915     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3916     for (i = nb_regs; i < nb_iargs; i++) {
3917         arg = op->args[nb_oargs + i];
3918 #ifdef TCG_TARGET_STACK_GROWSUP
3919         stack_offset -= sizeof(tcg_target_long);
3920 #endif
3921         if (arg != TCG_CALL_DUMMY_ARG) {
3922             ts = arg_temp(arg);
3923             temp_load(s, ts, tcg_target_available_regs[ts->type],
3924                       s->reserved_regs, 0);
3925             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3926         }
3927 #ifndef TCG_TARGET_STACK_GROWSUP
3928         stack_offset += sizeof(tcg_target_long);
3929 #endif
3930     }
3931 
3932     /* assign input registers */
3933     allocated_regs = s->reserved_regs;
3934     for (i = 0; i < nb_regs; i++) {
3935         arg = op->args[nb_oargs + i];
3936         if (arg != TCG_CALL_DUMMY_ARG) {
3937             ts = arg_temp(arg);
3938             reg = tcg_target_call_iarg_regs[i];
3939 
3940             if (ts->val_type == TEMP_VAL_REG) {
3941                 if (ts->reg != reg) {
3942                     tcg_reg_free(s, reg, allocated_regs);
3943                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3944                         /*
3945                          * Cross register class move not supported.  Sync the
3946                          * temp back to its slot and load from there.
3947                          */
3948                         temp_sync(s, ts, allocated_regs, 0, 0);
3949                         tcg_out_ld(s, ts->type, reg,
3950                                    ts->mem_base->reg, ts->mem_offset);
3951                     }
3952                 }
3953             } else {
3954                 TCGRegSet arg_set = 0;
3955 
3956                 tcg_reg_free(s, reg, allocated_regs);
3957                 tcg_regset_set_reg(arg_set, reg);
3958                 temp_load(s, ts, arg_set, allocated_regs, 0);
3959             }
3960 
3961             tcg_regset_set_reg(allocated_regs, reg);
3962         }
3963     }
3964 
3965     /* mark dead temporaries and free the associated registers */
3966     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3967         if (IS_DEAD_ARG(i)) {
3968             temp_dead(s, arg_temp(op->args[i]));
3969         }
3970     }
3971 
3972     /* clobber call registers */
3973     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3974         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3975             tcg_reg_free(s, i, allocated_regs);
3976         }
3977     }
3978 
3979     /* Save globals if they might be written by the helper, sync them if
3980        they might be read. */
3981     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3982         /* Nothing to do */
3983     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3984         sync_globals(s, allocated_regs);
3985     } else {
3986         save_globals(s, allocated_regs);
3987     }
3988 
3989 #ifdef CONFIG_TCG_INTERPRETER
3990     {
3991         gpointer hash = (gpointer)(uintptr_t)info->typemask;
3992         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
3993         assert(cif != NULL);
3994         tcg_out_call(s, func_addr, cif);
3995     }
3996 #else
3997     tcg_out_call(s, func_addr);
3998 #endif
3999 
4000     /* assign output registers and emit moves if needed */
4001     for (i = 0; i < nb_oargs; i++) {
4002         arg = op->args[i];
4003         ts = arg_temp(arg);
4004 
4005         /* ENV should not be modified.  */
4006         tcg_debug_assert(!temp_readonly(ts));
4007 
4008         reg = tcg_target_call_oarg_regs[i];
4009         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4010         if (ts->val_type == TEMP_VAL_REG) {
4011             s->reg_to_temp[ts->reg] = NULL;
4012         }
4013         ts->val_type = TEMP_VAL_REG;
4014         ts->reg = reg;
4015         ts->mem_coherent = 0;
4016         s->reg_to_temp[reg] = ts;
4017         if (NEED_SYNC_ARG(i)) {
4018             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4019         } else if (IS_DEAD_ARG(i)) {
4020             temp_dead(s, ts);
4021         }
4022     }
4023 }
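/*
 * To recap the calling convention handled above: the first
 * ARRAY_SIZE(tcg_target_call_iarg_regs) arguments are passed in
 * registers, the rest are stored relative to TCG_REG_CALL_STACK at
 * TCG_TARGET_CALL_STACK_OFFSET (walking downward on
 * TCG_TARGET_STACK_GROWSUP hosts), and results return in
 * tcg_target_call_oarg_regs.
 */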
4024 
4025 #ifdef CONFIG_PROFILER
4026 
4027 /* avoid copy/paste errors */
4028 #define PROF_ADD(to, from, field)                       \
4029     do {                                                \
4030         (to)->field += qatomic_read(&((from)->field));  \
4031     } while (0)
4032 
4033 #define PROF_MAX(to, from, field)                                       \
4034     do {                                                                \
4035         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4036         if (val__ > (to)->field) {                                      \
4037             (to)->field = val__;                                        \
4038         }                                                               \
4039     } while (0)
4040 
4041 /* Pass in a zeroed @prof */
4042 static inline
4043 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4044 {
4045     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4046     unsigned int i;
4047 
4048     for (i = 0; i < n_ctxs; i++) {
4049         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4050         const TCGProfile *orig = &s->prof;
4051 
4052         if (counters) {
4053             PROF_ADD(prof, orig, cpu_exec_time);
4054             PROF_ADD(prof, orig, tb_count1);
4055             PROF_ADD(prof, orig, tb_count);
4056             PROF_ADD(prof, orig, op_count);
4057             PROF_MAX(prof, orig, op_count_max);
4058             PROF_ADD(prof, orig, temp_count);
4059             PROF_MAX(prof, orig, temp_count_max);
4060             PROF_ADD(prof, orig, del_op_count);
4061             PROF_ADD(prof, orig, code_in_len);
4062             PROF_ADD(prof, orig, code_out_len);
4063             PROF_ADD(prof, orig, search_out_len);
4064             PROF_ADD(prof, orig, interm_time);
4065             PROF_ADD(prof, orig, code_time);
4066             PROF_ADD(prof, orig, la_time);
4067             PROF_ADD(prof, orig, opt_time);
4068             PROF_ADD(prof, orig, restore_count);
4069             PROF_ADD(prof, orig, restore_time);
4070         }
4071         if (table) {
4072             int i;
4073 
4074             for (i = 0; i < NB_OPS; i++) {
4075                 PROF_ADD(prof, orig, table_op_count[i]);
4076             }
4077         }
4078     }
4079 }
4080 
4081 #undef PROF_ADD
4082 #undef PROF_MAX
4083 
4084 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4085 {
4086     tcg_profile_snapshot(prof, true, false);
4087 }
4088 
4089 static void tcg_profile_snapshot_table(TCGProfile *prof)
4090 {
4091     tcg_profile_snapshot(prof, false, true);
4092 }
4093 
4094 void tcg_dump_op_count(void)
4095 {
4096     TCGProfile prof = {};
4097     int i;
4098 
4099     tcg_profile_snapshot_table(&prof);
4100     for (i = 0; i < NB_OPS; i++) {
4101         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4102                     prof.table_op_count[i]);
4103     }
4104 }
4105 
4106 int64_t tcg_cpu_exec_time(void)
4107 {
4108     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4109     unsigned int i;
4110     int64_t ret = 0;
4111 
4112     for (i = 0; i < n_ctxs; i++) {
4113         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4114         const TCGProfile *prof = &s->prof;
4115 
4116         ret += qatomic_read(&prof->cpu_exec_time);
4117     }
4118     return ret;
4119 }
4120 #else
4121 void tcg_dump_op_count(void)
4122 {
4123     qemu_printf("[TCG profiler not compiled]\n");
4124 }
4125 
4126 int64_t tcg_cpu_exec_time(void)
4127 {
4128     error_report("%s: TCG profiler not compiled", __func__);
4129     exit(EXIT_FAILURE);
4130 }
4131 #endif
4132 
4133 
4134 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4135 {
4136 #ifdef CONFIG_PROFILER
4137     TCGProfile *prof = &s->prof;
4138 #endif
4139     int i, num_insns;
4140     TCGOp *op;
4141 
4142 #ifdef CONFIG_PROFILER
4143     {
4144         int n = 0;
4145 
4146         QTAILQ_FOREACH(op, &s->ops, link) {
4147             n++;
4148         }
4149         qatomic_set(&prof->op_count, prof->op_count + n);
4150         if (n > prof->op_count_max) {
4151             qatomic_set(&prof->op_count_max, n);
4152         }
4153 
4154         n = s->nb_temps;
4155         qatomic_set(&prof->temp_count, prof->temp_count + n);
4156         if (n > prof->temp_count_max) {
4157             qatomic_set(&prof->temp_count_max, n);
4158         }
4159     }
4160 #endif
4161 
4162 #ifdef DEBUG_DISAS
4163     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4164                  && qemu_log_in_addr_range(tb->pc))) {
4165         FILE *logfile = qemu_log_lock();
4166         qemu_log("OP:\n");
4167         tcg_dump_ops(s, false);
4168         qemu_log("\n");
4169         qemu_log_unlock(logfile);
4170     }
4171 #endif
4172 
4173 #ifdef CONFIG_DEBUG_TCG
4174     /* Ensure all labels referenced have been emitted.  */
4175     {
4176         TCGLabel *l;
4177         bool error = false;
4178 
4179         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4180             if (unlikely(!l->present) && l->refs) {
4181                 qemu_log_mask(CPU_LOG_TB_OP,
4182                               "$L%d referenced but not present.\n", l->id);
4183                 error = true;
4184             }
4185         }
4186         assert(!error);
4187     }
4188 #endif
4189 
4190 #ifdef CONFIG_PROFILER
4191     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4192 #endif
4193 
4194 #ifdef USE_TCG_OPTIMIZATIONS
4195     tcg_optimize(s);
4196 #endif
4197 
4198 #ifdef CONFIG_PROFILER
4199     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4200     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4201 #endif
4202 
4203     reachable_code_pass(s);
4204     liveness_pass_1(s);
4205 
4206     if (s->nb_indirects > 0) {
4207 #ifdef DEBUG_DISAS
4208         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4209                      && qemu_log_in_addr_range(tb->pc))) {
4210             FILE *logfile = qemu_log_lock();
4211             qemu_log("OP before indirect lowering:\n");
4212             tcg_dump_ops(s, false);
4213             qemu_log("\n");
4214             qemu_log_unlock(logfile);
4215         }
4216 #endif
4217         /* Replace indirect temps with direct temps.  */
4218         if (liveness_pass_2(s)) {
4219             /* If changes were made, re-run liveness.  */
4220             liveness_pass_1(s);
4221         }
4222     }
4223 
4224 #ifdef CONFIG_PROFILER
4225     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4226 #endif
4227 
4228 #ifdef DEBUG_DISAS
4229     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4230                  && qemu_log_in_addr_range(tb->pc))) {
4231         FILE *logfile = qemu_log_lock();
4232         qemu_log("OP after optimization and liveness analysis:\n");
4233         tcg_dump_ops(s, true);
4234         qemu_log("\n");
4235         qemu_log_unlock(logfile);
4236     }
4237 #endif
4238 
4239     tcg_reg_alloc_start(s);
4240 
4241     /*
4242      * Reset the buffer pointers when restarting after overflow.
4243      * TODO: Move this into translate-all.c with the rest of the
4244      * buffer management.  Having only this done here is confusing.
4245      */
4246     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4247     s->code_ptr = s->code_buf;
4248 
4249 #ifdef TCG_TARGET_NEED_LDST_LABELS
4250     QSIMPLEQ_INIT(&s->ldst_labels);
4251 #endif
4252 #ifdef TCG_TARGET_NEED_POOL_LABELS
4253     s->pool_labels = NULL;
4254 #endif
4255 
4256     num_insns = -1;
4257     QTAILQ_FOREACH(op, &s->ops, link) {
4258         TCGOpcode opc = op->opc;
4259 
4260 #ifdef CONFIG_PROFILER
4261         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4262 #endif
4263 
4264         switch (opc) {
4265         case INDEX_op_mov_i32:
4266         case INDEX_op_mov_i64:
4267         case INDEX_op_mov_vec:
4268             tcg_reg_alloc_mov(s, op);
4269             break;
4270         case INDEX_op_dup_vec:
4271             tcg_reg_alloc_dup(s, op);
4272             break;
4273         case INDEX_op_insn_start:
4274             if (num_insns >= 0) {
4275                 size_t off = tcg_current_code_size(s);
4276                 s->gen_insn_end_off[num_insns] = off;
4277                 /* Assert that we do not overflow our stored offset.  */
4278                 assert(s->gen_insn_end_off[num_insns] == off);
4279             }
4280             num_insns++;
4281             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4282                 target_ulong a;
4283 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4284                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4285 #else
4286                 a = op->args[i];
4287 #endif
4288                 s->gen_insn_data[num_insns][i] = a;
4289             }
4290             break;
4291         case INDEX_op_discard:
4292             temp_dead(s, arg_temp(op->args[0]));
4293             break;
4294         case INDEX_op_set_label:
4295             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4296             tcg_out_label(s, arg_label(op->args[0]));
4297             break;
4298         case INDEX_op_call:
4299             tcg_reg_alloc_call(s, op);
4300             break;
4301         case INDEX_op_dup2_vec:
4302             if (tcg_reg_alloc_dup2(s, op)) {
4303                 break;
4304             }
4305             /* fall through */
4306         default:
4307             /* Sanity check that we've not introduced any unhandled opcodes. */
4308             tcg_debug_assert(tcg_op_supported(opc));
4309            /* Note: it would be much faster to have specialized
4310               register allocator functions for some common argument
4311               patterns.  */
4312             tcg_reg_alloc_op(s, op);
4313             break;
4314         }
4315 #ifdef CONFIG_DEBUG_TCG
4316         check_regs(s);
4317 #endif
4318         /* Test for (pending) buffer overflow.  The assumption is that any
4319            one operation beginning below the high water mark cannot overrun
4320            the buffer completely.  Thus we can test for overflow after
4321            generating code without having to check during generation.  */
4322         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4323             return -1;
4324         }
4325         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4326         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4327             return -2;
4328         }
4329     }
4330     tcg_debug_assert(num_insns >= 0);
4331     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4332 
4333     /* Generate TB finalization at the end of block */
4334 #ifdef TCG_TARGET_NEED_LDST_LABELS
4335     i = tcg_out_ldst_finalize(s);
4336     if (i < 0) {
4337         return i;
4338     }
4339 #endif
4340 #ifdef TCG_TARGET_NEED_POOL_LABELS
4341     i = tcg_out_pool_finalize(s);
4342     if (i < 0) {
4343         return i;
4344     }
4345 #endif
4346     if (!tcg_resolve_relocs(s)) {
4347         return -2;
4348     }
4349 
4350 #ifndef CONFIG_TCG_INTERPRETER
4351     /* flush instruction cache */
4352     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4353                         (uintptr_t)s->code_buf,
4354                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4355 #endif
4356 
4357     return tcg_current_code_size(s);
4358 }
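/*
 * Note the failure protocol: returning -1 means one op crossed the
 * code buffer's high-water mark, and returning -2 means a relocation
 * could not be resolved or a TB outgrew the 16-bit insn-offset
 * encoding; in either case the caller is expected to discard the
 * partial translation and retry.
 */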
4359 
4360 #ifdef CONFIG_PROFILER
4361 void tcg_dump_info(void)
4362 {
4363     TCGProfile prof = {};
4364     const TCGProfile *s;
4365     int64_t tb_count;
4366     int64_t tb_div_count;
4367     int64_t tot;
4368 
4369     tcg_profile_snapshot_counters(&prof);
4370     s = &prof;
4371     tb_count = s->tb_count;
4372     tb_div_count = tb_count ? tb_count : 1;
4373     tot = s->interm_time + s->code_time;
4374 
4375     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4376                 tot, tot / 2.4e9);
4377     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4378                 " %0.1f%%)\n",
4379                 tb_count, s->tb_count1 - tb_count,
4380                 (double)(s->tb_count1 - s->tb_count)
4381                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4382     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4383                 (double)s->op_count / tb_div_count, s->op_count_max);
4384     qemu_printf("deleted ops/TB      %0.2f\n",
4385                 (double)s->del_op_count / tb_div_count);
4386     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4387                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4388     qemu_printf("avg host code/TB    %0.1f\n",
4389                 (double)s->code_out_len / tb_div_count);
4390     qemu_printf("avg search data/TB  %0.1f\n",
4391                 (double)s->search_out_len / tb_div_count);
4392 
4393     qemu_printf("cycles/op           %0.1f\n",
4394                 s->op_count ? (double)tot / s->op_count : 0);
4395     qemu_printf("cycles/in byte      %0.1f\n",
4396                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4397     qemu_printf("cycles/out byte     %0.1f\n",
4398                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4399     qemu_printf("cycles/search byte  %0.1f\n",
4400                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4401     if (tot == 0) {
4402         tot = 1;
4403     }
4404     qemu_printf("  gen_interm time   %0.1f%%\n",
4405                 (double)s->interm_time / tot * 100.0);
4406     qemu_printf("  gen_code time     %0.1f%%\n",
4407                 (double)s->code_time / tot * 100.0);
4408     qemu_printf("optim./code time    %0.1f%%\n",
4409                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4410                 * 100.0);
4411     qemu_printf("liveness/code time  %0.1f%%\n",
4412                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4413     qemu_printf("cpu_restore count   %" PRId64 "\n",
4414                 s->restore_count);
4415     qemu_printf("  avg cycles        %0.1f\n",
4416                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4417 }
4418 #else
4419 void tcg_dump_info(void)
4420 {
4421     qemu_printf("[TCG profiler not compiled]\n");
4422 }
4423 #endif
4424 
4425 #ifdef ELF_HOST_MACHINE
4426 /* In order to use this feature, the backend needs to do three things:
4427 
4428    (1) Define ELF_HOST_MACHINE to indicate both what value to
4429        put into the ELF image and to indicate support for the feature.
4430 
4431    (2) Define tcg_register_jit.  This should create a buffer containing
4432        the contents of a .debug_frame section that describes the post-
4433        prologue unwind info for the tcg machine.
4434 
4435    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4436 */
4437 
4438 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4439 typedef enum {
4440     JIT_NOACTION = 0,
4441     JIT_REGISTER_FN,
4442     JIT_UNREGISTER_FN
4443 } jit_actions_t;
4444 
4445 struct jit_code_entry {
4446     struct jit_code_entry *next_entry;
4447     struct jit_code_entry *prev_entry;
4448     const void *symfile_addr;
4449     uint64_t symfile_size;
4450 };
4451 
4452 struct jit_descriptor {
4453     uint32_t version;
4454     uint32_t action_flag;
4455     struct jit_code_entry *relevant_entry;
4456     struct jit_code_entry *first_entry;
4457 };
4458 
4459 void __jit_debug_register_code(void) __attribute__((noinline));
4460 void __jit_debug_register_code(void)
4461 {
4462     asm("");
4463 }
4464 
4465 /* Must statically initialize the version, because GDB may check
4466    the version before we can set it.  */
4467 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4468 
4469 /* End GDB interface.  */
4470 
4471 static int find_string(const char *strtab, const char *str)
4472 {
4473     const char *p = strtab + 1;
4474 
4475     while (1) {
4476         if (strcmp(p, str) == 0) {
4477             return p - strtab;
4478         }
4479         p += strlen(p) + 1;
4480     }
4481 }
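/* Note that find_string assumes 'str' occurs in 'strtab' and would
   loop forever otherwise.  With the string table built below, e.g.
   find_string(img->str, ".text") returns 1, the offset just past the
   leading NUL.  */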
4482 
4483 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4484                                  const void *debug_frame,
4485                                  size_t debug_frame_size)
4486 {
4487     struct __attribute__((packed)) DebugInfo {
4488         uint32_t  len;
4489         uint16_t  version;
4490         uint32_t  abbrev;
4491         uint8_t   ptr_size;
4492         uint8_t   cu_die;
4493         uint16_t  cu_lang;
4494         uintptr_t cu_low_pc;
4495         uintptr_t cu_high_pc;
4496         uint8_t   fn_die;
4497         char      fn_name[16];
4498         uintptr_t fn_low_pc;
4499         uintptr_t fn_high_pc;
4500         uint8_t   cu_eoc;
4501     };
4502 
4503     struct ElfImage {
4504         ElfW(Ehdr) ehdr;
4505         ElfW(Phdr) phdr;
4506         ElfW(Shdr) shdr[7];
4507         ElfW(Sym)  sym[2];
4508         struct DebugInfo di;
4509         uint8_t    da[24];
4510         char       str[80];
4511     };
4512 
4513     struct ElfImage *img;
4514 
4515     static const struct ElfImage img_template = {
4516         .ehdr = {
4517             .e_ident[EI_MAG0] = ELFMAG0,
4518             .e_ident[EI_MAG1] = ELFMAG1,
4519             .e_ident[EI_MAG2] = ELFMAG2,
4520             .e_ident[EI_MAG3] = ELFMAG3,
4521             .e_ident[EI_CLASS] = ELF_CLASS,
4522             .e_ident[EI_DATA] = ELF_DATA,
4523             .e_ident[EI_VERSION] = EV_CURRENT,
4524             .e_type = ET_EXEC,
4525             .e_machine = ELF_HOST_MACHINE,
4526             .e_version = EV_CURRENT,
4527             .e_phoff = offsetof(struct ElfImage, phdr),
4528             .e_shoff = offsetof(struct ElfImage, shdr),
4529             .e_ehsize = sizeof(ElfW(Ehdr)),
4530             .e_phentsize = sizeof(ElfW(Phdr)),
4531             .e_phnum = 1,
4532             .e_shentsize = sizeof(ElfW(Shdr)),
4533             .e_shnum = ARRAY_SIZE(img->shdr),
4534             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4535 #ifdef ELF_HOST_FLAGS
4536             .e_flags = ELF_HOST_FLAGS,
4537 #endif
4538 #ifdef ELF_OSABI
4539             .e_ident[EI_OSABI] = ELF_OSABI,
4540 #endif
4541         },
4542         .phdr = {
4543             .p_type = PT_LOAD,
4544             .p_flags = PF_X,
4545         },
4546         .shdr = {
4547             [0] = { .sh_type = SHT_NULL },
4548             /* Trick: The contents of code_gen_buffer are not present in
4549                this fake ELF file; that got allocated elsewhere.  Therefore
4550                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4551                will not look for contents.  We can record any address.  */
4552             [1] = { /* .text */
4553                 .sh_type = SHT_NOBITS,
4554                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4555             },
4556             [2] = { /* .debug_info */
4557                 .sh_type = SHT_PROGBITS,
4558                 .sh_offset = offsetof(struct ElfImage, di),
4559                 .sh_size = sizeof(struct DebugInfo),
4560             },
4561             [3] = { /* .debug_abbrev */
4562                 .sh_type = SHT_PROGBITS,
4563                 .sh_offset = offsetof(struct ElfImage, da),
4564                 .sh_size = sizeof(img->da),
4565             },
4566             [4] = { /* .debug_frame */
4567                 .sh_type = SHT_PROGBITS,
4568                 .sh_offset = sizeof(struct ElfImage),
4569             },
4570             [5] = { /* .symtab */
4571                 .sh_type = SHT_SYMTAB,
4572                 .sh_offset = offsetof(struct ElfImage, sym),
4573                 .sh_size = sizeof(img->sym),
4574                 .sh_info = 1,
4575                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4576                 .sh_entsize = sizeof(ElfW(Sym)),
4577             },
4578             [6] = { /* .strtab */
4579                 .sh_type = SHT_STRTAB,
4580                 .sh_offset = offsetof(struct ElfImage, str),
4581                 .sh_size = sizeof(img->str),
4582             }
4583         },
4584         .sym = {
4585             [1] = { /* code_gen_buffer */
4586                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4587                 .st_shndx = 1,
4588             }
4589         },
4590         .di = {
4591             .len = sizeof(struct DebugInfo) - 4,
4592             .version = 2,
4593             .ptr_size = sizeof(void *),
4594             .cu_die = 1,
4595             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4596             .fn_die = 2,
4597             .fn_name = "code_gen_buffer"
4598         },
4599         .da = {
4600             1,          /* abbrev number (the cu) */
4601             0x11, 1,    /* DW_TAG_compile_unit, has children */
4602             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4603             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4604             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4605             0, 0,       /* end of abbrev */
4606             2,          /* abbrev number (the fn) */
4607             0x2e, 0,    /* DW_TAG_subprogram, no children */
4608             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4609             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4610             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4611             0, 0,       /* end of abbrev */
4612             0           /* no more abbrev */
4613         },
4614         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4615                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4616     };
4617 
4618     /* We only need a single jit entry; statically allocate it.  */
4619     static struct jit_code_entry one_entry;
4620 
4621     uintptr_t buf = (uintptr_t)buf_ptr;
4622     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4623     DebugFrameHeader *dfh;
4624 
4625     img = g_malloc(img_size);
4626     *img = img_template;
4627 
4628     img->phdr.p_vaddr = buf;
4629     img->phdr.p_paddr = buf;
4630     img->phdr.p_memsz = buf_size;
4631 
4632     img->shdr[1].sh_name = find_string(img->str, ".text");
4633     img->shdr[1].sh_addr = buf;
4634     img->shdr[1].sh_size = buf_size;
4635 
4636     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4637     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4638 
4639     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4640     img->shdr[4].sh_size = debug_frame_size;
4641 
4642     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4643     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4644 
4645     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4646     img->sym[1].st_value = buf;
4647     img->sym[1].st_size = buf_size;
4648 
4649     img->di.cu_low_pc = buf;
4650     img->di.cu_high_pc = buf + buf_size;
4651     img->di.fn_low_pc = buf;
4652     img->di.fn_high_pc = buf + buf_size;
4653 
4654     dfh = (DebugFrameHeader *)(img + 1);
4655     memcpy(dfh, debug_frame, debug_frame_size);
4656     dfh->fde.func_start = buf;
4657     dfh->fde.func_len = buf_size;
4658 
4659 #ifdef DEBUG_JIT
4660     /* Enable this block to debug the ELF image file creation.
4661        One can use readelf, objdump, or other inspection utilities.  */
4662     {
4663         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4664         if (f) {
4665             if (fwrite(img, img_size, 1, f) != 1) {
4666                 /* Avoid stupid unused return value warning for fwrite.  */
4667             }
4668             fclose(f);
4669         }
4670     }
4671 #endif
4672 
4673     one_entry.symfile_addr = img;
4674     one_entry.symfile_size = img_size;
4675 
4676     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4677     __jit_debug_descriptor.relevant_entry = &one_entry;
4678     __jit_debug_descriptor.first_entry = &one_entry;
4679     __jit_debug_register_code();
4680 }
4681 #else
4682 /* No support for the feature.  Provide the entry point expected by exec.c,
4683    and implement the internal function we declared earlier.  */
4684 
4685 static void tcg_register_jit_int(const void *buf, size_t size,
4686                                  const void *debug_frame,
4687                                  size_t debug_frame_size)
4688 {
4689 }
4690 
4691 void tcg_register_jit(const void *buf, size_t buf_size)
4692 {
4693 }
4694 #endif /* ELF_HOST_MACHINE */
4695 
4696 #if !TCG_TARGET_MAYBE_vec
4697 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4698 {
4699     g_assert_not_reached();
4700 }
4701 #endif
4702