xref: /openbmc/qemu/tcg/tcg.c (revision f7a6df5f)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
45 
46 #include "exec/exec-all.h"
47 
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
51 
52 #include "tcg/tcg-op.h"
53 
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS  ELFCLASS32
56 #else
57 # define ELF_CLASS  ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA   ELFDATA2MSB
61 #else
62 # define ELF_DATA   ELFDATA2LSB
63 #endif
64 
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE part of the debug-frame header.  The first member is aligned to
 * sizeof(void *), which also fixes the alignment of the whole struct.
 * NOTE(review): the field names look like the DWARF .debug_frame Common
 * Information Entry; the code that fills them in is outside this excerpt.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * FDE part of the debug-frame header (packed).  func_start and func_len
 * are pointer-sized.
 * NOTE(review): presumably they describe the address range covered by
 * the frame description; confirm against the code that fills them in.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
/* A CIE immediately followed by one FDE header, packed together. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(const void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
/*
 * Stubs for the vector emitters, compiled when TCG_TARGET_MAYBE_vec is
 * zero.  They keep the callers in this file compiling; each one asserts
 * that it is never reached.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154                                   const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158 
/*
 * Number of bytes kept in reserve at the end of a region:
 * tcg_region_assign() places code_gen_highwater this far before the
 * region's end.
 */
#define TCG_HIGHWATER 1024

/* Registered TCG contexts; slots are claimed in tcg_register_thread(). */
static TCGContext **tcg_ctxs;
/* Number of claimed slots in tcg_ctxs[]; accessed with qatomic_*(). */
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
/* Added to a rw pointer to obtain the rx one (see tcg_splitwx_to_rx()). */
uintptr_t tcg_splitwx_diff;
166 
167 #ifndef CONFIG_TCG_INTERPRETER
168 tcg_prologue_fn *tcg_qemu_tb_exec;
169 #endif
170 
/*
 * One tree of translation blocks per region.  The tree is created with
 * tb_tc_cmp() as comparator; entries map &tb->tc to the tb itself (see
 * tcg_tb_insert()).  Every access to @tree is made with @lock held.
 */
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
176 
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;         /* start of the buffer; also start of region 0 */
    void *start_aligned; /* @start rounded up to the host page size */
    void *end;           /* end of the last region (guard page excluded) */
    size_t n;            /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
198 
/* The single, file-wide region bookkeeping; set up by tcg_region_init(). */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
/* sizeof(struct tcg_region_tree) rounded up to the dcache line size */
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
209 
210 #if TCG_TARGET_INSN_UNIT_SIZE == 1
211 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
212 {
213     *s->code_ptr++ = v;
214 }
215 
216 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
217                                                       uint8_t v)
218 {
219     *p = v;
220 }
221 #endif
222 
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
225 {
226     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
227         *s->code_ptr++ = v;
228     } else {
229         tcg_insn_unit *p = s->code_ptr;
230         memcpy(p, &v, sizeof(v));
231         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
232     }
233 }
234 
235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
236                                                        uint16_t v)
237 {
238     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
239         *p = v;
240     } else {
241         memcpy(p, &v, sizeof(v));
242     }
243 }
244 #endif
245 
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
248 {
249     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
250         *s->code_ptr++ = v;
251     } else {
252         tcg_insn_unit *p = s->code_ptr;
253         memcpy(p, &v, sizeof(v));
254         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
255     }
256 }
257 
258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
259                                                        uint32_t v)
260 {
261     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
262         *p = v;
263     } else {
264         memcpy(p, &v, sizeof(v));
265     }
266 }
267 #endif
268 
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
271 {
272     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
273         *s->code_ptr++ = v;
274     } else {
275         tcg_insn_unit *p = s->code_ptr;
276         memcpy(p, &v, sizeof(v));
277         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
278     }
279 }
280 
281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
282                                                        uint64_t v)
283 {
284     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
285         *p = v;
286     } else {
287         memcpy(p, &v, sizeof(v));
288     }
289 }
290 #endif
291 
292 /* label relocation processing */
293 
294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
295                           TCGLabel *l, intptr_t addend)
296 {
297     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
298 
299     r->type = type;
300     r->ptr = code_ptr;
301     r->addend = addend;
302     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
303 }
304 
305 static void tcg_out_label(TCGContext *s, TCGLabel *l)
306 {
307     tcg_debug_assert(!l->has_value);
308     l->has_value = 1;
309     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
310 }
311 
312 TCGLabel *gen_new_label(void)
313 {
314     TCGContext *s = tcg_ctx;
315     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
316 
317     memset(l, 0, sizeof(TCGLabel));
318     l->id = s->nb_labels++;
319     QSIMPLEQ_INIT(&l->relocs);
320 
321     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
322 
323     return l;
324 }
325 
326 static bool tcg_resolve_relocs(TCGContext *s)
327 {
328     TCGLabel *l;
329 
330     QSIMPLEQ_FOREACH(l, &s->labels, next) {
331         TCGRelocation *r;
332         uintptr_t value = l->u.value;
333 
334         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
335             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
336                 return false;
337             }
338         }
339     }
340     return true;
341 }
342 
343 static void set_jmp_reset_offset(TCGContext *s, int which)
344 {
345     /*
346      * We will check for overflow at the end of the opcode loop in
347      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
348      */
349     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
350 }
351 
352 /* Signal overflow, starting over with fewer guest insns. */
353 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
354 {
355     siglongjmp(s->jmp_trans, -2);
356 }
357 
/*
 * Token-pasting helpers: glue prefix P to one..six constraint names,
 * with an underscore between consecutive names.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
364 
/* Define an enumeration for the various combinations. */

/*
 * In this first pass each C_* macro expands to an enumerator name plus
 * a trailing comma, so that including tcg-target-con-set.h inside the
 * enum body yields one enumerator per constraint set.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Returns the constraint set to use for an opcode. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
389 
390 #undef C_O0_I1
391 #undef C_O0_I2
392 #undef C_O0_I3
393 #undef C_O0_I4
394 #undef C_O1_I1
395 #undef C_O1_I2
396 #undef C_O1_I3
397 #undef C_O1_I4
398 #undef C_N1_I2
399 #undef C_O2_I1
400 #undef C_O2_I2
401 #undef C_O2_I3
402 #undef C_O2_I4
403 
/* Put all of the constraint sets into an array, indexed by the enum. */

/*
 * In this second pass each C_* macro expands to an initializer whose
 * args_ct_str holds the stringified constraints, outputs first.  The
 * array order matches the enum because both come from the same header.
 * C_N1_I2 prepends "&" to its output constraint.
 */
#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
426 
427 
428 #undef C_O0_I1
429 #undef C_O0_I2
430 #undef C_O0_I3
431 #undef C_O0_I4
432 #undef C_O1_I1
433 #undef C_O1_I2
434 #undef C_O1_I3
435 #undef C_O1_I4
436 #undef C_N1_I2
437 #undef C_O2_I1
438 #undef C_O2_I2
439 #undef C_O2_I3
440 #undef C_O2_I4
441 
/* Expand the enumerator to be returned from tcg_target_op_def(). */

/*
 * Third and final definition of the C_* macros: the bare enumerator
 * name, without the trailing comma used when defining the enum.  These
 * stay defined for tcg-target.c.inc, which is included next.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
460 
461 #include "tcg-target.c.inc"
462 
463 /* compare a pointer @ptr and a tb_tc @s */
464 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
465 {
466     if (ptr >= s->ptr + s->size) {
467         return 1;
468     } else if (ptr < s->ptr) {
469         return -1;
470     }
471     return 0;
472 }
473 
474 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
475 {
476     const struct tb_tc *a = ap;
477     const struct tb_tc *b = bp;
478 
479     /*
480      * When both sizes are set, we know this isn't a lookup.
481      * This is the most likely case: every TB must be inserted; lookups
482      * are a lot less frequent.
483      */
484     if (likely(a->size && b->size)) {
485         if (a->ptr > b->ptr) {
486             return 1;
487         } else if (a->ptr < b->ptr) {
488             return -1;
489         }
490         /* a->ptr == b->ptr should happen only on deletions */
491         g_assert(a->size == b->size);
492         return 0;
493     }
494     /*
495      * All lookups have either .size field set to 0.
496      * From the glib sources we see that @ap is always the lookup key. However
497      * the docs provide no guarantee, so we just mark this case as likely.
498      */
499     if (likely(a->size == 0)) {
500         return ptr_cmp_tb_tc(a->ptr, b);
501     }
502     return ptr_cmp_tb_tc(b->ptr, a);
503 }
504 
505 static void tcg_region_trees_init(void)
506 {
507     size_t i;
508 
509     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
510     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
511     for (i = 0; i < region.n; i++) {
512         struct tcg_region_tree *rt = region_trees + i * tree_size;
513 
514         qemu_mutex_init(&rt->lock);
515         rt->tree = g_tree_new(tb_tc_cmp);
516     }
517 }
518 
519 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
520 {
521     size_t region_idx;
522 
523     /*
524      * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
525      * a signal handler over which the caller has no control.
526      */
527     if (!in_code_gen_buffer(p)) {
528         p -= tcg_splitwx_diff;
529         if (!in_code_gen_buffer(p)) {
530             return NULL;
531         }
532     }
533 
534     if (p < region.start_aligned) {
535         region_idx = 0;
536     } else {
537         ptrdiff_t offset = p - region.start_aligned;
538 
539         if (offset > region.stride * (region.n - 1)) {
540             region_idx = region.n - 1;
541         } else {
542             region_idx = offset / region.stride;
543         }
544     }
545     return region_trees + region_idx * tree_size;
546 }
547 
548 void tcg_tb_insert(TranslationBlock *tb)
549 {
550     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
551 
552     g_assert(rt != NULL);
553     qemu_mutex_lock(&rt->lock);
554     g_tree_insert(rt->tree, &tb->tc, tb);
555     qemu_mutex_unlock(&rt->lock);
556 }
557 
558 void tcg_tb_remove(TranslationBlock *tb)
559 {
560     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
561 
562     g_assert(rt != NULL);
563     qemu_mutex_lock(&rt->lock);
564     g_tree_remove(rt->tree, &tb->tc);
565     qemu_mutex_unlock(&rt->lock);
566 }
567 
568 /*
569  * Find the TB 'tb' such that
570  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
571  * Return NULL if not found.
572  */
573 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
574 {
575     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
576     TranslationBlock *tb;
577     struct tb_tc s = { .ptr = (void *)tc_ptr };
578 
579     if (rt == NULL) {
580         return NULL;
581     }
582 
583     qemu_mutex_lock(&rt->lock);
584     tb = g_tree_lookup(rt->tree, &s);
585     qemu_mutex_unlock(&rt->lock);
586     return tb;
587 }
588 
589 static void tcg_region_tree_lock_all(void)
590 {
591     size_t i;
592 
593     for (i = 0; i < region.n; i++) {
594         struct tcg_region_tree *rt = region_trees + i * tree_size;
595 
596         qemu_mutex_lock(&rt->lock);
597     }
598 }
599 
600 static void tcg_region_tree_unlock_all(void)
601 {
602     size_t i;
603 
604     for (i = 0; i < region.n; i++) {
605         struct tcg_region_tree *rt = region_trees + i * tree_size;
606 
607         qemu_mutex_unlock(&rt->lock);
608     }
609 }
610 
611 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
612 {
613     size_t i;
614 
615     tcg_region_tree_lock_all();
616     for (i = 0; i < region.n; i++) {
617         struct tcg_region_tree *rt = region_trees + i * tree_size;
618 
619         g_tree_foreach(rt->tree, func, user_data);
620     }
621     tcg_region_tree_unlock_all();
622 }
623 
624 size_t tcg_nb_tbs(void)
625 {
626     size_t nb_tbs = 0;
627     size_t i;
628 
629     tcg_region_tree_lock_all();
630     for (i = 0; i < region.n; i++) {
631         struct tcg_region_tree *rt = region_trees + i * tree_size;
632 
633         nb_tbs += g_tree_nnodes(rt->tree);
634     }
635     tcg_region_tree_unlock_all();
636     return nb_tbs;
637 }
638 
639 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
640 {
641     TranslationBlock *tb = v;
642 
643     tb_destroy(tb);
644     return FALSE;
645 }
646 
647 static void tcg_region_tree_reset_all(void)
648 {
649     size_t i;
650 
651     tcg_region_tree_lock_all();
652     for (i = 0; i < region.n; i++) {
653         struct tcg_region_tree *rt = region_trees + i * tree_size;
654 
655         g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
656         /* Increment the refcount first so that destroy acts as a reset */
657         g_tree_ref(rt->tree);
658         g_tree_destroy(rt->tree);
659     }
660     tcg_region_tree_unlock_all();
661 }
662 
663 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
664 {
665     void *start, *end;
666 
667     start = region.start_aligned + curr_region * region.stride;
668     end = start + region.size;
669 
670     if (curr_region == 0) {
671         start = region.start;
672     }
673     if (curr_region == region.n - 1) {
674         end = region.end;
675     }
676 
677     *pstart = start;
678     *pend = end;
679 }
680 
681 static void tcg_region_assign(TCGContext *s, size_t curr_region)
682 {
683     void *start, *end;
684 
685     tcg_region_bounds(curr_region, &start, &end);
686 
687     s->code_gen_buffer = start;
688     s->code_gen_ptr = start;
689     s->code_gen_buffer_size = end - start;
690     s->code_gen_highwater = end - TCG_HIGHWATER;
691 }
692 
693 static bool tcg_region_alloc__locked(TCGContext *s)
694 {
695     if (region.current == region.n) {
696         return true;
697     }
698     tcg_region_assign(s, region.current);
699     region.current++;
700     return false;
701 }
702 
703 /*
704  * Request a new region once the one in use has filled up.
705  * Returns true on error.
706  */
707 static bool tcg_region_alloc(TCGContext *s)
708 {
709     bool err;
710     /* read the region size now; alloc__locked will overwrite it on success */
711     size_t size_full = s->code_gen_buffer_size;
712 
713     qemu_mutex_lock(&region.lock);
714     err = tcg_region_alloc__locked(s);
715     if (!err) {
716         region.agg_size_full += size_full - TCG_HIGHWATER;
717     }
718     qemu_mutex_unlock(&region.lock);
719     return err;
720 }
721 
722 /*
723  * Perform a context's first region allocation.
724  * This function does _not_ increment region.agg_size_full.
725  */
726 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
727 {
728     return tcg_region_alloc__locked(s);
729 }
730 
731 /* Call from a safe-work context */
732 void tcg_region_reset_all(void)
733 {
734     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
735     unsigned int i;
736 
737     qemu_mutex_lock(&region.lock);
738     region.current = 0;
739     region.agg_size_full = 0;
740 
741     for (i = 0; i < n_ctxs; i++) {
742         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
743         bool err = tcg_region_initial_alloc__locked(s);
744 
745         g_assert(!err);
746     }
747     qemu_mutex_unlock(&region.lock);
748 
749     tcg_region_tree_reset_all();
750 }
751 
752 #ifdef CONFIG_USER_ONLY
/* User-mode always uses exactly one region. */
static size_t tcg_n_regions(void)
{
    return 1;
}
757 #else
758 /*
759  * It is likely that some vCPUs will translate more code than others, so we
760  * first try to set more regions than max_cpus, with those regions being of
761  * reasonable size. If that's not possible we make do by evenly dividing
762  * the code_gen_buffer among the vCPUs.
763  */
764 static size_t tcg_n_regions(void)
765 {
766     size_t i;
767 
768     /* Use a single region if all we have is one vCPU thread */
769 #if !defined(CONFIG_USER_ONLY)
770     MachineState *ms = MACHINE(qdev_get_machine());
771     unsigned int max_cpus = ms->smp.max_cpus;
772 #endif
773     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
774         return 1;
775     }
776 
777     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
778     for (i = 8; i > 0; i--) {
779         size_t regions_per_thread = i;
780         size_t region_size;
781 
782         region_size = tcg_init_ctx.code_gen_buffer_size;
783         region_size /= max_cpus * regions_per_thread;
784 
785         if (region_size >= 2 * 1024u * 1024) {
786             return max_cpus * regions_per_thread;
787         }
788     }
789     /* If we can't, then just allocate one region per vCPU thread */
790     return max_cpus;
791 }
792 #endif
793 
794 /*
795  * Initializes region partitioning.
796  *
797  * Called at init time from the parent thread (i.e. the one calling
798  * tcg_context_init), after the target's TCG globals have been set.
799  *
800  * Region partitioning works by splitting code_gen_buffer into separate regions,
801  * and then assigning regions to TCG threads so that the threads can translate
802  * code in parallel without synchronization.
803  *
804  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
805  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
806  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
807  * must have been parsed before calling this function, since it calls
808  * qemu_tcg_mttcg_enabled().
809  *
810  * In user-mode we use a single region.  Having multiple regions in user-mode
811  * is not supported, because the number of vCPU threads (recall that each thread
812  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
813  * OS, and usually this number is huge (tens of thousands is not uncommon).
814  * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
817  *
818  * However, this user-mode limitation is unlikely to be a significant problem
819  * in practice. Multi-threaded guests share most if not all of their translated
820  * code, which makes parallel code generation less appealing than in softmmu.
821  */
822 void tcg_region_init(void)
823 {
824     void *buf = tcg_init_ctx.code_gen_buffer;
825     void *aligned;
826     size_t size = tcg_init_ctx.code_gen_buffer_size;
827     size_t page_size = qemu_real_host_page_size;
828     size_t region_size;
829     size_t n_regions;
830     size_t i;
831 
832     n_regions = tcg_n_regions();
833 
834     /* The first region will be 'aligned - buf' bytes larger than the others */
835     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
836     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
837     /*
838      * Make region_size a multiple of page_size, using aligned as the start.
839      * As a result of this we might end up with a few extra pages at the end of
840      * the buffer; we will assign those to the last region.
841      */
842     region_size = (size - (aligned - buf)) / n_regions;
843     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
844 
845     /* A region must have at least 2 pages; one code, one guard */
846     g_assert(region_size >= 2 * page_size);
847 
848     /* init the region struct */
849     qemu_mutex_init(&region.lock);
850     region.n = n_regions;
851     region.size = region_size - page_size;
852     region.stride = region_size;
853     region.start = buf;
854     region.start_aligned = aligned;
855     /* page-align the end, since its last page will be a guard page */
856     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
857     /* account for that last guard page */
858     region.end -= page_size;
859 
860     /*
861      * Set guard pages in the rw buffer, as that's the one into which
862      * buffer overruns could occur.  Do not set guard pages in the rx
863      * buffer -- let that one use hugepages throughout.
864      */
865     for (i = 0; i < region.n; i++) {
866         void *start, *end;
867 
868         tcg_region_bounds(i, &start, &end);
869 
870         /*
871          * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
872          * rejects a permission change from RWX -> NONE.  Guard pages are
873          * nice for bug detection but are not essential; ignore any failure.
874          */
875         (void)qemu_mprotect_none(end, page_size);
876     }
877 
878     tcg_region_trees_init();
879 
880     /* In user-mode we support only one ctx, so do the initial allocation now */
881 #ifdef CONFIG_USER_ONLY
882     {
883         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
884 
885         g_assert(!err);
886     }
887 #endif
888 }
889 
890 #ifdef CONFIG_DEBUG_TCG
891 const void *tcg_splitwx_to_rx(void *rw)
892 {
893     /* Pass NULL pointers unchanged. */
894     if (rw) {
895         g_assert(in_code_gen_buffer(rw));
896         rw += tcg_splitwx_diff;
897     }
898     return rw;
899 }
900 
901 void *tcg_splitwx_to_rw(const void *rx)
902 {
903     /* Pass NULL pointers unchanged. */
904     if (rx) {
905         rx -= tcg_splitwx_diff;
906         /* Assert that we end with a pointer in the rw region. */
907         g_assert(in_code_gen_buffer(rx));
908     }
909     return (void *)rx;
910 }
911 #endif /* CONFIG_DEBUG_TCG */
912 
913 static void alloc_tcg_plugin_context(TCGContext *s)
914 {
915 #ifdef CONFIG_PLUGIN
916     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
917     s->plugin_tb->insns =
918         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
919 #endif
920 }
921 
922 /*
923  * All TCG threads except the parent (i.e. the one that called tcg_context_init
924  * and registered the target's TCG globals) must register with this function
925  * before initiating translation.
926  *
927  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
928  * of tcg_region_init() for the reasoning behind this.
929  *
930  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
931  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
932  * is not used anymore for translation once this function is called.
933  *
934  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
935  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
936  */
937 #ifdef CONFIG_USER_ONLY
938 void tcg_register_thread(void)
939 {
940     tcg_ctx = &tcg_init_ctx;
941 }
942 #else
943 void tcg_register_thread(void)
944 {
945     MachineState *ms = MACHINE(qdev_get_machine());
946     TCGContext *s = g_malloc(sizeof(*s));
947     unsigned int i, n;
948     bool err;
949 
950     *s = tcg_init_ctx;
951 
952     /* Relink mem_base.  */
953     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
954         if (tcg_init_ctx.temps[i].mem_base) {
955             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
956             tcg_debug_assert(b >= 0 && b < n);
957             s->temps[i].mem_base = &s->temps[b];
958         }
959     }
960 
961     /* Claim an entry in tcg_ctxs */
962     n = qatomic_fetch_inc(&n_tcg_ctxs);
963     g_assert(n < ms->smp.max_cpus);
964     qatomic_set(&tcg_ctxs[n], s);
965 
966     if (n > 0) {
967         alloc_tcg_plugin_context(s);
968     }
969 
970     tcg_ctx = s;
971     qemu_mutex_lock(&region.lock);
972     err = tcg_region_initial_alloc__locked(tcg_ctx);
973     g_assert(!err);
974     qemu_mutex_unlock(&region.lock);
975 }
976 #endif /* !CONFIG_USER_ONLY */
977 
978 /*
979  * Returns the size (in bytes) of all translated code (i.e. from all regions)
980  * currently in the cache.
981  * See also: tcg_code_capacity()
982  * Do not confuse with tcg_current_code_size(); that one applies to a single
983  * TCG context.
984  */
985 size_t tcg_code_size(void)
986 {
987     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
988     unsigned int i;
989     size_t total;
990 
991     qemu_mutex_lock(&region.lock);
992     total = region.agg_size_full;
993     for (i = 0; i < n_ctxs; i++) {
994         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
995         size_t size;
996 
997         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
998         g_assert(size <= s->code_gen_buffer_size);
999         total += size;
1000     }
1001     qemu_mutex_unlock(&region.lock);
1002     return total;
1003 }
1004 
1005 /*
1006  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
1007  * regions.
1008  * See also: tcg_code_size()
1009  */
1010 size_t tcg_code_capacity(void)
1011 {
1012     size_t guard_size, capacity;
1013 
1014     /* no need for synchronization; these variables are set at init time */
1015     guard_size = region.stride - region.size;
1016     capacity = region.end + guard_size - region.start;
1017     capacity -= region.n * (guard_size + TCG_HIGHWATER);
1018     return capacity;
1019 }
1020 
1021 size_t tcg_tb_phys_invalidate_count(void)
1022 {
1023     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
1024     unsigned int i;
1025     size_t total = 0;
1026 
1027     for (i = 0; i < n_ctxs; i++) {
1028         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
1029 
1030         total += qatomic_read(&s->tb_phys_invalidate_count);
1031     }
1032     return total;
1033 }
1034 
1035 /* pool based memory allocation */
1036 void *tcg_malloc_internal(TCGContext *s, int size)
1037 {
1038     TCGPool *p;
1039     int pool_size;
1040 
1041     if (size > TCG_POOL_CHUNK_SIZE) {
1042         /* big malloc: insert a new pool (XXX: could optimize) */
1043         p = g_malloc(sizeof(TCGPool) + size);
1044         p->size = size;
1045         p->next = s->pool_first_large;
1046         s->pool_first_large = p;
1047         return p->data;
1048     } else {
1049         p = s->pool_current;
1050         if (!p) {
1051             p = s->pool_first;
1052             if (!p)
1053                 goto new_pool;
1054         } else {
1055             if (!p->next) {
1056             new_pool:
1057                 pool_size = TCG_POOL_CHUNK_SIZE;
1058                 p = g_malloc(sizeof(TCGPool) + pool_size);
1059                 p->size = pool_size;
1060                 p->next = NULL;
1061                 if (s->pool_current)
1062                     s->pool_current->next = p;
1063                 else
1064                     s->pool_first = p;
1065             } else {
1066                 p = p->next;
1067             }
1068         }
1069     }
1070     s->pool_current = p;
1071     s->pool_cur = p->data + size;
1072     s->pool_end = p->data + p->size;
1073     return p->data;
1074 }
1075 
1076 void tcg_pool_reset(TCGContext *s)
1077 {
1078     TCGPool *p, *t;
1079     for (p = s->pool_first_large; p; p = t) {
1080         t = p->next;
1081         g_free(p);
1082     }
1083     s->pool_first_large = NULL;
1084     s->pool_cur = s->pool_end = NULL;
1085     s->pool_current = NULL;
1086 }
1087 
/* Describes one helper; the element type of the all_helpers[] table. */
typedef struct TCGHelperInfo {
    /* Helper function pointer; also used as the key into helper_table. */
    void *func;
    /* Name string of the helper. */
    const char *name;
    /* NOTE(review): flag bits are not interpreted in this part of the file. */
    unsigned flags;
    /* NOTE(review): encoding is not visible in this part of the file. */
    unsigned sizemask;
} TCGHelperInfo;
1094 
1095 #include "exec/helper-proto.h"
1096 
1097 static const TCGHelperInfo all_helpers[] = {
1098 #include "exec/helper-tcg.h"
1099 };
1100 static GHashTable *helper_table;
1101 
1102 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1103 static void process_op_defs(TCGContext *s);
1104 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1105                                             TCGReg reg, const char *name);
1106 
1107 void tcg_context_init(TCGContext *s)
1108 {
1109     int op, total_args, n, i;
1110     TCGOpDef *def;
1111     TCGArgConstraint *args_ct;
1112     TCGTemp *ts;
1113 
1114     memset(s, 0, sizeof(*s));
1115     s->nb_globals = 0;
1116 
1117     /* Count total number of arguments and allocate the corresponding
1118        space */
1119     total_args = 0;
1120     for(op = 0; op < NB_OPS; op++) {
1121         def = &tcg_op_defs[op];
1122         n = def->nb_iargs + def->nb_oargs;
1123         total_args += n;
1124     }
1125 
1126     args_ct = g_new0(TCGArgConstraint, total_args);
1127 
1128     for(op = 0; op < NB_OPS; op++) {
1129         def = &tcg_op_defs[op];
1130         def->args_ct = args_ct;
1131         n = def->nb_iargs + def->nb_oargs;
1132         args_ct += n;
1133     }
1134 
1135     /* Register helpers.  */
1136     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
1137     helper_table = g_hash_table_new(NULL, NULL);
1138 
1139     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1140         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1141                             (gpointer)&all_helpers[i]);
1142     }
1143 
1144     tcg_target_init(s);
1145     process_op_defs(s);
1146 
1147     /* Reverse the order of the saved registers, assuming they're all at
1148        the start of tcg_target_reg_alloc_order.  */
1149     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1150         int r = tcg_target_reg_alloc_order[n];
1151         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1152             break;
1153         }
1154     }
1155     for (i = 0; i < n; ++i) {
1156         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1157     }
1158     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1159         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1160     }
1161 
1162     alloc_tcg_plugin_context(s);
1163 
1164     tcg_ctx = s;
1165     /*
1166      * In user-mode we simply share the init context among threads, since we
1167      * use a single region. See the documentation tcg_region_init() for the
1168      * reasoning behind this.
1169      * In softmmu we will have at most max_cpus TCG threads.
1170      */
1171 #ifdef CONFIG_USER_ONLY
1172     tcg_ctxs = &tcg_ctx;
1173     n_tcg_ctxs = 1;
1174 #else
1175     MachineState *ms = MACHINE(qdev_get_machine());
1176     unsigned int max_cpus = ms->smp.max_cpus;
1177     tcg_ctxs = g_new(TCGContext *, max_cpus);
1178 #endif
1179 
1180     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1181     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1182     cpu_env = temp_tcgv_ptr(ts);
1183 }
1184 
1185 /*
1186  * Allocate TBs right before their corresponding translated code, making
1187  * sure that TBs and code are on different cache lines.
1188  */
1189 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1190 {
1191     uintptr_t align = qemu_icache_linesize;
1192     TranslationBlock *tb;
1193     void *next;
1194 
1195  retry:
1196     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1197     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1198 
1199     if (unlikely(next > s->code_gen_highwater)) {
1200         if (tcg_region_alloc(s)) {
1201             return NULL;
1202         }
1203         goto retry;
1204     }
1205     qatomic_set(&s->code_gen_ptr, next);
1206     s->data_gen_ptr = NULL;
1207     return tb;
1208 }
1209 
1210 void tcg_prologue_init(TCGContext *s)
1211 {
1212     size_t prologue_size, total_size;
1213     void *buf0, *buf1;
1214 
1215     /* Put the prologue at the beginning of code_gen_buffer.  */
1216     buf0 = s->code_gen_buffer;
1217     total_size = s->code_gen_buffer_size;
1218     s->code_ptr = buf0;
1219     s->code_buf = buf0;
1220     s->data_gen_ptr = NULL;
1221 
1222     /*
1223      * The region trees are not yet configured, but tcg_splitwx_to_rx
1224      * needs the bounds for an assert.
1225      */
1226     region.start = buf0;
1227     region.end = buf0 + total_size;
1228 
1229 #ifndef CONFIG_TCG_INTERPRETER
1230     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1231 #endif
1232 
1233     /* Compute a high-water mark, at which we voluntarily flush the buffer
1234        and start over.  The size here is arbitrary, significantly larger
1235        than we expect the code generation for any one opcode to require.  */
1236     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1237 
1238 #ifdef TCG_TARGET_NEED_POOL_LABELS
1239     s->pool_labels = NULL;
1240 #endif
1241 
1242     qemu_thread_jit_write();
1243     /* Generate the prologue.  */
1244     tcg_target_qemu_prologue(s);
1245 
1246 #ifdef TCG_TARGET_NEED_POOL_LABELS
1247     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1248     {
1249         int result = tcg_out_pool_finalize(s);
1250         tcg_debug_assert(result == 0);
1251     }
1252 #endif
1253 
1254     buf1 = s->code_ptr;
1255 #ifndef CONFIG_TCG_INTERPRETER
1256     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1257                         tcg_ptr_byte_diff(buf1, buf0));
1258 #endif
1259 
1260     /* Deduct the prologue from the buffer.  */
1261     prologue_size = tcg_current_code_size(s);
1262     s->code_gen_ptr = buf1;
1263     s->code_gen_buffer = buf1;
1264     s->code_buf = buf1;
1265     total_size -= prologue_size;
1266     s->code_gen_buffer_size = total_size;
1267 
1268     tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
1269 
1270 #ifdef DEBUG_DISAS
1271     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1272         FILE *logfile = qemu_log_lock();
1273         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1274         if (s->data_gen_ptr) {
1275             size_t code_size = s->data_gen_ptr - buf0;
1276             size_t data_size = prologue_size - code_size;
1277             size_t i;
1278 
1279             log_disas(buf0, code_size);
1280 
1281             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1282                 if (sizeof(tcg_target_ulong) == 8) {
1283                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1284                              (uintptr_t)s->data_gen_ptr + i,
1285                              *(uint64_t *)(s->data_gen_ptr + i));
1286                 } else {
1287                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1288                              (uintptr_t)s->data_gen_ptr + i,
1289                              *(uint32_t *)(s->data_gen_ptr + i));
1290                 }
1291             }
1292         } else {
1293             log_disas(buf0, prologue_size);
1294         }
1295         qemu_log("\n");
1296         qemu_log_flush();
1297         qemu_log_unlock(logfile);
1298     }
1299 #endif
1300 
1301     /* Assert that goto_ptr is implemented completely.  */
1302     if (TCG_TARGET_HAS_goto_ptr) {
1303         tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1304     }
1305 }
1306 
1307 void tcg_func_start(TCGContext *s)
1308 {
1309     tcg_pool_reset(s);
1310     s->nb_temps = s->nb_globals;
1311 
1312     /* No temps have been previously allocated for size or locality.  */
1313     memset(s->free_temps, 0, sizeof(s->free_temps));
1314 
1315     /* No constant temps have been previously allocated. */
1316     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1317         if (s->const_table[i]) {
1318             g_hash_table_remove_all(s->const_table[i]);
1319         }
1320     }
1321 
1322     s->nb_ops = 0;
1323     s->nb_labels = 0;
1324     s->current_frame_offset = s->frame_start;
1325 
1326 #ifdef CONFIG_DEBUG_TCG
1327     s->goto_tb_issue_mask = 0;
1328 #endif
1329 
1330     QTAILQ_INIT(&s->ops);
1331     QTAILQ_INIT(&s->free_ops);
1332     QSIMPLEQ_INIT(&s->labels);
1333 }
1334 
1335 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1336 {
1337     int n = s->nb_temps++;
1338 
1339     if (n >= TCG_MAX_TEMPS) {
1340         tcg_raise_tb_overflow(s);
1341     }
1342     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1343 }
1344 
1345 static TCGTemp *tcg_global_alloc(TCGContext *s)
1346 {
1347     TCGTemp *ts;
1348 
1349     tcg_debug_assert(s->nb_globals == s->nb_temps);
1350     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1351     s->nb_globals++;
1352     ts = tcg_temp_alloc(s);
1353     ts->kind = TEMP_GLOBAL;
1354 
1355     return ts;
1356 }
1357 
1358 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1359                                             TCGReg reg, const char *name)
1360 {
1361     TCGTemp *ts;
1362 
1363     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1364         tcg_abort();
1365     }
1366 
1367     ts = tcg_global_alloc(s);
1368     ts->base_type = type;
1369     ts->type = type;
1370     ts->kind = TEMP_FIXED;
1371     ts->reg = reg;
1372     ts->name = name;
1373     tcg_regset_set_reg(s->reserved_regs, reg);
1374 
1375     return ts;
1376 }
1377 
1378 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1379 {
1380     s->frame_start = start;
1381     s->frame_end = start + size;
1382     s->frame_temp
1383         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1384 }
1385 
1386 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1387                                      intptr_t offset, const char *name)
1388 {
1389     TCGContext *s = tcg_ctx;
1390     TCGTemp *base_ts = tcgv_ptr_temp(base);
1391     TCGTemp *ts = tcg_global_alloc(s);
1392     int indirect_reg = 0, bigendian = 0;
1393 #ifdef HOST_WORDS_BIGENDIAN
1394     bigendian = 1;
1395 #endif
1396 
1397     switch (base_ts->kind) {
1398     case TEMP_FIXED:
1399         break;
1400     case TEMP_GLOBAL:
1401         /* We do not support double-indirect registers.  */
1402         tcg_debug_assert(!base_ts->indirect_reg);
1403         base_ts->indirect_base = 1;
1404         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1405                             ? 2 : 1);
1406         indirect_reg = 1;
1407         break;
1408     default:
1409         g_assert_not_reached();
1410     }
1411 
1412     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1413         TCGTemp *ts2 = tcg_global_alloc(s);
1414         char buf[64];
1415 
1416         ts->base_type = TCG_TYPE_I64;
1417         ts->type = TCG_TYPE_I32;
1418         ts->indirect_reg = indirect_reg;
1419         ts->mem_allocated = 1;
1420         ts->mem_base = base_ts;
1421         ts->mem_offset = offset + bigendian * 4;
1422         pstrcpy(buf, sizeof(buf), name);
1423         pstrcat(buf, sizeof(buf), "_0");
1424         ts->name = strdup(buf);
1425 
1426         tcg_debug_assert(ts2 == ts + 1);
1427         ts2->base_type = TCG_TYPE_I64;
1428         ts2->type = TCG_TYPE_I32;
1429         ts2->indirect_reg = indirect_reg;
1430         ts2->mem_allocated = 1;
1431         ts2->mem_base = base_ts;
1432         ts2->mem_offset = offset + (1 - bigendian) * 4;
1433         pstrcpy(buf, sizeof(buf), name);
1434         pstrcat(buf, sizeof(buf), "_1");
1435         ts2->name = strdup(buf);
1436     } else {
1437         ts->base_type = type;
1438         ts->type = type;
1439         ts->indirect_reg = indirect_reg;
1440         ts->mem_allocated = 1;
1441         ts->mem_base = base_ts;
1442         ts->mem_offset = offset;
1443         ts->name = name;
1444     }
1445     return ts;
1446 }
1447 
1448 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1449 {
1450     TCGContext *s = tcg_ctx;
1451     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1452     TCGTemp *ts;
1453     int idx, k;
1454 
1455     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1456     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1457     if (idx < TCG_MAX_TEMPS) {
1458         /* There is already an available temp with the right type.  */
1459         clear_bit(idx, s->free_temps[k].l);
1460 
1461         ts = &s->temps[idx];
1462         ts->temp_allocated = 1;
1463         tcg_debug_assert(ts->base_type == type);
1464         tcg_debug_assert(ts->kind == kind);
1465     } else {
1466         ts = tcg_temp_alloc(s);
1467         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1468             TCGTemp *ts2 = tcg_temp_alloc(s);
1469 
1470             ts->base_type = type;
1471             ts->type = TCG_TYPE_I32;
1472             ts->temp_allocated = 1;
1473             ts->kind = kind;
1474 
1475             tcg_debug_assert(ts2 == ts + 1);
1476             ts2->base_type = TCG_TYPE_I64;
1477             ts2->type = TCG_TYPE_I32;
1478             ts2->temp_allocated = 1;
1479             ts2->kind = kind;
1480         } else {
1481             ts->base_type = type;
1482             ts->type = type;
1483             ts->temp_allocated = 1;
1484             ts->kind = kind;
1485         }
1486     }
1487 
1488 #if defined(CONFIG_DEBUG_TCG)
1489     s->temps_in_use++;
1490 #endif
1491     return ts;
1492 }
1493 
1494 TCGv_vec tcg_temp_new_vec(TCGType type)
1495 {
1496     TCGTemp *t;
1497 
1498 #ifdef CONFIG_DEBUG_TCG
1499     switch (type) {
1500     case TCG_TYPE_V64:
1501         assert(TCG_TARGET_HAS_v64);
1502         break;
1503     case TCG_TYPE_V128:
1504         assert(TCG_TARGET_HAS_v128);
1505         break;
1506     case TCG_TYPE_V256:
1507         assert(TCG_TARGET_HAS_v256);
1508         break;
1509     default:
1510         g_assert_not_reached();
1511     }
1512 #endif
1513 
1514     t = tcg_temp_new_internal(type, 0);
1515     return temp_tcgv_vec(t);
1516 }
1517 
1518 /* Create a new temp of the same type as an existing temp.  */
1519 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1520 {
1521     TCGTemp *t = tcgv_vec_temp(match);
1522 
1523     tcg_debug_assert(t->temp_allocated != 0);
1524 
1525     t = tcg_temp_new_internal(t->base_type, 0);
1526     return temp_tcgv_vec(t);
1527 }
1528 
1529 void tcg_temp_free_internal(TCGTemp *ts)
1530 {
1531     TCGContext *s = tcg_ctx;
1532     int k, idx;
1533 
1534     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1535     if (ts->kind == TEMP_CONST) {
1536         return;
1537     }
1538 
1539 #if defined(CONFIG_DEBUG_TCG)
1540     s->temps_in_use--;
1541     if (s->temps_in_use < 0) {
1542         fprintf(stderr, "More temporaries freed than allocated!\n");
1543     }
1544 #endif
1545 
1546     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1547     tcg_debug_assert(ts->temp_allocated != 0);
1548     ts->temp_allocated = 0;
1549 
1550     idx = temp_idx(ts);
1551     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1552     set_bit(idx, s->free_temps[k].l);
1553 }
1554 
1555 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1556 {
1557     TCGContext *s = tcg_ctx;
1558     GHashTable *h = s->const_table[type];
1559     TCGTemp *ts;
1560 
1561     if (h == NULL) {
1562         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1563         s->const_table[type] = h;
1564     }
1565 
1566     ts = g_hash_table_lookup(h, &val);
1567     if (ts == NULL) {
1568         ts = tcg_temp_alloc(s);
1569 
1570         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1571             TCGTemp *ts2 = tcg_temp_alloc(s);
1572 
1573             ts->base_type = TCG_TYPE_I64;
1574             ts->type = TCG_TYPE_I32;
1575             ts->kind = TEMP_CONST;
1576             ts->temp_allocated = 1;
1577             /*
1578              * Retain the full value of the 64-bit constant in the low
1579              * part, so that the hash table works.  Actual uses will
1580              * truncate the value to the low part.
1581              */
1582             ts->val = val;
1583 
1584             tcg_debug_assert(ts2 == ts + 1);
1585             ts2->base_type = TCG_TYPE_I64;
1586             ts2->type = TCG_TYPE_I32;
1587             ts2->kind = TEMP_CONST;
1588             ts2->temp_allocated = 1;
1589             ts2->val = val >> 32;
1590         } else {
1591             ts->base_type = type;
1592             ts->type = type;
1593             ts->kind = TEMP_CONST;
1594             ts->temp_allocated = 1;
1595             ts->val = val;
1596         }
1597         g_hash_table_insert(h, &ts->val, ts);
1598     }
1599 
1600     return ts;
1601 }
1602 
1603 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1604 {
1605     val = dup_const(vece, val);
1606     return temp_tcgv_vec(tcg_constant_internal(type, val));
1607 }
1608 
1609 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1610 {
1611     TCGTemp *t = tcgv_vec_temp(match);
1612 
1613     tcg_debug_assert(t->temp_allocated != 0);
1614     return tcg_constant_vec(t->base_type, vece, val);
1615 }
1616 
1617 TCGv_i32 tcg_const_i32(int32_t val)
1618 {
1619     TCGv_i32 t0;
1620     t0 = tcg_temp_new_i32();
1621     tcg_gen_movi_i32(t0, val);
1622     return t0;
1623 }
1624 
1625 TCGv_i64 tcg_const_i64(int64_t val)
1626 {
1627     TCGv_i64 t0;
1628     t0 = tcg_temp_new_i64();
1629     tcg_gen_movi_i64(t0, val);
1630     return t0;
1631 }
1632 
1633 TCGv_i32 tcg_const_local_i32(int32_t val)
1634 {
1635     TCGv_i32 t0;
1636     t0 = tcg_temp_local_new_i32();
1637     tcg_gen_movi_i32(t0, val);
1638     return t0;
1639 }
1640 
1641 TCGv_i64 tcg_const_local_i64(int64_t val)
1642 {
1643     TCGv_i64 t0;
1644     t0 = tcg_temp_local_new_i64();
1645     tcg_gen_movi_i64(t0, val);
1646     return t0;
1647 }
1648 
1649 #if defined(CONFIG_DEBUG_TCG)
1650 void tcg_clear_temp_count(void)
1651 {
1652     TCGContext *s = tcg_ctx;
1653     s->temps_in_use = 0;
1654 }
1655 
1656 int tcg_check_temp_count(void)
1657 {
1658     TCGContext *s = tcg_ctx;
1659     if (s->temps_in_use) {
1660         /* Clear the count so that we don't give another
1661          * warning immediately next time around.
1662          */
1663         s->temps_in_use = 0;
1664         return 1;
1665     }
1666     return 0;
1667 }
1668 #endif
1669 
1670 /* Return true if OP may appear in the opcode stream.
1671    Test the runtime variable that controls each opcode.  */
1672 bool tcg_op_supported(TCGOpcode op)
1673 {
1674     const bool have_vec
1675         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1676 
1677     switch (op) {
1678     case INDEX_op_discard:
1679     case INDEX_op_set_label:
1680     case INDEX_op_call:
1681     case INDEX_op_br:
1682     case INDEX_op_mb:
1683     case INDEX_op_insn_start:
1684     case INDEX_op_exit_tb:
1685     case INDEX_op_goto_tb:
1686     case INDEX_op_qemu_ld_i32:
1687     case INDEX_op_qemu_st_i32:
1688     case INDEX_op_qemu_ld_i64:
1689     case INDEX_op_qemu_st_i64:
1690         return true;
1691 
1692     case INDEX_op_qemu_st8_i32:
1693         return TCG_TARGET_HAS_qemu_st8_i32;
1694 
1695     case INDEX_op_goto_ptr:
1696         return TCG_TARGET_HAS_goto_ptr;
1697 
1698     case INDEX_op_mov_i32:
1699     case INDEX_op_setcond_i32:
1700     case INDEX_op_brcond_i32:
1701     case INDEX_op_ld8u_i32:
1702     case INDEX_op_ld8s_i32:
1703     case INDEX_op_ld16u_i32:
1704     case INDEX_op_ld16s_i32:
1705     case INDEX_op_ld_i32:
1706     case INDEX_op_st8_i32:
1707     case INDEX_op_st16_i32:
1708     case INDEX_op_st_i32:
1709     case INDEX_op_add_i32:
1710     case INDEX_op_sub_i32:
1711     case INDEX_op_mul_i32:
1712     case INDEX_op_and_i32:
1713     case INDEX_op_or_i32:
1714     case INDEX_op_xor_i32:
1715     case INDEX_op_shl_i32:
1716     case INDEX_op_shr_i32:
1717     case INDEX_op_sar_i32:
1718         return true;
1719 
1720     case INDEX_op_movcond_i32:
1721         return TCG_TARGET_HAS_movcond_i32;
1722     case INDEX_op_div_i32:
1723     case INDEX_op_divu_i32:
1724         return TCG_TARGET_HAS_div_i32;
1725     case INDEX_op_rem_i32:
1726     case INDEX_op_remu_i32:
1727         return TCG_TARGET_HAS_rem_i32;
1728     case INDEX_op_div2_i32:
1729     case INDEX_op_divu2_i32:
1730         return TCG_TARGET_HAS_div2_i32;
1731     case INDEX_op_rotl_i32:
1732     case INDEX_op_rotr_i32:
1733         return TCG_TARGET_HAS_rot_i32;
1734     case INDEX_op_deposit_i32:
1735         return TCG_TARGET_HAS_deposit_i32;
1736     case INDEX_op_extract_i32:
1737         return TCG_TARGET_HAS_extract_i32;
1738     case INDEX_op_sextract_i32:
1739         return TCG_TARGET_HAS_sextract_i32;
1740     case INDEX_op_extract2_i32:
1741         return TCG_TARGET_HAS_extract2_i32;
1742     case INDEX_op_add2_i32:
1743         return TCG_TARGET_HAS_add2_i32;
1744     case INDEX_op_sub2_i32:
1745         return TCG_TARGET_HAS_sub2_i32;
1746     case INDEX_op_mulu2_i32:
1747         return TCG_TARGET_HAS_mulu2_i32;
1748     case INDEX_op_muls2_i32:
1749         return TCG_TARGET_HAS_muls2_i32;
1750     case INDEX_op_muluh_i32:
1751         return TCG_TARGET_HAS_muluh_i32;
1752     case INDEX_op_mulsh_i32:
1753         return TCG_TARGET_HAS_mulsh_i32;
1754     case INDEX_op_ext8s_i32:
1755         return TCG_TARGET_HAS_ext8s_i32;
1756     case INDEX_op_ext16s_i32:
1757         return TCG_TARGET_HAS_ext16s_i32;
1758     case INDEX_op_ext8u_i32:
1759         return TCG_TARGET_HAS_ext8u_i32;
1760     case INDEX_op_ext16u_i32:
1761         return TCG_TARGET_HAS_ext16u_i32;
1762     case INDEX_op_bswap16_i32:
1763         return TCG_TARGET_HAS_bswap16_i32;
1764     case INDEX_op_bswap32_i32:
1765         return TCG_TARGET_HAS_bswap32_i32;
1766     case INDEX_op_not_i32:
1767         return TCG_TARGET_HAS_not_i32;
1768     case INDEX_op_neg_i32:
1769         return TCG_TARGET_HAS_neg_i32;
1770     case INDEX_op_andc_i32:
1771         return TCG_TARGET_HAS_andc_i32;
1772     case INDEX_op_orc_i32:
1773         return TCG_TARGET_HAS_orc_i32;
1774     case INDEX_op_eqv_i32:
1775         return TCG_TARGET_HAS_eqv_i32;
1776     case INDEX_op_nand_i32:
1777         return TCG_TARGET_HAS_nand_i32;
1778     case INDEX_op_nor_i32:
1779         return TCG_TARGET_HAS_nor_i32;
1780     case INDEX_op_clz_i32:
1781         return TCG_TARGET_HAS_clz_i32;
1782     case INDEX_op_ctz_i32:
1783         return TCG_TARGET_HAS_ctz_i32;
1784     case INDEX_op_ctpop_i32:
1785         return TCG_TARGET_HAS_ctpop_i32;
1786 
1787     case INDEX_op_brcond2_i32:
1788     case INDEX_op_setcond2_i32:
1789         return TCG_TARGET_REG_BITS == 32;
1790 
1791     case INDEX_op_mov_i64:
1792     case INDEX_op_setcond_i64:
1793     case INDEX_op_brcond_i64:
1794     case INDEX_op_ld8u_i64:
1795     case INDEX_op_ld8s_i64:
1796     case INDEX_op_ld16u_i64:
1797     case INDEX_op_ld16s_i64:
1798     case INDEX_op_ld32u_i64:
1799     case INDEX_op_ld32s_i64:
1800     case INDEX_op_ld_i64:
1801     case INDEX_op_st8_i64:
1802     case INDEX_op_st16_i64:
1803     case INDEX_op_st32_i64:
1804     case INDEX_op_st_i64:
1805     case INDEX_op_add_i64:
1806     case INDEX_op_sub_i64:
1807     case INDEX_op_mul_i64:
1808     case INDEX_op_and_i64:
1809     case INDEX_op_or_i64:
1810     case INDEX_op_xor_i64:
1811     case INDEX_op_shl_i64:
1812     case INDEX_op_shr_i64:
1813     case INDEX_op_sar_i64:
1814     case INDEX_op_ext_i32_i64:
1815     case INDEX_op_extu_i32_i64:
1816         return TCG_TARGET_REG_BITS == 64;
1817 
1818     case INDEX_op_movcond_i64:
1819         return TCG_TARGET_HAS_movcond_i64;
1820     case INDEX_op_div_i64:
1821     case INDEX_op_divu_i64:
1822         return TCG_TARGET_HAS_div_i64;
1823     case INDEX_op_rem_i64:
1824     case INDEX_op_remu_i64:
1825         return TCG_TARGET_HAS_rem_i64;
1826     case INDEX_op_div2_i64:
1827     case INDEX_op_divu2_i64:
1828         return TCG_TARGET_HAS_div2_i64;
1829     case INDEX_op_rotl_i64:
1830     case INDEX_op_rotr_i64:
1831         return TCG_TARGET_HAS_rot_i64;
1832     case INDEX_op_deposit_i64:
1833         return TCG_TARGET_HAS_deposit_i64;
1834     case INDEX_op_extract_i64:
1835         return TCG_TARGET_HAS_extract_i64;
1836     case INDEX_op_sextract_i64:
1837         return TCG_TARGET_HAS_sextract_i64;
1838     case INDEX_op_extract2_i64:
1839         return TCG_TARGET_HAS_extract2_i64;
1840     case INDEX_op_extrl_i64_i32:
1841         return TCG_TARGET_HAS_extrl_i64_i32;
1842     case INDEX_op_extrh_i64_i32:
1843         return TCG_TARGET_HAS_extrh_i64_i32;
1844     case INDEX_op_ext8s_i64:
1845         return TCG_TARGET_HAS_ext8s_i64;
1846     case INDEX_op_ext16s_i64:
1847         return TCG_TARGET_HAS_ext16s_i64;
1848     case INDEX_op_ext32s_i64:
1849         return TCG_TARGET_HAS_ext32s_i64;
1850     case INDEX_op_ext8u_i64:
1851         return TCG_TARGET_HAS_ext8u_i64;
1852     case INDEX_op_ext16u_i64:
1853         return TCG_TARGET_HAS_ext16u_i64;
1854     case INDEX_op_ext32u_i64:
1855         return TCG_TARGET_HAS_ext32u_i64;
1856     case INDEX_op_bswap16_i64:
1857         return TCG_TARGET_HAS_bswap16_i64;
1858     case INDEX_op_bswap32_i64:
1859         return TCG_TARGET_HAS_bswap32_i64;
1860     case INDEX_op_bswap64_i64:
1861         return TCG_TARGET_HAS_bswap64_i64;
1862     case INDEX_op_not_i64:
1863         return TCG_TARGET_HAS_not_i64;
1864     case INDEX_op_neg_i64:
1865         return TCG_TARGET_HAS_neg_i64;
1866     case INDEX_op_andc_i64:
1867         return TCG_TARGET_HAS_andc_i64;
1868     case INDEX_op_orc_i64:
1869         return TCG_TARGET_HAS_orc_i64;
1870     case INDEX_op_eqv_i64:
1871         return TCG_TARGET_HAS_eqv_i64;
1872     case INDEX_op_nand_i64:
1873         return TCG_TARGET_HAS_nand_i64;
1874     case INDEX_op_nor_i64:
1875         return TCG_TARGET_HAS_nor_i64;
1876     case INDEX_op_clz_i64:
1877         return TCG_TARGET_HAS_clz_i64;
1878     case INDEX_op_ctz_i64:
1879         return TCG_TARGET_HAS_ctz_i64;
1880     case INDEX_op_ctpop_i64:
1881         return TCG_TARGET_HAS_ctpop_i64;
1882     case INDEX_op_add2_i64:
1883         return TCG_TARGET_HAS_add2_i64;
1884     case INDEX_op_sub2_i64:
1885         return TCG_TARGET_HAS_sub2_i64;
1886     case INDEX_op_mulu2_i64:
1887         return TCG_TARGET_HAS_mulu2_i64;
1888     case INDEX_op_muls2_i64:
1889         return TCG_TARGET_HAS_muls2_i64;
1890     case INDEX_op_muluh_i64:
1891         return TCG_TARGET_HAS_muluh_i64;
1892     case INDEX_op_mulsh_i64:
1893         return TCG_TARGET_HAS_mulsh_i64;
1894 
1895     case INDEX_op_mov_vec:
1896     case INDEX_op_dup_vec:
1897     case INDEX_op_dupm_vec:
1898     case INDEX_op_ld_vec:
1899     case INDEX_op_st_vec:
1900     case INDEX_op_add_vec:
1901     case INDEX_op_sub_vec:
1902     case INDEX_op_and_vec:
1903     case INDEX_op_or_vec:
1904     case INDEX_op_xor_vec:
1905     case INDEX_op_cmp_vec:
1906         return have_vec;
1907     case INDEX_op_dup2_vec:
1908         return have_vec && TCG_TARGET_REG_BITS == 32;
1909     case INDEX_op_not_vec:
1910         return have_vec && TCG_TARGET_HAS_not_vec;
1911     case INDEX_op_neg_vec:
1912         return have_vec && TCG_TARGET_HAS_neg_vec;
1913     case INDEX_op_abs_vec:
1914         return have_vec && TCG_TARGET_HAS_abs_vec;
1915     case INDEX_op_andc_vec:
1916         return have_vec && TCG_TARGET_HAS_andc_vec;
1917     case INDEX_op_orc_vec:
1918         return have_vec && TCG_TARGET_HAS_orc_vec;
1919     case INDEX_op_mul_vec:
1920         return have_vec && TCG_TARGET_HAS_mul_vec;
1921     case INDEX_op_shli_vec:
1922     case INDEX_op_shri_vec:
1923     case INDEX_op_sari_vec:
1924         return have_vec && TCG_TARGET_HAS_shi_vec;
1925     case INDEX_op_shls_vec:
1926     case INDEX_op_shrs_vec:
1927     case INDEX_op_sars_vec:
1928         return have_vec && TCG_TARGET_HAS_shs_vec;
1929     case INDEX_op_shlv_vec:
1930     case INDEX_op_shrv_vec:
1931     case INDEX_op_sarv_vec:
1932         return have_vec && TCG_TARGET_HAS_shv_vec;
1933     case INDEX_op_rotli_vec:
1934         return have_vec && TCG_TARGET_HAS_roti_vec;
1935     case INDEX_op_rotls_vec:
1936         return have_vec && TCG_TARGET_HAS_rots_vec;
1937     case INDEX_op_rotlv_vec:
1938     case INDEX_op_rotrv_vec:
1939         return have_vec && TCG_TARGET_HAS_rotv_vec;
1940     case INDEX_op_ssadd_vec:
1941     case INDEX_op_usadd_vec:
1942     case INDEX_op_sssub_vec:
1943     case INDEX_op_ussub_vec:
1944         return have_vec && TCG_TARGET_HAS_sat_vec;
1945     case INDEX_op_smin_vec:
1946     case INDEX_op_umin_vec:
1947     case INDEX_op_smax_vec:
1948     case INDEX_op_umax_vec:
1949         return have_vec && TCG_TARGET_HAS_minmax_vec;
1950     case INDEX_op_bitsel_vec:
1951         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1952     case INDEX_op_cmpsel_vec:
1953         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1954 
1955     default:
1956         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1957         return true;
1958     }
1959 }
1960 
/*
 * Emit an INDEX_op_call op that invokes the helper at @func.
 *
 * @func:  address of the helper; used as the key into helper_table.  The
 *         lookup result is dereferenced without a NULL check, so the helper
 *         must already be present in the table.
 * @ret:   temp receiving the return value, or NULL if there is none.
 * @nargs: number of entries in @args.
 * @args:  input temps.  In the TCG_TARGET_EXTEND_ARGS configuration the
 *         entries for 32-bit arguments are overwritten in place with
 *         freshly allocated extended temps.
 *
 * The helper's sizemask, as decoded below:
 *   bit 0             - the return value is 64-bit
 *   bit (i+1)*2       - argument i is 64-bit
 *   bit (i+1)*2 + 1   - argument i is signed
 *
 * The resulting op->args[] holds, in order: the output temp(s), the input
 * temps (with dummy padding where alignment is required), the function
 * address, and the helper flags.
 *
 * Note: we convert the 64 bit args to 32 bit and do some alignment
 * and endian swap. Maybe it would be better to do the alignment
 * and endian swap in tcg_reg_alloc_call().
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                /* High half is passed first, then the low half. */
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /*
         * From here on operate on the split list; with sizemask cleared,
         * the generic packing loop below treats every entry as one word.
         */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Sign- or zero-extend every 32-bit argument into a new 64-bit temp. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free slot in op->args[]; outputs are stored first. */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /*
             * A 64-bit return value on a 32-bit host occupies two
             * consecutive temps (ret and ret + 1); their order in the
             * output list depends on host endianness.
             */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* real_args counts input slots actually emitted, including padding. */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian.  For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order.  If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function address and the helper flags follow the inputs. */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Release the extension temps that replaced the 32-bit arguments. */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
2139 
2140 static void tcg_reg_alloc_start(TCGContext *s)
2141 {
2142     int i, n;
2143 
2144     for (i = 0, n = s->nb_temps; i < n; i++) {
2145         TCGTemp *ts = &s->temps[i];
2146         TCGTempVal val = TEMP_VAL_MEM;
2147 
2148         switch (ts->kind) {
2149         case TEMP_CONST:
2150             val = TEMP_VAL_CONST;
2151             break;
2152         case TEMP_FIXED:
2153             val = TEMP_VAL_REG;
2154             break;
2155         case TEMP_GLOBAL:
2156             break;
2157         case TEMP_NORMAL:
2158             val = TEMP_VAL_DEAD;
2159             /* fall through */
2160         case TEMP_LOCAL:
2161             ts->mem_allocated = 0;
2162             break;
2163         default:
2164             g_assert_not_reached();
2165         }
2166         ts->val_type = val;
2167     }
2168 
2169     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2170 }
2171 
2172 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2173                                  TCGTemp *ts)
2174 {
2175     int idx = temp_idx(ts);
2176 
2177     switch (ts->kind) {
2178     case TEMP_FIXED:
2179     case TEMP_GLOBAL:
2180         pstrcpy(buf, buf_size, ts->name);
2181         break;
2182     case TEMP_LOCAL:
2183         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2184         break;
2185     case TEMP_NORMAL:
2186         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2187         break;
2188     case TEMP_CONST:
2189         switch (ts->type) {
2190         case TCG_TYPE_I32:
2191             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2192             break;
2193 #if TCG_TARGET_REG_BITS > 32
2194         case TCG_TYPE_I64:
2195             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2196             break;
2197 #endif
2198         case TCG_TYPE_V64:
2199         case TCG_TYPE_V128:
2200         case TCG_TYPE_V256:
2201             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2202                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2203             break;
2204         default:
2205             g_assert_not_reached();
2206         }
2207         break;
2208     }
2209     return buf;
2210 }
2211 
2212 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2213                              int buf_size, TCGArg arg)
2214 {
2215     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2216 }
2217 
2218 /* Find helper name.  */
2219 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2220 {
2221     const char *ret = NULL;
2222     if (helper_table) {
2223         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2224         if (info) {
2225             ret = info->name;
2226         }
2227     }
2228     return ret;
2229 }
2230 
/*
 * Printable names for comparison conditions, indexed by TCG_COND_* value.
 * Indices without an initializer are NULL; tcg_dump_ops() checks for both
 * an out-of-range index and a NULL entry, and prints the raw value in hex
 * in either case.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2246 
/*
 * Printable names for guest memory access kinds.  tcg_dump_ops() indexes
 * this table with (memop & (MO_BSWAP | MO_SSIZE)).  Combinations that are
 * not listed here have no entry.
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
2262 
/*
 * Printable prefixes for the alignment field of a memop.  tcg_dump_ops()
 * indexes this table with ((memop & MO_AMASK) >> MO_ASHIFT) and prints the
 * prefix in front of the ldst_name[] entry.
 *
 * Exactly one of MO_UNALN / MO_ALIGN is rendered as the empty string:
 * MO_ALIGN when TARGET_ALIGNED_ONLY is defined, MO_UNALN otherwise.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2278 
2279 static inline bool tcg_regset_single(TCGRegSet d)
2280 {
2281     return (d & (d - 1)) == 0;
2282 }
2283 
2284 static inline TCGReg tcg_regset_first(TCGRegSet d)
2285 {
2286     if (TCG_TARGET_NB_REGS <= 32) {
2287         return ctz32(d);
2288     } else {
2289         return ctz64(d);
2290     }
2291 }
2292 
/*
 * Write a textual listing of every op in s->ops to the QEMU log, one
 * line per op.
 *
 * @s:          context whose op list is dumped.
 * @have_prefs: when true, also print the output register preference
 *              (op->output_pref[]) of each output argument.
 *
 * Each line consists of the opcode name and its arguments, optionally
 * followed by liveness information ("sync:" / "dead:") when op->life is
 * non-zero, and by "pref=" when @have_prefs is set.
 */
static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        /*
         * Sum of the qemu_log() return values for this line (presumably
         * the number of characters written); used below to pad the
         * liveness/preference columns out to column 40.
         */
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word was stored as a low/high pair of host args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
                                8 << TCGOP_VECE(op));
            }

            /* k walks op->args[] across outputs, inputs and constants. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            /*
             * Pretty-print the first constant argument for opcodes where
             * it has a known meaning.  After this switch, i holds the
             * number of constant arguments already printed.
             */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                /* Condition code: symbolic if known, otherwise hex. */
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += qemu_log(",%s", cond_name[op->args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    TCGMemOpIdx oi = op->args[k++];
                    /* Note: this 'op' shadows the TCGOp iterator here. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    /* Bits outside the named fields: print raw. */
                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch-like ops: the next constant argument is a label. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += qemu_log("%s$L%d", k ? "," : "",
                                arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Any remaining constant arguments are printed in hex. */
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
            }
        }

        if (have_prefs || op->life) {

            QemuLogFile *logfile;

            /* Pad with spaces up to column 40, writing to the log file. */
            rcu_read_lock();
            logfile = qatomic_rcu_read(&qemu_logfile);
            if (logfile) {
                for (; col < 40; ++col) {
                    putc(' ', logfile->fd);
                }
            }
            rcu_read_unlock();
        }

        if (op->life) {
            unsigned life = op->life;

            /* Two sync bits are examined, for arguments 0 and 1. */
            if (life & (SYNC_ARG * 3)) {
                qemu_log("  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        qemu_log(" %d", i);
                    }
                }
            }
            /* Scale down so that bit i reports whether argument i is dead. */
            life /= DEAD_ARG;
            if (life) {
                qemu_log("  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        qemu_log(" %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = op->output_pref[i];

                if (i == 0) {
                    qemu_log("  pref=");
                } else {
                    qemu_log(",");
                }
                if (set == 0) {
                    qemu_log("none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    qemu_log("all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    /* set is non-zero here, so exactly one bit is set. */
                    TCGReg reg = tcg_regset_first(set);
                    qemu_log("%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    qemu_log("%#x", (uint32_t)set);
                } else {
                    qemu_log("%#" PRIx64, (uint64_t)set);
                }
            }
        }

        qemu_log("\n");
    }
}
2496 
2497 /* we give more priority to constraints with less registers */
2498 static int get_constraint_priority(const TCGOpDef *def, int k)
2499 {
2500     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2501     int n;
2502 
2503     if (arg_ct->oalias) {
2504         /* an alias is equivalent to a single register */
2505         n = 1;
2506     } else {
2507         n = ctpop64(arg_ct->regs);
2508     }
2509     return TCG_TARGET_NB_REGS - n + 1;
2510 }
2511 
2512 /* sort from highest priority to lowest */
2513 static void sort_constraints(TCGOpDef *def, int start, int n)
2514 {
2515     int i, j;
2516     TCGArgConstraint *a = def->args_ct;
2517 
2518     for (i = 0; i < n; i++) {
2519         a[start + i].sort_index = start + i;
2520     }
2521     if (n <= 1) {
2522         return;
2523     }
2524     for (i = 0; i < n - 1; i++) {
2525         for (j = i + 1; j < n; j++) {
2526             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2527             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2528             if (p1 < p2) {
2529                 int tmp = a[start + i].sort_index;
2530                 a[start + i].sort_index = a[start + j].sort_index;
2531                 a[start + j].sort_index = tmp;
2532             }
2533         }
2534     }
2535 }
2536 
/*
 * Populate the argument constraints (def->args_ct[]) of every opcode in
 * tcg_op_defs[] from the target's constraint strings, then compute the
 * sort order of the output and input constraints.
 *
 * Opcodes flagged TCG_OPF_NOT_PRESENT, and opcodes with no input or
 * output arguments, are skipped.
 *
 * Constraint string characters handled here:
 *   '0'..'9'  this argument aliases the output argument with that index
 *   '&'       sets newreg on the argument
 *   'i'       sets TCG_CT_CONST in the argument's ct mask
 * All other characters are defined by the target through the CONST() and
 * REGS() entries of tcg-target-con-str.h; an unknown character aborts.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        int oarg = *ct_str - '0';
                        /* An alias digit must be the first character... */
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        /* ...and must name an already-parsed output. */
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        /* Start from a copy of the output's constraint. */
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2623 
2624 void tcg_op_remove(TCGContext *s, TCGOp *op)
2625 {
2626     TCGLabel *label;
2627 
2628     switch (op->opc) {
2629     case INDEX_op_br:
2630         label = arg_label(op->args[0]);
2631         label->refs--;
2632         break;
2633     case INDEX_op_brcond_i32:
2634     case INDEX_op_brcond_i64:
2635         label = arg_label(op->args[3]);
2636         label->refs--;
2637         break;
2638     case INDEX_op_brcond2_i32:
2639         label = arg_label(op->args[5]);
2640         label->refs--;
2641         break;
2642     default:
2643         break;
2644     }
2645 
2646     QTAILQ_REMOVE(&s->ops, op, link);
2647     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2648     s->nb_ops--;
2649 
2650 #ifdef CONFIG_PROFILER
2651     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2652 #endif
2653 }
2654 
2655 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2656 {
2657     TCGContext *s = tcg_ctx;
2658     TCGOp *op;
2659 
2660     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2661         op = tcg_malloc(sizeof(TCGOp));
2662     } else {
2663         op = QTAILQ_FIRST(&s->free_ops);
2664         QTAILQ_REMOVE(&s->free_ops, op, link);
2665     }
2666     memset(op, 0, offsetof(TCGOp, link));
2667     op->opc = opc;
2668     s->nb_ops++;
2669 
2670     return op;
2671 }
2672 
2673 TCGOp *tcg_emit_op(TCGOpcode opc)
2674 {
2675     TCGOp *op = tcg_op_alloc(opc);
2676     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2677     return op;
2678 }
2679 
2680 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2681 {
2682     TCGOp *new_op = tcg_op_alloc(opc);
2683     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2684     return new_op;
2685 }
2686 
2687 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2688 {
2689     TCGOp *new_op = tcg_op_alloc(opc);
2690     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2691     return new_op;
2692 }
2693 
2694 /* Reachable analysis : remove unreachable code.  */
2695 static void reachable_code_pass(TCGContext *s)
2696 {
2697     TCGOp *op, *op_next;
2698     bool dead = false;
2699 
2700     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2701         bool remove = dead;
2702         TCGLabel *label;
2703         int call_flags;
2704 
2705         switch (op->opc) {
2706         case INDEX_op_set_label:
2707             label = arg_label(op->args[0]);
2708             if (label->refs == 0) {
2709                 /*
2710                  * While there is an occasional backward branch, virtually
2711                  * all branches generated by the translators are forward.
2712                  * Which means that generally we will have already removed
2713                  * all references to the label that will be, and there is
2714                  * little to be gained by iterating.
2715                  */
2716                 remove = true;
2717             } else {
2718                 /* Once we see a label, insns become live again.  */
2719                 dead = false;
2720                 remove = false;
2721 
2722                 /*
2723                  * Optimization can fold conditional branches to unconditional.
2724                  * If we find a label with one reference which is preceded by
2725                  * an unconditional branch to it, remove both.  This needed to
2726                  * wait until the dead code in between them was removed.
2727                  */
2728                 if (label->refs == 1) {
2729                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2730                     if (op_prev->opc == INDEX_op_br &&
2731                         label == arg_label(op_prev->args[0])) {
2732                         tcg_op_remove(s, op_prev);
2733                         remove = true;
2734                     }
2735                 }
2736             }
2737             break;
2738 
2739         case INDEX_op_br:
2740         case INDEX_op_exit_tb:
2741         case INDEX_op_goto_ptr:
2742             /* Unconditional branches; everything following is dead.  */
2743             dead = true;
2744             break;
2745 
2746         case INDEX_op_call:
2747             /* Notice noreturn helper calls, raising exceptions.  */
2748             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2749             if (call_flags & TCG_CALL_NO_RETURN) {
2750                 dead = true;
2751             }
2752             break;
2753 
2754         case INDEX_op_insn_start:
2755             /* Never remove -- we need to keep these for unwind.  */
2756             remove = false;
2757             break;
2758 
2759         default:
2760             break;
2761         }
2762 
2763         if (remove) {
2764             tcg_op_remove(s, op);
2765         }
2766     }
2767 }
2768 
/*
 * Liveness state bits stored in TCGTemp.state by the la_*() helpers.
 * The two bits may be combined (TS_DEAD | TS_MEM).
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / needs-sync bit of argument @n.  Both macros expand to an
 * expression on a variable named arg_life, which must be in scope where
 * the macro is used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2774 
2775 /* For liveness_pass_1, the register preferences for a given temp.  */
2776 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2777 {
2778     return ts->state_ptr;
2779 }
2780 
2781 /* For liveness_pass_1, reset the preferences for a given temp to the
2782  * maximal regset for its type.
2783  */
2784 static inline void la_reset_pref(TCGTemp *ts)
2785 {
2786     *la_temp_pref(ts)
2787         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2788 }
2789 
2790 /* liveness analysis: end of function: all temps are dead, and globals
2791    should be in memory. */
2792 static void la_func_end(TCGContext *s, int ng, int nt)
2793 {
2794     int i;
2795 
2796     for (i = 0; i < ng; ++i) {
2797         s->temps[i].state = TS_DEAD | TS_MEM;
2798         la_reset_pref(&s->temps[i]);
2799     }
2800     for (i = ng; i < nt; ++i) {
2801         s->temps[i].state = TS_DEAD;
2802         la_reset_pref(&s->temps[i]);
2803     }
2804 }
2805 
2806 /* liveness analysis: end of basic block: all temps are dead, globals
2807    and local temps should be in memory. */
2808 static void la_bb_end(TCGContext *s, int ng, int nt)
2809 {
2810     int i;
2811 
2812     for (i = 0; i < nt; ++i) {
2813         TCGTemp *ts = &s->temps[i];
2814         int state;
2815 
2816         switch (ts->kind) {
2817         case TEMP_FIXED:
2818         case TEMP_GLOBAL:
2819         case TEMP_LOCAL:
2820             state = TS_DEAD | TS_MEM;
2821             break;
2822         case TEMP_NORMAL:
2823         case TEMP_CONST:
2824             state = TS_DEAD;
2825             break;
2826         default:
2827             g_assert_not_reached();
2828         }
2829         ts->state = state;
2830         la_reset_pref(ts);
2831     }
2832 }
2833 
2834 /* liveness analysis: sync globals back to memory.  */
2835 static void la_global_sync(TCGContext *s, int ng)
2836 {
2837     int i;
2838 
2839     for (i = 0; i < ng; ++i) {
2840         int state = s->temps[i].state;
2841         s->temps[i].state = state | TS_MEM;
2842         if (state == TS_DEAD) {
2843             /* If the global was previously dead, reset prefs.  */
2844             la_reset_pref(&s->temps[i]);
2845         }
2846     }
2847 }
2848 
2849 /*
2850  * liveness analysis: conditional branch: all temps are dead,
2851  * globals and local temps should be synced.
2852  */
2853 static void la_bb_sync(TCGContext *s, int ng, int nt)
2854 {
2855     la_global_sync(s, ng);
2856 
2857     for (int i = ng; i < nt; ++i) {
2858         TCGTemp *ts = &s->temps[i];
2859         int state;
2860 
2861         switch (ts->kind) {
2862         case TEMP_LOCAL:
2863             state = ts->state;
2864             ts->state = state | TS_MEM;
2865             if (state != TS_DEAD) {
2866                 continue;
2867             }
2868             break;
2869         case TEMP_NORMAL:
2870             s->temps[i].state = TS_DEAD;
2871             break;
2872         case TEMP_CONST:
2873             continue;
2874         default:
2875             g_assert_not_reached();
2876         }
2877         la_reset_pref(&s->temps[i]);
2878     }
2879 }
2880 
2881 /* liveness analysis: sync globals back to memory and kill.  */
2882 static void la_global_kill(TCGContext *s, int ng)
2883 {
2884     int i;
2885 
2886     for (i = 0; i < ng; i++) {
2887         s->temps[i].state = TS_DEAD | TS_MEM;
2888         la_reset_pref(&s->temps[i]);
2889     }
2890 }
2891 
2892 /* liveness analysis: note live globals crossing calls.  */
2893 static void la_cross_call(TCGContext *s, int nt)
2894 {
2895     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2896     int i;
2897 
2898     for (i = 0; i < nt; i++) {
2899         TCGTemp *ts = &s->temps[i];
2900         if (!(ts->state & TS_DEAD)) {
2901             TCGRegSet *pset = la_temp_pref(ts);
2902             TCGRegSet set = *pset;
2903 
2904             set &= mask;
2905             /* If the combination is not possible, restart.  */
2906             if (set == 0) {
2907                 set = tcg_target_available_regs[ts->type] & mask;
2908             }
2909             *pset = set;
2910         }
2911     }
2912 }
2913 
2914 /* Liveness analysis : update the opc_arg_life array to tell if a
2915    given input arguments is dead. Instructions updating dead
2916    temporaries are removed. */
2917 static void liveness_pass_1(TCGContext *s)
2918 {
2919     int nb_globals = s->nb_globals;
2920     int nb_temps = s->nb_temps;
2921     TCGOp *op, *op_prev;
2922     TCGRegSet *prefs;
2923     int i;
2924 
2925     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2926     for (i = 0; i < nb_temps; ++i) {
2927         s->temps[i].state_ptr = prefs + i;
2928     }
2929 
2930     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2931     la_func_end(s, nb_globals, nb_temps);
2932 
2933     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2934         int nb_iargs, nb_oargs;
2935         TCGOpcode opc_new, opc_new2;
2936         bool have_opc_new2;
2937         TCGLifeData arg_life = 0;
2938         TCGTemp *ts;
2939         TCGOpcode opc = op->opc;
2940         const TCGOpDef *def = &tcg_op_defs[opc];
2941 
2942         switch (opc) {
2943         case INDEX_op_call:
2944             {
2945                 int call_flags;
2946                 int nb_call_regs;
2947 
2948                 nb_oargs = TCGOP_CALLO(op);
2949                 nb_iargs = TCGOP_CALLI(op);
2950                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2951 
2952                 /* pure functions can be removed if their result is unused */
2953                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2954                     for (i = 0; i < nb_oargs; i++) {
2955                         ts = arg_temp(op->args[i]);
2956                         if (ts->state != TS_DEAD) {
2957                             goto do_not_remove_call;
2958                         }
2959                     }
2960                     goto do_remove;
2961                 }
2962             do_not_remove_call:
2963 
2964                 /* Output args are dead.  */
2965                 for (i = 0; i < nb_oargs; i++) {
2966                     ts = arg_temp(op->args[i]);
2967                     if (ts->state & TS_DEAD) {
2968                         arg_life |= DEAD_ARG << i;
2969                     }
2970                     if (ts->state & TS_MEM) {
2971                         arg_life |= SYNC_ARG << i;
2972                     }
2973                     ts->state = TS_DEAD;
2974                     la_reset_pref(ts);
2975 
2976                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2977                     op->output_pref[i] = 0;
2978                 }
2979 
2980                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2981                                     TCG_CALL_NO_READ_GLOBALS))) {
2982                     la_global_kill(s, nb_globals);
2983                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2984                     la_global_sync(s, nb_globals);
2985                 }
2986 
2987                 /* Record arguments that die in this helper.  */
2988                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2989                     ts = arg_temp(op->args[i]);
2990                     if (ts && ts->state & TS_DEAD) {
2991                         arg_life |= DEAD_ARG << i;
2992                     }
2993                 }
2994 
2995                 /* For all live registers, remove call-clobbered prefs.  */
2996                 la_cross_call(s, nb_temps);
2997 
2998                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2999 
3000                 /* Input arguments are live for preceding opcodes.  */
3001                 for (i = 0; i < nb_iargs; i++) {
3002                     ts = arg_temp(op->args[i + nb_oargs]);
3003                     if (ts && ts->state & TS_DEAD) {
3004                         /* For those arguments that die, and will be allocated
3005                          * in registers, clear the register set for that arg,
3006                          * to be filled in below.  For args that will be on
3007                          * the stack, reset to any available reg.
3008                          */
3009                         *la_temp_pref(ts)
3010                             = (i < nb_call_regs ? 0 :
3011                                tcg_target_available_regs[ts->type]);
3012                         ts->state &= ~TS_DEAD;
3013                     }
3014                 }
3015 
3016                 /* For each input argument, add its input register to prefs.
3017                    If a temp is used once, this produces a single set bit.  */
3018                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3019                     ts = arg_temp(op->args[i + nb_oargs]);
3020                     if (ts) {
3021                         tcg_regset_set_reg(*la_temp_pref(ts),
3022                                            tcg_target_call_iarg_regs[i]);
3023                     }
3024                 }
3025             }
3026             break;
3027         case INDEX_op_insn_start:
3028             break;
3029         case INDEX_op_discard:
3030             /* mark the temporary as dead */
3031             ts = arg_temp(op->args[0]);
3032             ts->state = TS_DEAD;
3033             la_reset_pref(ts);
3034             break;
3035 
3036         case INDEX_op_add2_i32:
3037             opc_new = INDEX_op_add_i32;
3038             goto do_addsub2;
3039         case INDEX_op_sub2_i32:
3040             opc_new = INDEX_op_sub_i32;
3041             goto do_addsub2;
3042         case INDEX_op_add2_i64:
3043             opc_new = INDEX_op_add_i64;
3044             goto do_addsub2;
3045         case INDEX_op_sub2_i64:
3046             opc_new = INDEX_op_sub_i64;
3047         do_addsub2:
3048             nb_iargs = 4;
3049             nb_oargs = 2;
3050             /* Test if the high part of the operation is dead, but not
3051                the low part.  The result can be optimized to a simple
3052                add or sub.  This happens often for x86_64 guest when the
3053                cpu mode is set to 32 bit.  */
3054             if (arg_temp(op->args[1])->state == TS_DEAD) {
3055                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3056                     goto do_remove;
3057                 }
3058                 /* Replace the opcode and adjust the args in place,
3059                    leaving 3 unused args at the end.  */
3060                 op->opc = opc = opc_new;
3061                 op->args[1] = op->args[2];
3062                 op->args[2] = op->args[4];
3063                 /* Fall through and mark the single-word operation live.  */
3064                 nb_iargs = 2;
3065                 nb_oargs = 1;
3066             }
3067             goto do_not_remove;
3068 
3069         case INDEX_op_mulu2_i32:
3070             opc_new = INDEX_op_mul_i32;
3071             opc_new2 = INDEX_op_muluh_i32;
3072             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3073             goto do_mul2;
3074         case INDEX_op_muls2_i32:
3075             opc_new = INDEX_op_mul_i32;
3076             opc_new2 = INDEX_op_mulsh_i32;
3077             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3078             goto do_mul2;
3079         case INDEX_op_mulu2_i64:
3080             opc_new = INDEX_op_mul_i64;
3081             opc_new2 = INDEX_op_muluh_i64;
3082             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3083             goto do_mul2;
3084         case INDEX_op_muls2_i64:
3085             opc_new = INDEX_op_mul_i64;
3086             opc_new2 = INDEX_op_mulsh_i64;
3087             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3088             goto do_mul2;
3089         do_mul2:
3090             nb_iargs = 2;
3091             nb_oargs = 2;
3092             if (arg_temp(op->args[1])->state == TS_DEAD) {
3093                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3094                     /* Both parts of the operation are dead.  */
3095                     goto do_remove;
3096                 }
3097                 /* The high part of the operation is dead; generate the low. */
3098                 op->opc = opc = opc_new;
3099                 op->args[1] = op->args[2];
3100                 op->args[2] = op->args[3];
3101             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3102                 /* The low part of the operation is dead; generate the high. */
3103                 op->opc = opc = opc_new2;
3104                 op->args[0] = op->args[1];
3105                 op->args[1] = op->args[2];
3106                 op->args[2] = op->args[3];
3107             } else {
3108                 goto do_not_remove;
3109             }
3110             /* Mark the single-word operation live.  */
3111             nb_oargs = 1;
3112             goto do_not_remove;
3113 
3114         default:
3115             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3116             nb_iargs = def->nb_iargs;
3117             nb_oargs = def->nb_oargs;
3118 
3119             /* Test if the operation can be removed because all
3120                its outputs are dead. We assume that nb_oargs == 0
3121                implies side effects */
3122             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3123                 for (i = 0; i < nb_oargs; i++) {
3124                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3125                         goto do_not_remove;
3126                     }
3127                 }
3128                 goto do_remove;
3129             }
3130             goto do_not_remove;
3131 
3132         do_remove:
3133             tcg_op_remove(s, op);
3134             break;
3135 
3136         do_not_remove:
3137             for (i = 0; i < nb_oargs; i++) {
3138                 ts = arg_temp(op->args[i]);
3139 
3140                 /* Remember the preference of the uses that followed.  */
3141                 op->output_pref[i] = *la_temp_pref(ts);
3142 
3143                 /* Output args are dead.  */
3144                 if (ts->state & TS_DEAD) {
3145                     arg_life |= DEAD_ARG << i;
3146                 }
3147                 if (ts->state & TS_MEM) {
3148                     arg_life |= SYNC_ARG << i;
3149                 }
3150                 ts->state = TS_DEAD;
3151                 la_reset_pref(ts);
3152             }
3153 
3154             /* If end of basic block, update.  */
3155             if (def->flags & TCG_OPF_BB_EXIT) {
3156                 la_func_end(s, nb_globals, nb_temps);
3157             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3158                 la_bb_sync(s, nb_globals, nb_temps);
3159             } else if (def->flags & TCG_OPF_BB_END) {
3160                 la_bb_end(s, nb_globals, nb_temps);
3161             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3162                 la_global_sync(s, nb_globals);
3163                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3164                     la_cross_call(s, nb_temps);
3165                 }
3166             }
3167 
3168             /* Record arguments that die in this opcode.  */
3169             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3170                 ts = arg_temp(op->args[i]);
3171                 if (ts->state & TS_DEAD) {
3172                     arg_life |= DEAD_ARG << i;
3173                 }
3174             }
3175 
3176             /* Input arguments are live for preceding opcodes.  */
3177             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3178                 ts = arg_temp(op->args[i]);
3179                 if (ts->state & TS_DEAD) {
3180                     /* For operands that were dead, initially allow
3181                        all regs for the type.  */
3182                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3183                     ts->state &= ~TS_DEAD;
3184                 }
3185             }
3186 
3187             /* Incorporate constraints for this operand.  */
3188             switch (opc) {
3189             case INDEX_op_mov_i32:
3190             case INDEX_op_mov_i64:
3191                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3192                    have proper constraints.  That said, special case
3193                    moves to propagate preferences backward.  */
3194                 if (IS_DEAD_ARG(1)) {
3195                     *la_temp_pref(arg_temp(op->args[0]))
3196                         = *la_temp_pref(arg_temp(op->args[1]));
3197                 }
3198                 break;
3199 
3200             default:
3201                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3202                     const TCGArgConstraint *ct = &def->args_ct[i];
3203                     TCGRegSet set, *pset;
3204 
3205                     ts = arg_temp(op->args[i]);
3206                     pset = la_temp_pref(ts);
3207                     set = *pset;
3208 
3209                     set &= ct->regs;
3210                     if (ct->ialias) {
3211                         set &= op->output_pref[ct->alias_index];
3212                     }
3213                     /* If the combination is not possible, restart.  */
3214                     if (set == 0) {
3215                         set = ct->regs;
3216                     }
3217                     *pset = set;
3218                 }
3219                 break;
3220             }
3221             break;
3222         }
3223         op->life = arg_life;
3224     }
3225 }
3226 
3227 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3228 static bool liveness_pass_2(TCGContext *s)
3229 {
3230     int nb_globals = s->nb_globals;
3231     int nb_temps, i;
3232     bool changes = false;
3233     TCGOp *op, *op_next;
3234 
3235     /* Create a temporary for each indirect global.  */
3236     for (i = 0; i < nb_globals; ++i) {
3237         TCGTemp *its = &s->temps[i];
3238         if (its->indirect_reg) {
3239             TCGTemp *dts = tcg_temp_alloc(s);
3240             dts->type = its->type;
3241             dts->base_type = its->base_type;
3242             its->state_ptr = dts;
3243         } else {
3244             its->state_ptr = NULL;
3245         }
3246         /* All globals begin dead.  */
3247         its->state = TS_DEAD;
3248     }
3249     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3250         TCGTemp *its = &s->temps[i];
3251         its->state_ptr = NULL;
3252         its->state = TS_DEAD;
3253     }
3254 
3255     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3256         TCGOpcode opc = op->opc;
3257         const TCGOpDef *def = &tcg_op_defs[opc];
3258         TCGLifeData arg_life = op->life;
3259         int nb_iargs, nb_oargs, call_flags;
3260         TCGTemp *arg_ts, *dir_ts;
3261 
3262         if (opc == INDEX_op_call) {
3263             nb_oargs = TCGOP_CALLO(op);
3264             nb_iargs = TCGOP_CALLI(op);
3265             call_flags = op->args[nb_oargs + nb_iargs + 1];
3266         } else {
3267             nb_iargs = def->nb_iargs;
3268             nb_oargs = def->nb_oargs;
3269 
3270             /* Set flags similar to how calls require.  */
3271             if (def->flags & TCG_OPF_COND_BRANCH) {
3272                 /* Like reading globals: sync_globals */
3273                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3274             } else if (def->flags & TCG_OPF_BB_END) {
3275                 /* Like writing globals: save_globals */
3276                 call_flags = 0;
3277             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3278                 /* Like reading globals: sync_globals */
3279                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3280             } else {
3281                 /* No effect on globals.  */
3282                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3283                               TCG_CALL_NO_WRITE_GLOBALS);
3284             }
3285         }
3286 
3287         /* Make sure that input arguments are available.  */
3288         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3289             arg_ts = arg_temp(op->args[i]);
3290             if (arg_ts) {
3291                 dir_ts = arg_ts->state_ptr;
3292                 if (dir_ts && arg_ts->state == TS_DEAD) {
3293                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3294                                       ? INDEX_op_ld_i32
3295                                       : INDEX_op_ld_i64);
3296                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3297 
3298                     lop->args[0] = temp_arg(dir_ts);
3299                     lop->args[1] = temp_arg(arg_ts->mem_base);
3300                     lop->args[2] = arg_ts->mem_offset;
3301 
3302                     /* Loaded, but synced with memory.  */
3303                     arg_ts->state = TS_MEM;
3304                 }
3305             }
3306         }
3307 
3308         /* Perform input replacement, and mark inputs that became dead.
3309            No action is required except keeping temp_state up to date
3310            so that we reload when needed.  */
3311         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3312             arg_ts = arg_temp(op->args[i]);
3313             if (arg_ts) {
3314                 dir_ts = arg_ts->state_ptr;
3315                 if (dir_ts) {
3316                     op->args[i] = temp_arg(dir_ts);
3317                     changes = true;
3318                     if (IS_DEAD_ARG(i)) {
3319                         arg_ts->state = TS_DEAD;
3320                     }
3321                 }
3322             }
3323         }
3324 
3325         /* Liveness analysis should ensure that the following are
3326            all correct, for call sites and basic block end points.  */
3327         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3328             /* Nothing to do */
3329         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3330             for (i = 0; i < nb_globals; ++i) {
3331                 /* Liveness should see that globals are synced back,
3332                    that is, either TS_DEAD or TS_MEM.  */
3333                 arg_ts = &s->temps[i];
3334                 tcg_debug_assert(arg_ts->state_ptr == 0
3335                                  || arg_ts->state != 0);
3336             }
3337         } else {
3338             for (i = 0; i < nb_globals; ++i) {
3339                 /* Liveness should see that globals are saved back,
3340                    that is, TS_DEAD, waiting to be reloaded.  */
3341                 arg_ts = &s->temps[i];
3342                 tcg_debug_assert(arg_ts->state_ptr == 0
3343                                  || arg_ts->state == TS_DEAD);
3344             }
3345         }
3346 
3347         /* Outputs become available.  */
3348         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3349             arg_ts = arg_temp(op->args[0]);
3350             dir_ts = arg_ts->state_ptr;
3351             if (dir_ts) {
3352                 op->args[0] = temp_arg(dir_ts);
3353                 changes = true;
3354 
3355                 /* The output is now live and modified.  */
3356                 arg_ts->state = 0;
3357 
3358                 if (NEED_SYNC_ARG(0)) {
3359                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3360                                       ? INDEX_op_st_i32
3361                                       : INDEX_op_st_i64);
3362                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3363                     TCGTemp *out_ts = dir_ts;
3364 
3365                     if (IS_DEAD_ARG(0)) {
3366                         out_ts = arg_temp(op->args[1]);
3367                         arg_ts->state = TS_DEAD;
3368                         tcg_op_remove(s, op);
3369                     } else {
3370                         arg_ts->state = TS_MEM;
3371                     }
3372 
3373                     sop->args[0] = temp_arg(out_ts);
3374                     sop->args[1] = temp_arg(arg_ts->mem_base);
3375                     sop->args[2] = arg_ts->mem_offset;
3376                 } else {
3377                     tcg_debug_assert(!IS_DEAD_ARG(0));
3378                 }
3379             }
3380         } else {
3381             for (i = 0; i < nb_oargs; i++) {
3382                 arg_ts = arg_temp(op->args[i]);
3383                 dir_ts = arg_ts->state_ptr;
3384                 if (!dir_ts) {
3385                     continue;
3386                 }
3387                 op->args[i] = temp_arg(dir_ts);
3388                 changes = true;
3389 
3390                 /* The output is now live and modified.  */
3391                 arg_ts->state = 0;
3392 
3393                 /* Sync outputs upon their last write.  */
3394                 if (NEED_SYNC_ARG(i)) {
3395                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3396                                       ? INDEX_op_st_i32
3397                                       : INDEX_op_st_i64);
3398                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3399 
3400                     sop->args[0] = temp_arg(dir_ts);
3401                     sop->args[1] = temp_arg(arg_ts->mem_base);
3402                     sop->args[2] = arg_ts->mem_offset;
3403 
3404                     arg_ts->state = TS_MEM;
3405                 }
3406                 /* Drop outputs that are dead.  */
3407                 if (IS_DEAD_ARG(i)) {
3408                     arg_ts->state = TS_DEAD;
3409                 }
3410             }
3411         }
3412     }
3413 
3414     return changes;
3415 }
3416 
3417 #ifdef CONFIG_DEBUG_TCG
3418 static void dump_regs(TCGContext *s)
3419 {
3420     TCGTemp *ts;
3421     int i;
3422     char buf[64];
3423 
3424     for(i = 0; i < s->nb_temps; i++) {
3425         ts = &s->temps[i];
3426         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3427         switch(ts->val_type) {
3428         case TEMP_VAL_REG:
3429             printf("%s", tcg_target_reg_names[ts->reg]);
3430             break;
3431         case TEMP_VAL_MEM:
3432             printf("%d(%s)", (int)ts->mem_offset,
3433                    tcg_target_reg_names[ts->mem_base->reg]);
3434             break;
3435         case TEMP_VAL_CONST:
3436             printf("$0x%" PRIx64, ts->val);
3437             break;
3438         case TEMP_VAL_DEAD:
3439             printf("D");
3440             break;
3441         default:
3442             printf("???");
3443             break;
3444         }
3445         printf("\n");
3446     }
3447 
3448     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3449         if (s->reg_to_temp[i] != NULL) {
3450             printf("%s: %s\n",
3451                    tcg_target_reg_names[i],
3452                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3453         }
3454     }
3455 }
3456 
3457 static void check_regs(TCGContext *s)
3458 {
3459     int reg;
3460     int k;
3461     TCGTemp *ts;
3462     char buf[64];
3463 
3464     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3465         ts = s->reg_to_temp[reg];
3466         if (ts != NULL) {
3467             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3468                 printf("Inconsistency for register %s:\n",
3469                        tcg_target_reg_names[reg]);
3470                 goto fail;
3471             }
3472         }
3473     }
3474     for (k = 0; k < s->nb_temps; k++) {
3475         ts = &s->temps[k];
3476         if (ts->val_type == TEMP_VAL_REG
3477             && ts->kind != TEMP_FIXED
3478             && s->reg_to_temp[ts->reg] != ts) {
3479             printf("Inconsistency for temp %s:\n",
3480                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3481         fail:
3482             printf("reg state:\n");
3483             dump_regs(s);
3484             tcg_abort();
3485         }
3486     }
3487 }
3488 #endif
3489 
3490 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3491 {
3492 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3493     /* Sparc64 stack is accessed with offset of 2047 */
3494     s->current_frame_offset = (s->current_frame_offset +
3495                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3496         ~(sizeof(tcg_target_long) - 1);
3497 #endif
3498     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3499         s->frame_end) {
3500         tcg_abort();
3501     }
3502     ts->mem_offset = s->current_frame_offset;
3503     ts->mem_base = s->frame_temp;
3504     ts->mem_allocated = 1;
3505     s->current_frame_offset += sizeof(tcg_target_long);
3506 }
3507 
3508 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3509 
3510 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3511    mark it free; otherwise mark it dead.  */
3512 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3513 {
3514     TCGTempVal new_type;
3515 
3516     switch (ts->kind) {
3517     case TEMP_FIXED:
3518         return;
3519     case TEMP_GLOBAL:
3520     case TEMP_LOCAL:
3521         new_type = TEMP_VAL_MEM;
3522         break;
3523     case TEMP_NORMAL:
3524         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3525         break;
3526     case TEMP_CONST:
3527         new_type = TEMP_VAL_CONST;
3528         break;
3529     default:
3530         g_assert_not_reached();
3531     }
3532     if (ts->val_type == TEMP_VAL_REG) {
3533         s->reg_to_temp[ts->reg] = NULL;
3534     }
3535     ts->val_type = new_type;
3536 }
3537 
3538 /* Mark a temporary as dead.  */
3539 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3540 {
3541     temp_free_or_dead(s, ts, 1);
3542 }
3543 
3544 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3545    registers needs to be allocated to store a constant.  If 'free_or_dead'
3546    is non-zero, subsequently release the temporary; if it is positive, the
3547    temp is dead; if it is negative, the temp is free.  */
3548 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3549                       TCGRegSet preferred_regs, int free_or_dead)
3550 {
3551     if (!temp_readonly(ts) && !ts->mem_coherent) {
3552         if (!ts->mem_allocated) {
3553             temp_allocate_frame(s, ts);
3554         }
3555         switch (ts->val_type) {
3556         case TEMP_VAL_CONST:
3557             /* If we're going to free the temp immediately, then we won't
3558                require it later in a register, so attempt to store the
3559                constant to memory directly.  */
3560             if (free_or_dead
3561                 && tcg_out_sti(s, ts->type, ts->val,
3562                                ts->mem_base->reg, ts->mem_offset)) {
3563                 break;
3564             }
3565             temp_load(s, ts, tcg_target_available_regs[ts->type],
3566                       allocated_regs, preferred_regs);
3567             /* fallthrough */
3568 
3569         case TEMP_VAL_REG:
3570             tcg_out_st(s, ts->type, ts->reg,
3571                        ts->mem_base->reg, ts->mem_offset);
3572             break;
3573 
3574         case TEMP_VAL_MEM:
3575             break;
3576 
3577         case TEMP_VAL_DEAD:
3578         default:
3579             tcg_abort();
3580         }
3581         ts->mem_coherent = 1;
3582     }
3583     if (free_or_dead) {
3584         temp_free_or_dead(s, ts, free_or_dead);
3585     }
3586 }
3587 
3588 /* free register 'reg' by spilling the corresponding temporary if necessary */
3589 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3590 {
3591     TCGTemp *ts = s->reg_to_temp[reg];
3592     if (ts != NULL) {
3593         temp_sync(s, ts, allocated_regs, 0, -1);
3594     }
3595 }
3596 
3597 /**
3598  * tcg_reg_alloc:
3599  * @required_regs: Set of registers in which we must allocate.
3600  * @allocated_regs: Set of registers which must be avoided.
3601  * @preferred_regs: Set of registers we should prefer.
3602  * @rev: True if we search the registers in "indirect" order.
3603  *
3604  * The allocated register must be in @required_regs & ~@allocated_regs,
3605  * but if we can put it in @preferred_regs we may save a move later.
3606  */
3607 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3608                             TCGRegSet allocated_regs,
3609                             TCGRegSet preferred_regs, bool rev)
3610 {
3611     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3612     TCGRegSet reg_ct[2];
3613     const int *order;
3614 
3615     reg_ct[1] = required_regs & ~allocated_regs;
3616     tcg_debug_assert(reg_ct[1] != 0);
3617     reg_ct[0] = reg_ct[1] & preferred_regs;
3618 
3619     /* Skip the preferred_regs option if it cannot be satisfied,
3620        or if the preference made no difference.  */
3621     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3622 
3623     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3624 
3625     /* Try free registers, preferences first.  */
3626     for (j = f; j < 2; j++) {
3627         TCGRegSet set = reg_ct[j];
3628 
3629         if (tcg_regset_single(set)) {
3630             /* One register in the set.  */
3631             TCGReg reg = tcg_regset_first(set);
3632             if (s->reg_to_temp[reg] == NULL) {
3633                 return reg;
3634             }
3635         } else {
3636             for (i = 0; i < n; i++) {
3637                 TCGReg reg = order[i];
3638                 if (s->reg_to_temp[reg] == NULL &&
3639                     tcg_regset_test_reg(set, reg)) {
3640                     return reg;
3641                 }
3642             }
3643         }
3644     }
3645 
3646     /* We must spill something.  */
3647     for (j = f; j < 2; j++) {
3648         TCGRegSet set = reg_ct[j];
3649 
3650         if (tcg_regset_single(set)) {
3651             /* One register in the set.  */
3652             TCGReg reg = tcg_regset_first(set);
3653             tcg_reg_free(s, reg, allocated_regs);
3654             return reg;
3655         } else {
3656             for (i = 0; i < n; i++) {
3657                 TCGReg reg = order[i];
3658                 if (tcg_regset_test_reg(set, reg)) {
3659                     tcg_reg_free(s, reg, allocated_regs);
3660                     return reg;
3661                 }
3662             }
3663         }
3664     }
3665 
3666     tcg_abort();
3667 }
3668 
3669 /* Make sure the temporary is in a register.  If needed, allocate the register
3670    from DESIRED while avoiding ALLOCATED.  */
3671 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3672                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3673 {
3674     TCGReg reg;
3675 
3676     switch (ts->val_type) {
3677     case TEMP_VAL_REG:
3678         return;
3679     case TEMP_VAL_CONST:
3680         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3681                             preferred_regs, ts->indirect_base);
3682         if (ts->type <= TCG_TYPE_I64) {
3683             tcg_out_movi(s, ts->type, reg, ts->val);
3684         } else {
3685             uint64_t val = ts->val;
3686             MemOp vece = MO_64;
3687 
3688             /*
3689              * Find the minimal vector element that matches the constant.
3690              * The targets will, in general, have to do this search anyway,
3691              * do this generically.
3692              */
3693             if (val == dup_const(MO_8, val)) {
3694                 vece = MO_8;
3695             } else if (val == dup_const(MO_16, val)) {
3696                 vece = MO_16;
3697             } else if (val == dup_const(MO_32, val)) {
3698                 vece = MO_32;
3699             }
3700 
3701             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3702         }
3703         ts->mem_coherent = 0;
3704         break;
3705     case TEMP_VAL_MEM:
3706         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3707                             preferred_regs, ts->indirect_base);
3708         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3709         ts->mem_coherent = 1;
3710         break;
3711     case TEMP_VAL_DEAD:
3712     default:
3713         tcg_abort();
3714     }
3715     ts->reg = reg;
3716     ts->val_type = TEMP_VAL_REG;
3717     s->reg_to_temp[reg] = ts;
3718 }
3719 
3720 /* Save a temporary to memory. 'allocated_regs' is used in case a
3721    temporary registers needs to be allocated to store a constant.  */
3722 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3723 {
3724     /* The liveness analysis already ensures that globals are back
3725        in memory. Keep an tcg_debug_assert for safety. */
3726     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3727 }
3728 
3729 /* save globals to their canonical location and assume they can be
3730    modified be the following code. 'allocated_regs' is used in case a
3731    temporary registers needs to be allocated to store a constant. */
3732 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3733 {
3734     int i, n;
3735 
3736     for (i = 0, n = s->nb_globals; i < n; i++) {
3737         temp_save(s, &s->temps[i], allocated_regs);
3738     }
3739 }
3740 
3741 /* sync globals to their canonical location and assume they can be
3742    read by the following code. 'allocated_regs' is used in case a
3743    temporary registers needs to be allocated to store a constant. */
3744 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3745 {
3746     int i, n;
3747 
3748     for (i = 0, n = s->nb_globals; i < n; i++) {
3749         TCGTemp *ts = &s->temps[i];
3750         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3751                          || ts->kind == TEMP_FIXED
3752                          || ts->mem_coherent);
3753     }
3754 }
3755 
3756 /* at the end of a basic block, we assume all temporaries are dead and
3757    all globals are stored at their canonical location. */
3758 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3759 {
3760     int i;
3761 
3762     for (i = s->nb_globals; i < s->nb_temps; i++) {
3763         TCGTemp *ts = &s->temps[i];
3764 
3765         switch (ts->kind) {
3766         case TEMP_LOCAL:
3767             temp_save(s, ts, allocated_regs);
3768             break;
3769         case TEMP_NORMAL:
3770             /* The liveness analysis already ensures that temps are dead.
3771                Keep an tcg_debug_assert for safety. */
3772             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3773             break;
3774         case TEMP_CONST:
3775             /* Similarly, we should have freed any allocated register. */
3776             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3777             break;
3778         default:
3779             g_assert_not_reached();
3780         }
3781     }
3782 
3783     save_globals(s, allocated_regs);
3784 }
3785 
3786 /*
3787  * At a conditional branch, we assume all temporaries are dead and
3788  * all globals and local temps are synced to their location.
3789  */
3790 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3791 {
3792     sync_globals(s, allocated_regs);
3793 
3794     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3795         TCGTemp *ts = &s->temps[i];
3796         /*
3797          * The liveness analysis already ensures that temps are dead.
3798          * Keep tcg_debug_asserts for safety.
3799          */
3800         switch (ts->kind) {
3801         case TEMP_LOCAL:
3802             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3803             break;
3804         case TEMP_NORMAL:
3805             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3806             break;
3807         case TEMP_CONST:
3808             break;
3809         default:
3810             g_assert_not_reached();
3811         }
3812     }
3813 }
3814 
3815 /*
3816  * Specialized code generation for INDEX_op_mov_* with a constant.
3817  */
3818 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3819                                   tcg_target_ulong val, TCGLifeData arg_life,
3820                                   TCGRegSet preferred_regs)
3821 {
3822     /* ENV should not be modified.  */
3823     tcg_debug_assert(!temp_readonly(ots));
3824 
3825     /* The movi is not explicitly generated here.  */
3826     if (ots->val_type == TEMP_VAL_REG) {
3827         s->reg_to_temp[ots->reg] = NULL;
3828     }
3829     ots->val_type = TEMP_VAL_CONST;
3830     ots->val = val;
3831     ots->mem_coherent = 0;
3832     if (NEED_SYNC_ARG(0)) {
3833         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3834     } else if (IS_DEAD_ARG(0)) {
3835         temp_dead(s, ots);
3836     }
3837 }
3838 
3839 /*
3840  * Specialized code generation for INDEX_op_mov_*.
3841  */
3842 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3843 {
3844     const TCGLifeData arg_life = op->life;
3845     TCGRegSet allocated_regs, preferred_regs;
3846     TCGTemp *ts, *ots;
3847     TCGType otype, itype;
3848 
3849     allocated_regs = s->reserved_regs;
3850     preferred_regs = op->output_pref[0];
3851     ots = arg_temp(op->args[0]);
3852     ts = arg_temp(op->args[1]);
3853 
3854     /* ENV should not be modified.  */
3855     tcg_debug_assert(!temp_readonly(ots));
3856 
3857     /* Note that otype != itype for no-op truncation.  */
3858     otype = ots->type;
3859     itype = ts->type;
3860 
3861     if (ts->val_type == TEMP_VAL_CONST) {
3862         /* propagate constant or generate sti */
3863         tcg_target_ulong val = ts->val;
3864         if (IS_DEAD_ARG(1)) {
3865             temp_dead(s, ts);
3866         }
3867         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3868         return;
3869     }
3870 
3871     /* If the source value is in memory we're going to be forced
3872        to have it in a register in order to perform the copy.  Copy
3873        the SOURCE value into its own register first, that way we
3874        don't have to reload SOURCE the next time it is used. */
3875     if (ts->val_type == TEMP_VAL_MEM) {
3876         temp_load(s, ts, tcg_target_available_regs[itype],
3877                   allocated_regs, preferred_regs);
3878     }
3879 
3880     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3881     if (IS_DEAD_ARG(0)) {
3882         /* mov to a non-saved dead register makes no sense (even with
3883            liveness analysis disabled). */
3884         tcg_debug_assert(NEED_SYNC_ARG(0));
3885         if (!ots->mem_allocated) {
3886             temp_allocate_frame(s, ots);
3887         }
3888         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3889         if (IS_DEAD_ARG(1)) {
3890             temp_dead(s, ts);
3891         }
3892         temp_dead(s, ots);
3893     } else {
3894         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3895             /* the mov can be suppressed */
3896             if (ots->val_type == TEMP_VAL_REG) {
3897                 s->reg_to_temp[ots->reg] = NULL;
3898             }
3899             ots->reg = ts->reg;
3900             temp_dead(s, ts);
3901         } else {
3902             if (ots->val_type != TEMP_VAL_REG) {
3903                 /* When allocating a new register, make sure to not spill the
3904                    input one. */
3905                 tcg_regset_set_reg(allocated_regs, ts->reg);
3906                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3907                                          allocated_regs, preferred_regs,
3908                                          ots->indirect_base);
3909             }
3910             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3911                 /*
3912                  * Cross register class move not supported.
3913                  * Store the source register into the destination slot
3914                  * and leave the destination temp as TEMP_VAL_MEM.
3915                  */
3916                 assert(!temp_readonly(ots));
3917                 if (!ts->mem_allocated) {
3918                     temp_allocate_frame(s, ots);
3919                 }
3920                 tcg_out_st(s, ts->type, ts->reg,
3921                            ots->mem_base->reg, ots->mem_offset);
3922                 ots->mem_coherent = 1;
3923                 temp_free_or_dead(s, ots, -1);
3924                 return;
3925             }
3926         }
3927         ots->val_type = TEMP_VAL_REG;
3928         ots->mem_coherent = 0;
3929         s->reg_to_temp[ots->reg] = ots;
3930         if (NEED_SYNC_ARG(0)) {
3931             temp_sync(s, ots, allocated_regs, 0, 0);
3932         }
3933     }
3934 }
3935 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 *
 * op->args[0] is the output temp and op->args[1] the input temp.  A
 * constant input is folded via tcg_reg_alloc_do_movi().  Otherwise an
 * output register is allocated and the dup is emitted by the first of
 * these strategies that succeeds: tcg_out_dup_vec() straight from the
 * input register, tcg_out_mov() of the input into the output register,
 * tcg_out_dupm_vec() from the input's memory slot, or a plain load
 * into the output register.  The mov and load paths are completed by
 * an in-place tcg_out_dup_vec() on the output register.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;
    }

    /* Register constraints of the dup_vec output (0) and input (1). */
    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /*
         * On big-endian hosts, offset the address by the size of the
         * input type (4 or 8) minus the element size (1 << vece).
         */
#ifdef HOST_WORDS_BIGENDIAN
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* No dup-from-memory: load into the output register instead. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    /* Apply the liveness data: kill the input, sync and/or kill the output. */
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
4047 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The work proceeds in phases:
 *   1. constant arguments are copied verbatim into new_args;
 *   2. each input is either passed as a constant (when
 *      tcg_target_const_match() accepts it) or placed in a register
 *      that satisfies its constraint;
 *   3. inputs flagged dead in op->life are released;
 *   4. the op flags are honoured (conditional branch, end of basic
 *      block, call-clobbered registers, side effects) and output
 *      registers are assigned;
 *   5. the instruction is emitted with tcg_out_op() or tcg_out_vec_op();
 *   6. outputs are synced to memory and/or marked dead per op->life.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Separate "do not touch" sets are tracked for inputs and outputs. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Inputs are visited in the order given by sort_index. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        /*
         * temp_load() leaves a temp that was already in a register where
         * it is, so the constraint still has to be checked here.
         */
        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        /* Keep later inputs from being allocated to this register. */
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Reuse the register of the aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* The output must avoid every input register as well. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4232 
/*
 * Specialized code generation for a dup2 vector op, whose inputs
 * op->args[1] (low) and op->args[2] (high) together form one 64-bit
 * element for the output op->args[0].
 *
 * Returns true when the op has been emitted here, either as a
 * dupi_vec of two constant halves or as a dupm_vec from memory.
 * Returns false when the caller must fall back to the generic
 * expansion; note that the output register has already been allocated
 * by then.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Combine the halves: itsl in bits 0-31, itsh in bits 32-63. */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the smallest element size whose replication equals val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        /* Both halves must be up to date in memory before loading. */
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        }
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
        }
        /* Pick the half whose slot supplies the address for the load. */
#ifdef HOST_WORDS_BIGENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* Apply the liveness data to both inputs and to the output. */
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4324 
/*
 * STACK_DIR(x) negates x when TCG_TARGET_STACK_GROWSUP is defined and
 * leaves it unchanged otherwise.
 * NOTE(review): tcg_reg_alloc_call() below handles stack direction
 * with explicit #ifdef blocks rather than this macro; confirm whether
 * it is still used elsewhere in the file.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
4330 
/*
 * Register allocation and code emission for a helper call op.
 *
 * op->args holds TCGOP_CALLO(op) outputs, then TCGOP_CALLI(op) inputs,
 * then the function address, then the call flags.  Inputs beyond the
 * number of tcg_target_call_iarg_regs are stored to the call stack
 * area; the rest are placed in the argument registers.  Dead inputs
 * are then released, call-clobbered registers freed, globals saved or
 * synced according to the call flags, the call emitted, and finally
 * the outputs bound to tcg_target_call_oarg_regs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* Number of inputs passed in registers: at most the register count. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* Dummy arguments still consume a slot but store nothing. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Vacate the argument register, then copy into it. */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                TCGRegSet arg_set = 0;

                /* Load directly into the one register that is acceptable. */
                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        reg = tcg_target_call_oarg_regs[i];
        /* The return register must be unoccupied at this point. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4465 
4466 #ifdef CONFIG_PROFILER
4467 
4468 /* avoid copy/paste errors */
4469 #define PROF_ADD(to, from, field)                       \
4470     do {                                                \
4471         (to)->field += qatomic_read(&((from)->field));  \
4472     } while (0)
4473 
4474 #define PROF_MAX(to, from, field)                                       \
4475     do {                                                                \
4476         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4477         if (val__ > (to)->field) {                                      \
4478             (to)->field = val__;                                        \
4479         }                                                               \
4480     } while (0)
4481 
4482 /* Pass in a zero'ed @prof */
4483 static inline
4484 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4485 {
4486     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4487     unsigned int i;
4488 
4489     for (i = 0; i < n_ctxs; i++) {
4490         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4491         const TCGProfile *orig = &s->prof;
4492 
4493         if (counters) {
4494             PROF_ADD(prof, orig, cpu_exec_time);
4495             PROF_ADD(prof, orig, tb_count1);
4496             PROF_ADD(prof, orig, tb_count);
4497             PROF_ADD(prof, orig, op_count);
4498             PROF_MAX(prof, orig, op_count_max);
4499             PROF_ADD(prof, orig, temp_count);
4500             PROF_MAX(prof, orig, temp_count_max);
4501             PROF_ADD(prof, orig, del_op_count);
4502             PROF_ADD(prof, orig, code_in_len);
4503             PROF_ADD(prof, orig, code_out_len);
4504             PROF_ADD(prof, orig, search_out_len);
4505             PROF_ADD(prof, orig, interm_time);
4506             PROF_ADD(prof, orig, code_time);
4507             PROF_ADD(prof, orig, la_time);
4508             PROF_ADD(prof, orig, opt_time);
4509             PROF_ADD(prof, orig, restore_count);
4510             PROF_ADD(prof, orig, restore_time);
4511         }
4512         if (table) {
4513             int i;
4514 
4515             for (i = 0; i < NB_OPS; i++) {
4516                 PROF_ADD(prof, orig, table_op_count[i]);
4517             }
4518         }
4519     }
4520 }
4521 
4522 #undef PROF_ADD
4523 #undef PROF_MAX
4524 
4525 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4526 {
4527     tcg_profile_snapshot(prof, true, false);
4528 }
4529 
4530 static void tcg_profile_snapshot_table(TCGProfile *prof)
4531 {
4532     tcg_profile_snapshot(prof, false, true);
4533 }
4534 
4535 void tcg_dump_op_count(void)
4536 {
4537     TCGProfile prof = {};
4538     int i;
4539 
4540     tcg_profile_snapshot_table(&prof);
4541     for (i = 0; i < NB_OPS; i++) {
4542         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4543                     prof.table_op_count[i]);
4544     }
4545 }
4546 
4547 int64_t tcg_cpu_exec_time(void)
4548 {
4549     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4550     unsigned int i;
4551     int64_t ret = 0;
4552 
4553     for (i = 0; i < n_ctxs; i++) {
4554         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4555         const TCGProfile *prof = &s->prof;
4556 
4557         ret += qatomic_read(&prof->cpu_exec_time);
4558     }
4559     return ret;
4560 }
4561 #else
/* Build without CONFIG_PROFILER: only report that no data is available. */
void tcg_dump_op_count(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
4566 
/*
 * Build without CONFIG_PROFILER: there is no execution time to return,
 * so report the error and terminate the process with EXIT_FAILURE.
 */
int64_t tcg_cpu_exec_time(void)
{
    static const char reason[] = "TCG profiler not compiled";

    error_report("%s: %s", __func__, reason);
    exit(EXIT_FAILURE);
}
4572 #endif
4573 
4574 
4575 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4576 {
4577 #ifdef CONFIG_PROFILER
4578     TCGProfile *prof = &s->prof;
4579 #endif
4580     int i, num_insns;
4581     TCGOp *op;
4582 
4583 #ifdef CONFIG_PROFILER
4584     {
4585         int n = 0;
4586 
4587         QTAILQ_FOREACH(op, &s->ops, link) {
4588             n++;
4589         }
4590         qatomic_set(&prof->op_count, prof->op_count + n);
4591         if (n > prof->op_count_max) {
4592             qatomic_set(&prof->op_count_max, n);
4593         }
4594 
4595         n = s->nb_temps;
4596         qatomic_set(&prof->temp_count, prof->temp_count + n);
4597         if (n > prof->temp_count_max) {
4598             qatomic_set(&prof->temp_count_max, n);
4599         }
4600     }
4601 #endif
4602 
4603 #ifdef DEBUG_DISAS
4604     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4605                  && qemu_log_in_addr_range(tb->pc))) {
4606         FILE *logfile = qemu_log_lock();
4607         qemu_log("OP:\n");
4608         tcg_dump_ops(s, false);
4609         qemu_log("\n");
4610         qemu_log_unlock(logfile);
4611     }
4612 #endif
4613 
4614 #ifdef CONFIG_DEBUG_TCG
4615     /* Ensure all labels referenced have been emitted.  */
4616     {
4617         TCGLabel *l;
4618         bool error = false;
4619 
4620         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4621             if (unlikely(!l->present) && l->refs) {
4622                 qemu_log_mask(CPU_LOG_TB_OP,
4623                               "$L%d referenced but not present.\n", l->id);
4624                 error = true;
4625             }
4626         }
4627         assert(!error);
4628     }
4629 #endif
4630 
4631 #ifdef CONFIG_PROFILER
4632     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4633 #endif
4634 
4635 #ifdef USE_TCG_OPTIMIZATIONS
4636     tcg_optimize(s);
4637 #endif
4638 
4639 #ifdef CONFIG_PROFILER
4640     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4641     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4642 #endif
4643 
4644     reachable_code_pass(s);
4645     liveness_pass_1(s);
4646 
4647     if (s->nb_indirects > 0) {
4648 #ifdef DEBUG_DISAS
4649         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4650                      && qemu_log_in_addr_range(tb->pc))) {
4651             FILE *logfile = qemu_log_lock();
4652             qemu_log("OP before indirect lowering:\n");
4653             tcg_dump_ops(s, false);
4654             qemu_log("\n");
4655             qemu_log_unlock(logfile);
4656         }
4657 #endif
4658         /* Replace indirect temps with direct temps.  */
4659         if (liveness_pass_2(s)) {
4660             /* If changes were made, re-run liveness.  */
4661             liveness_pass_1(s);
4662         }
4663     }
4664 
4665 #ifdef CONFIG_PROFILER
4666     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4667 #endif
4668 
4669 #ifdef DEBUG_DISAS
4670     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4671                  && qemu_log_in_addr_range(tb->pc))) {
4672         FILE *logfile = qemu_log_lock();
4673         qemu_log("OP after optimization and liveness analysis:\n");
4674         tcg_dump_ops(s, true);
4675         qemu_log("\n");
4676         qemu_log_unlock(logfile);
4677     }
4678 #endif
4679 
4680     tcg_reg_alloc_start(s);
4681 
4682     /*
4683      * Reset the buffer pointers when restarting after overflow.
4684      * TODO: Move this into translate-all.c with the rest of the
4685      * buffer management.  Having only this done here is confusing.
4686      */
4687     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4688     s->code_ptr = s->code_buf;
4689 
4690 #ifdef TCG_TARGET_NEED_LDST_LABELS
4691     QSIMPLEQ_INIT(&s->ldst_labels);
4692 #endif
4693 #ifdef TCG_TARGET_NEED_POOL_LABELS
4694     s->pool_labels = NULL;
4695 #endif
4696 
4697     num_insns = -1;
4698     QTAILQ_FOREACH(op, &s->ops, link) {
4699         TCGOpcode opc = op->opc;
4700 
4701 #ifdef CONFIG_PROFILER
4702         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4703 #endif
4704 
4705         switch (opc) {
4706         case INDEX_op_mov_i32:
4707         case INDEX_op_mov_i64:
4708         case INDEX_op_mov_vec:
4709             tcg_reg_alloc_mov(s, op);
4710             break;
4711         case INDEX_op_dup_vec:
4712             tcg_reg_alloc_dup(s, op);
4713             break;
4714         case INDEX_op_insn_start:
4715             if (num_insns >= 0) {
4716                 size_t off = tcg_current_code_size(s);
4717                 s->gen_insn_end_off[num_insns] = off;
4718                 /* Assert that we do not overflow our stored offset.  */
4719                 assert(s->gen_insn_end_off[num_insns] == off);
4720             }
4721             num_insns++;
4722             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4723                 target_ulong a;
4724 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4725                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4726 #else
4727                 a = op->args[i];
4728 #endif
4729                 s->gen_insn_data[num_insns][i] = a;
4730             }
4731             break;
4732         case INDEX_op_discard:
4733             temp_dead(s, arg_temp(op->args[0]));
4734             break;
4735         case INDEX_op_set_label:
4736             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4737             tcg_out_label(s, arg_label(op->args[0]));
4738             break;
4739         case INDEX_op_call:
4740             tcg_reg_alloc_call(s, op);
4741             break;
4742         case INDEX_op_dup2_vec:
4743             if (tcg_reg_alloc_dup2(s, op)) {
4744                 break;
4745             }
4746             /* fall through */
4747         default:
4748             /* Sanity check that we've not introduced any unhandled opcodes. */
4749             tcg_debug_assert(tcg_op_supported(opc));
4750             /* Note: in order to speed up the code, it would be much
4751                faster to have specialized register allocator functions for
4752                some common argument patterns */
4753             tcg_reg_alloc_op(s, op);
4754             break;
4755         }
4756 #ifdef CONFIG_DEBUG_TCG
4757         check_regs(s);
4758 #endif
4759         /* Test for (pending) buffer overflow.  The assumption is that any
4760            one operation beginning below the high water mark cannot overrun
4761            the buffer completely.  Thus we can test for overflow after
4762            generating code without having to check during generation.  */
4763         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4764             return -1;
4765         }
4766         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4767         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4768             return -2;
4769         }
4770     }
4771     tcg_debug_assert(num_insns >= 0);
4772     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4773 
4774     /* Generate TB finalization at the end of block */
4775 #ifdef TCG_TARGET_NEED_LDST_LABELS
4776     i = tcg_out_ldst_finalize(s);
4777     if (i < 0) {
4778         return i;
4779     }
4780 #endif
4781 #ifdef TCG_TARGET_NEED_POOL_LABELS
4782     i = tcg_out_pool_finalize(s);
4783     if (i < 0) {
4784         return i;
4785     }
4786 #endif
4787     if (!tcg_resolve_relocs(s)) {
4788         return -2;
4789     }
4790 
4791 #ifndef CONFIG_TCG_INTERPRETER
4792     /* flush instruction cache */
4793     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4794                         (uintptr_t)s->code_buf,
4795                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4796 #endif
4797 
4798     return tcg_current_code_size(s);
4799 }
4800 
4801 #ifdef CONFIG_PROFILER
4802 void tcg_dump_info(void)
4803 {
4804     TCGProfile prof = {};
4805     const TCGProfile *s;
4806     int64_t tb_count;
4807     int64_t tb_div_count;
4808     int64_t tot;
4809 
4810     tcg_profile_snapshot_counters(&prof);
4811     s = &prof;
4812     tb_count = s->tb_count;
4813     tb_div_count = tb_count ? tb_count : 1;
4814     tot = s->interm_time + s->code_time;
4815 
4816     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4817                 tot, tot / 2.4e9);
4818     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4819                 " %0.1f%%)\n",
4820                 tb_count, s->tb_count1 - tb_count,
4821                 (double)(s->tb_count1 - s->tb_count)
4822                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4823     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4824                 (double)s->op_count / tb_div_count, s->op_count_max);
4825     qemu_printf("deleted ops/TB      %0.2f\n",
4826                 (double)s->del_op_count / tb_div_count);
4827     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4828                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4829     qemu_printf("avg host code/TB    %0.1f\n",
4830                 (double)s->code_out_len / tb_div_count);
4831     qemu_printf("avg search data/TB  %0.1f\n",
4832                 (double)s->search_out_len / tb_div_count);
4833 
4834     qemu_printf("cycles/op           %0.1f\n",
4835                 s->op_count ? (double)tot / s->op_count : 0);
4836     qemu_printf("cycles/in byte      %0.1f\n",
4837                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4838     qemu_printf("cycles/out byte     %0.1f\n",
4839                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4840     qemu_printf("cycles/search byte     %0.1f\n",
4841                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4842     if (tot == 0) {
4843         tot = 1;
4844     }
4845     qemu_printf("  gen_interm time   %0.1f%%\n",
4846                 (double)s->interm_time / tot * 100.0);
4847     qemu_printf("  gen_code time     %0.1f%%\n",
4848                 (double)s->code_time / tot * 100.0);
4849     qemu_printf("optim./code time    %0.1f%%\n",
4850                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4851                 * 100.0);
4852     qemu_printf("liveness/code time  %0.1f%%\n",
4853                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4854     qemu_printf("cpu_restore count   %" PRId64 "\n",
4855                 s->restore_count);
4856     qemu_printf("  avg cycles        %0.1f\n",
4857                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4858 }
4859 #else
/* Build without CONFIG_PROFILER: only report that no data is available. */
void tcg_dump_info(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
4864 #endif
4865 
4866 #ifdef ELF_HOST_MACHINE
4867 /* In order to use this feature, the backend needs to do three things:
4868 
4869    (1) Define ELF_HOST_MACHINE to indicate both what value to
4870        put into the ELF image and to indicate support for the feature.
4871 
4872    (2) Define tcg_register_jit.  This should create a buffer containing
4873        the contents of a .debug_frame section that describes the post-
4874        prologue unwind info for the tcg machine.
4875 
4876    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4877 */
4878 
4879 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;

/* One node of the doubly-linked list of registered symbol files. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;       /* start of the in-memory symbol file */
    uint64_t symfile_size;          /* size of that symbol file in bytes */
};

/* The descriptor object exported under the name __jit_debug_descriptor. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;           /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Deliberately empty, non-inlined function; the empty asm statement
 * keeps a real function body in place.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}

/* The version must be initialized statically, because GDB may check
   it before we get a chance to set it at run time.  */
struct jit_descriptor __jit_debug_descriptor = {
    .version = 1,
    .action_flag = JIT_NOACTION,
    .relevant_entry = NULL,
    .first_entry = NULL,
};
4910 /* End GDB interface.  */
4911 
/*
 * Return the byte offset of @str inside @strtab, a sequence of
 * NUL-terminated strings.  The search starts at offset 1, i.e. after the
 * first byte of the table.
 *
 * There is no end-of-table check: @str must be present in @strtab.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry;

    for (entry = strtab + 1;
         strcmp(entry, str) != 0;
         entry += strlen(entry) + 1) {
        continue;
    }
    return entry - strtab;
}
4923 
4924 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4925                                  const void *debug_frame,
4926                                  size_t debug_frame_size)
4927 {
4928     struct __attribute__((packed)) DebugInfo {
4929         uint32_t  len;
4930         uint16_t  version;
4931         uint32_t  abbrev;
4932         uint8_t   ptr_size;
4933         uint8_t   cu_die;
4934         uint16_t  cu_lang;
4935         uintptr_t cu_low_pc;
4936         uintptr_t cu_high_pc;
4937         uint8_t   fn_die;
4938         char      fn_name[16];
4939         uintptr_t fn_low_pc;
4940         uintptr_t fn_high_pc;
4941         uint8_t   cu_eoc;
4942     };
4943 
4944     struct ElfImage {
4945         ElfW(Ehdr) ehdr;
4946         ElfW(Phdr) phdr;
4947         ElfW(Shdr) shdr[7];
4948         ElfW(Sym)  sym[2];
4949         struct DebugInfo di;
4950         uint8_t    da[24];
4951         char       str[80];
4952     };
4953 
4954     struct ElfImage *img;
4955 
4956     static const struct ElfImage img_template = {
4957         .ehdr = {
4958             .e_ident[EI_MAG0] = ELFMAG0,
4959             .e_ident[EI_MAG1] = ELFMAG1,
4960             .e_ident[EI_MAG2] = ELFMAG2,
4961             .e_ident[EI_MAG3] = ELFMAG3,
4962             .e_ident[EI_CLASS] = ELF_CLASS,
4963             .e_ident[EI_DATA] = ELF_DATA,
4964             .e_ident[EI_VERSION] = EV_CURRENT,
4965             .e_type = ET_EXEC,
4966             .e_machine = ELF_HOST_MACHINE,
4967             .e_version = EV_CURRENT,
4968             .e_phoff = offsetof(struct ElfImage, phdr),
4969             .e_shoff = offsetof(struct ElfImage, shdr),
4970             .e_ehsize = sizeof(ElfW(Shdr)),
4971             .e_phentsize = sizeof(ElfW(Phdr)),
4972             .e_phnum = 1,
4973             .e_shentsize = sizeof(ElfW(Shdr)),
4974             .e_shnum = ARRAY_SIZE(img->shdr),
4975             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4976 #ifdef ELF_HOST_FLAGS
4977             .e_flags = ELF_HOST_FLAGS,
4978 #endif
4979 #ifdef ELF_OSABI
4980             .e_ident[EI_OSABI] = ELF_OSABI,
4981 #endif
4982         },
4983         .phdr = {
4984             .p_type = PT_LOAD,
4985             .p_flags = PF_X,
4986         },
4987         .shdr = {
4988             [0] = { .sh_type = SHT_NULL },
4989             /* Trick: The contents of code_gen_buffer are not present in
4990                this fake ELF file; that got allocated elsewhere.  Therefore
4991                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4992                will not look for contents.  We can record any address.  */
4993             [1] = { /* .text */
4994                 .sh_type = SHT_NOBITS,
4995                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4996             },
4997             [2] = { /* .debug_info */
4998                 .sh_type = SHT_PROGBITS,
4999                 .sh_offset = offsetof(struct ElfImage, di),
5000                 .sh_size = sizeof(struct DebugInfo),
5001             },
5002             [3] = { /* .debug_abbrev */
5003                 .sh_type = SHT_PROGBITS,
5004                 .sh_offset = offsetof(struct ElfImage, da),
5005                 .sh_size = sizeof(img->da),
5006             },
5007             [4] = { /* .debug_frame */
5008                 .sh_type = SHT_PROGBITS,
5009                 .sh_offset = sizeof(struct ElfImage),
5010             },
5011             [5] = { /* .symtab */
5012                 .sh_type = SHT_SYMTAB,
5013                 .sh_offset = offsetof(struct ElfImage, sym),
5014                 .sh_size = sizeof(img->sym),
5015                 .sh_info = 1,
5016                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5017                 .sh_entsize = sizeof(ElfW(Sym)),
5018             },
5019             [6] = { /* .strtab */
5020                 .sh_type = SHT_STRTAB,
5021                 .sh_offset = offsetof(struct ElfImage, str),
5022                 .sh_size = sizeof(img->str),
5023             }
5024         },
5025         .sym = {
5026             [1] = { /* code_gen_buffer */
5027                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5028                 .st_shndx = 1,
5029             }
5030         },
5031         .di = {
5032             .len = sizeof(struct DebugInfo) - 4,
5033             .version = 2,
5034             .ptr_size = sizeof(void *),
5035             .cu_die = 1,
5036             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5037             .fn_die = 2,
5038             .fn_name = "code_gen_buffer"
5039         },
5040         .da = {
5041             1,          /* abbrev number (the cu) */
5042             0x11, 1,    /* DW_TAG_compile_unit, has children */
5043             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5044             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5045             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5046             0, 0,       /* end of abbrev */
5047             2,          /* abbrev number (the fn) */
5048             0x2e, 0,    /* DW_TAG_subprogram, no children */
5049             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5050             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5051             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5052             0, 0,       /* end of abbrev */
5053             0           /* no more abbrev */
5054         },
5055         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5056                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5057     };
5058 
5059     /* We only need a single jit entry; statically allocate it.  */
5060     static struct jit_code_entry one_entry;
5061 
5062     uintptr_t buf = (uintptr_t)buf_ptr;
5063     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5064     DebugFrameHeader *dfh;
5065 
5066     img = g_malloc(img_size);
5067     *img = img_template;
5068 
5069     img->phdr.p_vaddr = buf;
5070     img->phdr.p_paddr = buf;
5071     img->phdr.p_memsz = buf_size;
5072 
5073     img->shdr[1].sh_name = find_string(img->str, ".text");
5074     img->shdr[1].sh_addr = buf;
5075     img->shdr[1].sh_size = buf_size;
5076 
5077     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5078     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5079 
5080     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5081     img->shdr[4].sh_size = debug_frame_size;
5082 
5083     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5084     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5085 
5086     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5087     img->sym[1].st_value = buf;
5088     img->sym[1].st_size = buf_size;
5089 
5090     img->di.cu_low_pc = buf;
5091     img->di.cu_high_pc = buf + buf_size;
5092     img->di.fn_low_pc = buf;
5093     img->di.fn_high_pc = buf + buf_size;
5094 
5095     dfh = (DebugFrameHeader *)(img + 1);
5096     memcpy(dfh, debug_frame, debug_frame_size);
5097     dfh->fde.func_start = buf;
5098     dfh->fde.func_len = buf_size;
5099 
5100 #ifdef DEBUG_JIT
5101     /* Enable this block to be able to debug the ELF image file creation.
5102        One can use readelf, objdump, or other inspection utilities.  */
5103     {
5104         FILE *f = fopen("/tmp/qemu.jit", "w+b");
5105         if (f) {
5106             if (fwrite(img, img_size, 1, f) != img_size) {
5107                 /* Avoid stupid unused return value warning for fwrite.  */
5108             }
5109             fclose(f);
5110         }
5111     }
5112 #endif
5113 
5114     one_entry.symfile_addr = img;
5115     one_entry.symfile_size = img_size;
5116 
5117     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5118     __jit_debug_descriptor.relevant_entry = &one_entry;
5119     __jit_debug_descriptor.first_entry = &one_entry;
5120     __jit_debug_register_code();
5121 }
5122 #else
5123 /* No support for the feature.  Provide the entry point expected by exec.c,
5124    and implement the internal function we declared earlier.  */
5125 
/*
 * Stub used when ELF_HOST_MACHINE is not defined: all four arguments
 * are ignored and nothing is registered.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty. */
}
5131 
/*
 * Stub used when ELF_HOST_MACHINE is not defined: @buf and @buf_size
 * are ignored and nothing is registered.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty. */
}
5135 #endif /* ELF_HOST_MACHINE */
5136 
5137 #if !TCG_TARGET_MAYBE_vec
/*
 * Placeholder compiled only when TCG_TARGET_MAYBE_vec is false.
 * It must never be called; doing so trips g_assert_not_reached().
 * NOTE(review): presumably this means the backend has no vector
 * support, so no vector op should ever need expanding - confirm.
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5142 #endif
5143