xref: /openbmc/qemu/tcg/tcg.c (revision 719f0f60)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Define this to use liveness analysis (better code). */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for the qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
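
/*
 * Illustration (not from the original source): on a hypothetical host
 * with TCG_TARGET_INSN_UNIT_SIZE == 4, tcg_out32() stores one insn unit
 * directly, while tcg_out64() takes the memcpy path and advances
 * code_ptr by 8 / 4 == 2 units.  The memcpy is the portable way to
 * express a possibly multi-unit store without alignment assumptions.
 */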

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

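/*
 * Illustration (hypothetical entry, not from the original source): a
 * constraint-set line C_O1_I2(r, r, ri) in tcg-target-con-set.h expands
 * under the macros above to the enumerator
 *     c_o1_i2_r_r_ri,
 * i.e. one output constrained to "r" and two inputs constrained to "r"
 * and "ri".
 */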
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

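/*
 * Illustration (same hypothetical entry as above, not from the original
 * source): with these definitions the header is included a second time,
 * so C_O1_I2(r, r, ri) now expands to
 *     { .args_ct_str = { "r", "r", "ri" } },
 * and constraint_sets[c_o1_i2_r_r_ri] holds the constraint strings.
 * C_N1_I2 prefixes "&" to mark the output as not aliasing any input.
 */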
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

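/*
 * Illustration (hypothetical backend code, not from the original
 * source): with the final definitions above, tcg_target_op_def() in a
 * backend can be written as
 *     case INDEX_op_add_i32:
 *         return C_O1_I2(r, r, ri);
 * and the returned TCGConstraintSetIndex (c_o1_i2_r_r_ri) indexes
 * constraint_sets[] to recover the constraint strings.
 */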
#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * In a lookup, at least one of the two .size fields (the key's) is 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
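
/*
 * Illustration (hypothetical numbers, not from the original source):
 * with region.stride == 2 MiB, a pointer at start_aligned + 5 MiB lies
 * in region 5 MiB / 2 MiB == 2, so its TB is tracked in the third
 * region tree.  Pointers past the last region clamp to region.n - 1.
 */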

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
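
/*
 * Worked example (hypothetical sizes, not from the original source):
 * with a 64 MiB code_gen_buffer and max_cpus == 4, the first iteration
 * (i == 8) gives 64 MiB / (4 * 8) == 2 MiB per region, which meets the
 * 2 MiB floor, so tcg_n_regions() returns 32.  With an 8 MiB buffer,
 * only i == 1 satisfies the floor (8 MiB / 4 == 2 MiB), returning one
 * region per vCPU thread.
 */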

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;
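
    /*
     * Worked example (hypothetical numbers, not from the original
     * source): with buf already page-aligned, size == 16 MiB,
     * page_size == 4 KiB and n_regions == 8: region_size == 2 MiB, so
     * region.stride == 2 MiB and region.size == 2 MiB - 4 KiB (the last
     * page of each stride is the guard page), and region.end ends up at
     * buf + 16 MiB - 4 KiB, reserving the final guard page.
     */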

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur.  Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     */
    for (i = 0; i < region.n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);

        /*
         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
         * rejects a permission change from RWX -> NONE.  Guard pages are
         * nice for bug detection but are not essential; ignore any failure.
         */
        (void)qemu_mprotect_none(end, page_size);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
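
/*
 * Illustration (reusing the hypothetical layout from tcg_region_init
 * above, not from the original source): 8 regions of 2 MiB stride with
 * 4 KiB guard pages give
 *     capacity = 16 MiB - 8 * (4 KiB + TCG_HIGHWATER),
 * i.e. the usable space excludes each region's guard page and the
 * high-water slack at which we voluntarily stop emitting code.
 */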

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
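
/*
 * Usage note (illustration, not from the original source): allocations
 * made via tcg_malloc()/tcg_malloc_internal() -- e.g. the TCGLabel and
 * TCGRelocation objects in gen_new_label() and tcg_out_reloc() above --
 * live only until the next tcg_pool_reset(), which tcg_func_start()
 * performs at the start of each translation.  Only the oversized
 * "large" pools are actually freed; the regular chunk list is retained
 * for reuse.
 */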

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
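        /*
         * Illustration (not from the original source): ts is the low
         * half ("_0") and ts2 the high half ("_1") of the 64-bit
         * global.  On a big-endian host the low word lives at
         * offset + 4 and the high word at offset; on a little-endian
         * host the offsets are swapped.
         */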
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
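            /*
             * Illustration (not from the original source): for
             * val == 0x100000002, ts->val keeps the full 0x100000002 as
             * the hash key, while ts2->val == 0x1 holds the high word
             * used as the second half of the constant.
             */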
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
1812     case INDEX_op_extu_i32_i64:
1813         return TCG_TARGET_REG_BITS == 64;
1814 
1815     case INDEX_op_movcond_i64:
1816         return TCG_TARGET_HAS_movcond_i64;
1817     case INDEX_op_div_i64:
1818     case INDEX_op_divu_i64:
1819         return TCG_TARGET_HAS_div_i64;
1820     case INDEX_op_rem_i64:
1821     case INDEX_op_remu_i64:
1822         return TCG_TARGET_HAS_rem_i64;
1823     case INDEX_op_div2_i64:
1824     case INDEX_op_divu2_i64:
1825         return TCG_TARGET_HAS_div2_i64;
1826     case INDEX_op_rotl_i64:
1827     case INDEX_op_rotr_i64:
1828         return TCG_TARGET_HAS_rot_i64;
1829     case INDEX_op_deposit_i64:
1830         return TCG_TARGET_HAS_deposit_i64;
1831     case INDEX_op_extract_i64:
1832         return TCG_TARGET_HAS_extract_i64;
1833     case INDEX_op_sextract_i64:
1834         return TCG_TARGET_HAS_sextract_i64;
1835     case INDEX_op_extract2_i64:
1836         return TCG_TARGET_HAS_extract2_i64;
1837     case INDEX_op_extrl_i64_i32:
1838         return TCG_TARGET_HAS_extrl_i64_i32;
1839     case INDEX_op_extrh_i64_i32:
1840         return TCG_TARGET_HAS_extrh_i64_i32;
1841     case INDEX_op_ext8s_i64:
1842         return TCG_TARGET_HAS_ext8s_i64;
1843     case INDEX_op_ext16s_i64:
1844         return TCG_TARGET_HAS_ext16s_i64;
1845     case INDEX_op_ext32s_i64:
1846         return TCG_TARGET_HAS_ext32s_i64;
1847     case INDEX_op_ext8u_i64:
1848         return TCG_TARGET_HAS_ext8u_i64;
1849     case INDEX_op_ext16u_i64:
1850         return TCG_TARGET_HAS_ext16u_i64;
1851     case INDEX_op_ext32u_i64:
1852         return TCG_TARGET_HAS_ext32u_i64;
1853     case INDEX_op_bswap16_i64:
1854         return TCG_TARGET_HAS_bswap16_i64;
1855     case INDEX_op_bswap32_i64:
1856         return TCG_TARGET_HAS_bswap32_i64;
1857     case INDEX_op_bswap64_i64:
1858         return TCG_TARGET_HAS_bswap64_i64;
1859     case INDEX_op_not_i64:
1860         return TCG_TARGET_HAS_not_i64;
1861     case INDEX_op_neg_i64:
1862         return TCG_TARGET_HAS_neg_i64;
1863     case INDEX_op_andc_i64:
1864         return TCG_TARGET_HAS_andc_i64;
1865     case INDEX_op_orc_i64:
1866         return TCG_TARGET_HAS_orc_i64;
1867     case INDEX_op_eqv_i64:
1868         return TCG_TARGET_HAS_eqv_i64;
1869     case INDEX_op_nand_i64:
1870         return TCG_TARGET_HAS_nand_i64;
1871     case INDEX_op_nor_i64:
1872         return TCG_TARGET_HAS_nor_i64;
1873     case INDEX_op_clz_i64:
1874         return TCG_TARGET_HAS_clz_i64;
1875     case INDEX_op_ctz_i64:
1876         return TCG_TARGET_HAS_ctz_i64;
1877     case INDEX_op_ctpop_i64:
1878         return TCG_TARGET_HAS_ctpop_i64;
1879     case INDEX_op_add2_i64:
1880         return TCG_TARGET_HAS_add2_i64;
1881     case INDEX_op_sub2_i64:
1882         return TCG_TARGET_HAS_sub2_i64;
1883     case INDEX_op_mulu2_i64:
1884         return TCG_TARGET_HAS_mulu2_i64;
1885     case INDEX_op_muls2_i64:
1886         return TCG_TARGET_HAS_muls2_i64;
1887     case INDEX_op_muluh_i64:
1888         return TCG_TARGET_HAS_muluh_i64;
1889     case INDEX_op_mulsh_i64:
1890         return TCG_TARGET_HAS_mulsh_i64;
1891 
1892     case INDEX_op_mov_vec:
1893     case INDEX_op_dup_vec:
1894     case INDEX_op_dupm_vec:
1895     case INDEX_op_ld_vec:
1896     case INDEX_op_st_vec:
1897     case INDEX_op_add_vec:
1898     case INDEX_op_sub_vec:
1899     case INDEX_op_and_vec:
1900     case INDEX_op_or_vec:
1901     case INDEX_op_xor_vec:
1902     case INDEX_op_cmp_vec:
1903         return have_vec;
1904     case INDEX_op_dup2_vec:
1905         return have_vec && TCG_TARGET_REG_BITS == 32;
1906     case INDEX_op_not_vec:
1907         return have_vec && TCG_TARGET_HAS_not_vec;
1908     case INDEX_op_neg_vec:
1909         return have_vec && TCG_TARGET_HAS_neg_vec;
1910     case INDEX_op_abs_vec:
1911         return have_vec && TCG_TARGET_HAS_abs_vec;
1912     case INDEX_op_andc_vec:
1913         return have_vec && TCG_TARGET_HAS_andc_vec;
1914     case INDEX_op_orc_vec:
1915         return have_vec && TCG_TARGET_HAS_orc_vec;
1916     case INDEX_op_mul_vec:
1917         return have_vec && TCG_TARGET_HAS_mul_vec;
1918     case INDEX_op_shli_vec:
1919     case INDEX_op_shri_vec:
1920     case INDEX_op_sari_vec:
1921         return have_vec && TCG_TARGET_HAS_shi_vec;
1922     case INDEX_op_shls_vec:
1923     case INDEX_op_shrs_vec:
1924     case INDEX_op_sars_vec:
1925         return have_vec && TCG_TARGET_HAS_shs_vec;
1926     case INDEX_op_shlv_vec:
1927     case INDEX_op_shrv_vec:
1928     case INDEX_op_sarv_vec:
1929         return have_vec && TCG_TARGET_HAS_shv_vec;
1930     case INDEX_op_rotli_vec:
1931         return have_vec && TCG_TARGET_HAS_roti_vec;
1932     case INDEX_op_rotls_vec:
1933         return have_vec && TCG_TARGET_HAS_rots_vec;
1934     case INDEX_op_rotlv_vec:
1935     case INDEX_op_rotrv_vec:
1936         return have_vec && TCG_TARGET_HAS_rotv_vec;
1937     case INDEX_op_ssadd_vec:
1938     case INDEX_op_usadd_vec:
1939     case INDEX_op_sssub_vec:
1940     case INDEX_op_ussub_vec:
1941         return have_vec && TCG_TARGET_HAS_sat_vec;
1942     case INDEX_op_smin_vec:
1943     case INDEX_op_umin_vec:
1944     case INDEX_op_smax_vec:
1945     case INDEX_op_umax_vec:
1946         return have_vec && TCG_TARGET_HAS_minmax_vec;
1947     case INDEX_op_bitsel_vec:
1948         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1949     case INDEX_op_cmpsel_vec:
1950         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1951 
1952     default:
1953         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1954         return true;
1955     }
1956 }
1957 
1958 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1959    and endian swap.  Maybe it would be better to do the alignment
1960    and endian swap in tcg_reg_alloc_call(). */
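
/*
 * Worked example (added; inferred from the bit tests below, not an
 * authoritative ABI statement): sizemask packs two bits per value.  Bit 0
 * marks a 64-bit return; for argument i, bit (i+1)*2 marks a 64-bit
 * argument and bit (i+1)*2 + 1 marks it as signed.  So a helper returning
 * i64 and taking (i32, i64) would have
 *
 *     sizemask = 1 | (0 << 2) | (1 << 4) = 0x11
 */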
1961 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1962 {
1963     int i, real_args, nb_rets, pi;
1964     unsigned sizemask, flags;
1965     TCGHelperInfo *info;
1966     TCGOp *op;
1967 
1968     info = g_hash_table_lookup(helper_table, (gpointer)func);
1969     flags = info->flags;
1970     sizemask = info->sizemask;
1971 
1972 #ifdef CONFIG_PLUGIN
1973     /* detect non-plugin helpers */
1974     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1975         tcg_ctx->plugin_insn->calls_helpers = true;
1976     }
1977 #endif
1978 
1979 #if defined(__sparc__) && !defined(__arch64__) \
1980     && !defined(CONFIG_TCG_INTERPRETER)
1981     /* We have 64-bit values in one register, but need to pass them as
1982        two separate parameters.  Split them.  */
1983     int orig_sizemask = sizemask;
1984     int orig_nargs = nargs;
1985     TCGv_i64 retl, reth;
1986     TCGTemp *split_args[MAX_OPC_PARAM];
1987 
1988     retl = NULL;
1989     reth = NULL;
1990     if (sizemask != 0) {
1991         for (i = real_args = 0; i < nargs; ++i) {
1992             int is_64bit = sizemask & (1 << (i+1)*2);
1993             if (is_64bit) {
1994                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1995                 TCGv_i32 h = tcg_temp_new_i32();
1996                 TCGv_i32 l = tcg_temp_new_i32();
1997                 tcg_gen_extr_i64_i32(l, h, orig);
1998                 split_args[real_args++] = tcgv_i32_temp(h);
1999                 split_args[real_args++] = tcgv_i32_temp(l);
2000             } else {
2001                 split_args[real_args++] = args[i];
2002             }
2003         }
2004         nargs = real_args;
2005         args = split_args;
2006         sizemask = 0;
2007     }
2008 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2009     for (i = 0; i < nargs; ++i) {
2010         int is_64bit = sizemask & (1 << (i+1)*2);
2011         int is_signed = sizemask & (2 << (i+1)*2);
2012         if (!is_64bit) {
2013             TCGv_i64 temp = tcg_temp_new_i64();
2014             TCGv_i64 orig = temp_tcgv_i64(args[i]);
2015             if (is_signed) {
2016                 tcg_gen_ext32s_i64(temp, orig);
2017             } else {
2018                 tcg_gen_ext32u_i64(temp, orig);
2019             }
2020             args[i] = tcgv_i64_temp(temp);
2021         }
2022     }
2023 #endif /* TCG_TARGET_EXTEND_ARGS */
2024 
2025     op = tcg_emit_op(INDEX_op_call);
2026 
2027     pi = 0;
2028     if (ret != NULL) {
2029 #if defined(__sparc__) && !defined(__arch64__) \
2030     && !defined(CONFIG_TCG_INTERPRETER)
2031         if (orig_sizemask & 1) {
2032             /* The 32-bit ABI is going to return the 64-bit value in
2033                the %o0/%o1 register pair.  Prepare for this by using
2034                two return temporaries, and reassemble below.  */
2035             retl = tcg_temp_new_i64();
2036             reth = tcg_temp_new_i64();
2037             op->args[pi++] = tcgv_i64_arg(reth);
2038             op->args[pi++] = tcgv_i64_arg(retl);
2039             nb_rets = 2;
2040         } else {
2041             op->args[pi++] = temp_arg(ret);
2042             nb_rets = 1;
2043         }
2044 #else
2045         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2046 #ifdef HOST_WORDS_BIGENDIAN
2047             op->args[pi++] = temp_arg(ret + 1);
2048             op->args[pi++] = temp_arg(ret);
2049 #else
2050             op->args[pi++] = temp_arg(ret);
2051             op->args[pi++] = temp_arg(ret + 1);
2052 #endif
2053             nb_rets = 2;
2054         } else {
2055             op->args[pi++] = temp_arg(ret);
2056             nb_rets = 1;
2057         }
2058 #endif
2059     } else {
2060         nb_rets = 0;
2061     }
2062     TCGOP_CALLO(op) = nb_rets;
2063 
2064     real_args = 0;
2065     for (i = 0; i < nargs; i++) {
2066         int is_64bit = sizemask & (1 << (i+1)*2);
2067         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2068 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
2069             /* some targets want aligned 64-bit args */
2070             if (real_args & 1) {
2071                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
2072                 real_args++;
2073             }
2074 #endif
2075            /* If stack grows up, then we will be placing successive
2076               arguments at lower addresses, which means we need to
2077               reverse the order compared to how we would normally
2078               treat either big or little-endian.  For those arguments
2079               that will wind up in registers, this still works for
2080               HPPA (the only current STACK_GROWSUP target) since the
2081               argument registers are *also* allocated in decreasing
2082               order.  If another such target is added, this logic may
2083               have to get more complicated to differentiate between
2084               stack arguments and register arguments.  */
2085 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2086             op->args[pi++] = temp_arg(args[i] + 1);
2087             op->args[pi++] = temp_arg(args[i]);
2088 #else
2089             op->args[pi++] = temp_arg(args[i]);
2090             op->args[pi++] = temp_arg(args[i] + 1);
2091 #endif
2092             real_args += 2;
2093             continue;
2094         }
2095 
2096         op->args[pi++] = temp_arg(args[i]);
2097         real_args++;
2098     }
2099     op->args[pi++] = (uintptr_t)func;
2100     op->args[pi++] = flags;
2101     TCGOP_CALLI(op) = real_args;
2102 
2103     /* Make sure the fields didn't overflow.  */
2104     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2105     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2106 
2107 #if defined(__sparc__) && !defined(__arch64__) \
2108     && !defined(CONFIG_TCG_INTERPRETER)
2109     /* Free all of the parts we allocated above.  */
2110     for (i = real_args = 0; i < orig_nargs; ++i) {
2111         int is_64bit = orig_sizemask & (1 << (i+1)*2);
2112         if (is_64bit) {
2113             tcg_temp_free_internal(args[real_args++]);
2114             tcg_temp_free_internal(args[real_args++]);
2115         } else {
2116             real_args++;
2117         }
2118     }
2119     if (orig_sizemask & 1) {
2120         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
2121            Note that describing these as TCGv_i64 eliminates an unnecessary
2122            zero-extension that tcg_gen_concat_i32_i64 would create.  */
2123         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2124         tcg_temp_free_i64(retl);
2125         tcg_temp_free_i64(reth);
2126     }
2127 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2128     for (i = 0; i < nargs; ++i) {
2129         int is_64bit = sizemask & (1 << (i+1)*2);
2130         if (!is_64bit) {
2131             tcg_temp_free_internal(args[i]);
2132         }
2133     }
2134 #endif /* TCG_TARGET_EXTEND_ARGS */
2135 }
2136 
2137 static void tcg_reg_alloc_start(TCGContext *s)
2138 {
2139     int i, n;
2140 
2141     for (i = 0, n = s->nb_temps; i < n; i++) {
2142         TCGTemp *ts = &s->temps[i];
2143         TCGTempVal val = TEMP_VAL_MEM;
2144 
2145         switch (ts->kind) {
2146         case TEMP_CONST:
2147             val = TEMP_VAL_CONST;
2148             break;
2149         case TEMP_FIXED:
2150             val = TEMP_VAL_REG;
2151             break;
2152         case TEMP_GLOBAL:
2153             break;
2154         case TEMP_NORMAL:
2155             val = TEMP_VAL_DEAD;
2156             /* fall through */
2157         case TEMP_LOCAL:
2158             ts->mem_allocated = 0;
2159             break;
2160         default:
2161             g_assert_not_reached();
2162         }
2163         ts->val_type = val;
2164     }
2165 
2166     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2167 }
2168 
2169 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2170                                  TCGTemp *ts)
2171 {
2172     int idx = temp_idx(ts);
2173 
2174     switch (ts->kind) {
2175     case TEMP_FIXED:
2176     case TEMP_GLOBAL:
2177         pstrcpy(buf, buf_size, ts->name);
2178         break;
2179     case TEMP_LOCAL:
2180         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2181         break;
2182     case TEMP_NORMAL:
2183         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2184         break;
2185     case TEMP_CONST:
2186         switch (ts->type) {
2187         case TCG_TYPE_I32:
2188             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2189             break;
2190 #if TCG_TARGET_REG_BITS > 32
2191         case TCG_TYPE_I64:
2192             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2193             break;
2194 #endif
2195         case TCG_TYPE_V64:
2196         case TCG_TYPE_V128:
2197         case TCG_TYPE_V256:
2198             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2199                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2200             break;
2201         default:
2202             g_assert_not_reached();
2203         }
2204         break;
2205     }
2206     return buf;
2207 }
2208 
2209 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2210                              int buf_size, TCGArg arg)
2211 {
2212     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2213 }
2214 
2215 /* Find helper name.  */
2216 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2217 {
2218     const char *ret = NULL;
2219     if (helper_table) {
2220         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2221         if (info) {
2222             ret = info->name;
2223         }
2224     }
2225     return ret;
2226 }
2227 
2228 static const char * const cond_name[] =
2229 {
2230     [TCG_COND_NEVER] = "never",
2231     [TCG_COND_ALWAYS] = "always",
2232     [TCG_COND_EQ] = "eq",
2233     [TCG_COND_NE] = "ne",
2234     [TCG_COND_LT] = "lt",
2235     [TCG_COND_GE] = "ge",
2236     [TCG_COND_LE] = "le",
2237     [TCG_COND_GT] = "gt",
2238     [TCG_COND_LTU] = "ltu",
2239     [TCG_COND_GEU] = "geu",
2240     [TCG_COND_LEU] = "leu",
2241     [TCG_COND_GTU] = "gtu"
2242 };
2243 
2244 static const char * const ldst_name[] =
2245 {
2246     [MO_UB]   = "ub",
2247     [MO_SB]   = "sb",
2248     [MO_LEUW] = "leuw",
2249     [MO_LESW] = "lesw",
2250     [MO_LEUL] = "leul",
2251     [MO_LESL] = "lesl",
2252     [MO_LEQ]  = "leq",
2253     [MO_BEUW] = "beuw",
2254     [MO_BESW] = "besw",
2255     [MO_BEUL] = "beul",
2256     [MO_BESL] = "besl",
2257     [MO_BEQ]  = "beq",
2258 };
2259 
2260 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2261 #ifdef TARGET_ALIGNED_ONLY
2262     [MO_UNALN >> MO_ASHIFT]    = "un+",
2263     [MO_ALIGN >> MO_ASHIFT]    = "",
2264 #else
2265     [MO_UNALN >> MO_ASHIFT]    = "",
2266     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2267 #endif
2268     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2269     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2270     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2271     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2272     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2273     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2274 };
2275 
2276 static inline bool tcg_regset_single(TCGRegSet d)
2277 {
2278     return (d & (d - 1)) == 0;
2279 }
2280 
2281 static inline TCGReg tcg_regset_first(TCGRegSet d)
2282 {
2283     if (TCG_TARGET_NB_REGS <= 32) {
2284         return ctz32(d);
2285     } else {
2286         return ctz64(d);
2287     }
2288 }
2289 
2290 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2291 {
2292     char buf[128];
2293     TCGOp *op;
2294 
2295     QTAILQ_FOREACH(op, &s->ops, link) {
2296         int i, k, nb_oargs, nb_iargs, nb_cargs;
2297         const TCGOpDef *def;
2298         TCGOpcode c;
2299         int col = 0;
2300 
2301         c = op->opc;
2302         def = &tcg_op_defs[c];
2303 
2304         if (c == INDEX_op_insn_start) {
2305             nb_oargs = 0;
2306             col += qemu_log("\n ----");
2307 
2308             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2309                 target_ulong a;
2310 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2311                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2312 #else
2313                 a = op->args[i];
2314 #endif
2315                 col += qemu_log(" " TARGET_FMT_lx, a);
2316             }
2317         } else if (c == INDEX_op_call) {
2318             /* variable number of arguments */
2319             nb_oargs = TCGOP_CALLO(op);
2320             nb_iargs = TCGOP_CALLI(op);
2321             nb_cargs = def->nb_cargs;
2322 
2323             /* function name, flags, out args */
2324             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2325                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2326                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2327             for (i = 0; i < nb_oargs; i++) {
2328                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2329                                                        op->args[i]));
2330             }
2331             for (i = 0; i < nb_iargs; i++) {
2332                 TCGArg arg = op->args[nb_oargs + i];
2333                 const char *t = "<dummy>";
2334                 if (arg != TCG_CALL_DUMMY_ARG) {
2335                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2336                 }
2337                 col += qemu_log(",%s", t);
2338             }
2339         } else {
2340             col += qemu_log(" %s ", def->name);
2341 
2342             nb_oargs = def->nb_oargs;
2343             nb_iargs = def->nb_iargs;
2344             nb_cargs = def->nb_cargs;
2345 
2346             if (def->flags & TCG_OPF_VECTOR) {
2347                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2348                                 8 << TCGOP_VECE(op));
2349             }
2350 
2351             k = 0;
2352             for (i = 0; i < nb_oargs; i++) {
2353                 if (k != 0) {
2354                     col += qemu_log(",");
2355                 }
2356                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2357                                                       op->args[k++]));
2358             }
2359             for (i = 0; i < nb_iargs; i++) {
2360                 if (k != 0) {
2361                     col += qemu_log(",");
2362                 }
2363                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2364                                                       op->args[k++]));
2365             }
2366             switch (c) {
2367             case INDEX_op_brcond_i32:
2368             case INDEX_op_setcond_i32:
2369             case INDEX_op_movcond_i32:
2370             case INDEX_op_brcond2_i32:
2371             case INDEX_op_setcond2_i32:
2372             case INDEX_op_brcond_i64:
2373             case INDEX_op_setcond_i64:
2374             case INDEX_op_movcond_i64:
2375             case INDEX_op_cmp_vec:
2376             case INDEX_op_cmpsel_vec:
2377                 if (op->args[k] < ARRAY_SIZE(cond_name)
2378                     && cond_name[op->args[k]]) {
2379                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2380                 } else {
2381                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2382                 }
2383                 i = 1;
2384                 break;
2385             case INDEX_op_qemu_ld_i32:
2386             case INDEX_op_qemu_st_i32:
2387             case INDEX_op_qemu_st8_i32:
2388             case INDEX_op_qemu_ld_i64:
2389             case INDEX_op_qemu_st_i64:
2390                 {
2391                     TCGMemOpIdx oi = op->args[k++];
2392                     MemOp op = get_memop(oi);
2393                     unsigned ix = get_mmuidx(oi);
2394 
2395                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2396                         col += qemu_log(",$0x%x,%u", op, ix);
2397                     } else {
2398                         const char *s_al, *s_op;
2399                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2400                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2401                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2402                     }
2403                     i = 1;
2404                 }
2405                 break;
2406             default:
2407                 i = 0;
2408                 break;
2409             }
2410             switch (c) {
2411             case INDEX_op_set_label:
2412             case INDEX_op_br:
2413             case INDEX_op_brcond_i32:
2414             case INDEX_op_brcond_i64:
2415             case INDEX_op_brcond2_i32:
2416                 col += qemu_log("%s$L%d", k ? "," : "",
2417                                 arg_label(op->args[k])->id);
2418                 i++, k++;
2419                 break;
2420             default:
2421                 break;
2422             }
2423             for (; i < nb_cargs; i++, k++) {
2424                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2425             }
2426         }
2427 
2428         if (have_prefs || op->life) {
2429 
2430             QemuLogFile *logfile;
2431 
2432             rcu_read_lock();
2433             logfile = qatomic_rcu_read(&qemu_logfile);
2434             if (logfile) {
2435                 for (; col < 40; ++col) {
2436                     putc(' ', logfile->fd);
2437                 }
2438             }
2439             rcu_read_unlock();
2440         }
2441 
2442         if (op->life) {
2443             unsigned life = op->life;
2444 
2445             if (life & (SYNC_ARG * 3)) {
2446                 qemu_log("  sync:");
2447                 for (i = 0; i < 2; ++i) {
2448                     if (life & (SYNC_ARG << i)) {
2449                         qemu_log(" %d", i);
2450                     }
2451                 }
2452             }
2453             life /= DEAD_ARG;
2454             if (life) {
2455                 qemu_log("  dead:");
2456                 for (i = 0; life; ++i, life >>= 1) {
2457                     if (life & 1) {
2458                         qemu_log(" %d", i);
2459                     }
2460                 }
2461             }
2462         }
2463 
2464         if (have_prefs) {
2465             for (i = 0; i < nb_oargs; ++i) {
2466                 TCGRegSet set = op->output_pref[i];
2467 
2468                 if (i == 0) {
2469                     qemu_log("  pref=");
2470                 } else {
2471                     qemu_log(",");
2472                 }
2473                 if (set == 0) {
2474                     qemu_log("none");
2475                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2476                     qemu_log("all");
2477 #ifdef CONFIG_DEBUG_TCG
2478                 } else if (tcg_regset_single(set)) {
2479                     TCGReg reg = tcg_regset_first(set);
2480                     qemu_log("%s", tcg_target_reg_names[reg]);
2481 #endif
2482                 } else if (TCG_TARGET_NB_REGS <= 32) {
2483                     qemu_log("%#x", (uint32_t)set);
2484                 } else {
2485                     qemu_log("%#" PRIx64, (uint64_t)set);
2486                 }
2487             }
2488         }
2489 
2490         qemu_log("\n");
2491     }
2492 }
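
/*
 * Added example of the dump format (the exact output is illustrative,
 * as produced under -d op for a target with two insn_start words):
 *
 *  ---- 0000000000400078 0000000000000000
 *  mov_i64 tmp2,x1
 *  add_i64 tmp2,tmp2,$0x10
 *  brcond_i64 tmp2,$0x0,eq,$L1
 *
 * When enabled, life/preference annotations are padded out past column 40.
 */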
2493 
2494 /* we give more priority to constraints with fewer registers */
2495 static int get_constraint_priority(const TCGOpDef *def, int k)
2496 {
2497     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2498     int n;
2499 
2500     if (arg_ct->oalias) {
2501         /* an alias is equivalent to a single register */
2502         n = 1;
2503     } else {
2504         n = ctpop64(arg_ct->regs);
2505     }
2506     return TCG_TARGET_NB_REGS - n + 1;
2507 }
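
/*
 * Added example (assuming a host with TCG_TARGET_NB_REGS == 16): a
 * constraint matching exactly one register yields priority 16, while one
 * matching all 16 yields priority 1, so sort_constraints() below places
 * the scarcest constraints first.
 */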
2508 
2509 /* sort from highest priority to lowest */
2510 static void sort_constraints(TCGOpDef *def, int start, int n)
2511 {
2512     int i, j;
2513     TCGArgConstraint *a = def->args_ct;
2514 
2515     for (i = 0; i < n; i++) {
2516         a[start + i].sort_index = start + i;
2517     }
2518     if (n <= 1) {
2519         return;
2520     }
2521     for (i = 0; i < n - 1; i++) {
2522         for (j = i + 1; j < n; j++) {
2523             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2524             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2525             if (p1 < p2) {
2526                 int tmp = a[start + i].sort_index;
2527                 a[start + i].sort_index = a[start + j].sort_index;
2528                 a[start + j].sort_index = tmp;
2529             }
2530         }
2531     }
2532 }
2533 
2534 static void process_op_defs(TCGContext *s)
2535 {
2536     TCGOpcode op;
2537 
2538     for (op = 0; op < NB_OPS; op++) {
2539         TCGOpDef *def = &tcg_op_defs[op];
2540         const TCGTargetOpDef *tdefs;
2541         int i, nb_args;
2542 
2543         if (def->flags & TCG_OPF_NOT_PRESENT) {
2544             continue;
2545         }
2546 
2547         nb_args = def->nb_iargs + def->nb_oargs;
2548         if (nb_args == 0) {
2549             continue;
2550         }
2551 
2552         /*
2553          * Macro magic should make it impossible, but double-check that
2554          * the array index is in range.  Since the signedness of an enum
2555          * is implementation-defined, force the result to unsigned.
2556          */
2557         unsigned con_set = tcg_target_op_def(op);
2558         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2559         tdefs = &constraint_sets[con_set];
2560 
2561         for (i = 0; i < nb_args; i++) {
2562             const char *ct_str = tdefs->args_ct_str[i];
2563             /* Incomplete TCGTargetOpDef entry. */
2564             tcg_debug_assert(ct_str != NULL);
2565 
2566             while (*ct_str != '\0') {
2567                 switch(*ct_str) {
2568                 case '0' ... '9':
2569                     {
2570                         int oarg = *ct_str - '0';
2571                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2572                         tcg_debug_assert(oarg < def->nb_oargs);
2573                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2574                         def->args_ct[i] = def->args_ct[oarg];
2575                         /* The output sets oalias.  */
2576                         def->args_ct[oarg].oalias = true;
2577                         def->args_ct[oarg].alias_index = i;
2578                         /* The input sets ialias. */
2579                         def->args_ct[i].ialias = true;
2580                         def->args_ct[i].alias_index = oarg;
2581                     }
2582                     ct_str++;
2583                     break;
2584                 case '&':
2585                     def->args_ct[i].newreg = true;
2586                     ct_str++;
2587                     break;
2588                 case 'i':
2589                     def->args_ct[i].ct |= TCG_CT_CONST;
2590                     ct_str++;
2591                     break;
2592 
2593                 /* Include all of the target-specific constraints. */
2594 
2595 #undef CONST
2596 #define CONST(CASE, MASK) \
2597     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2598 #define REGS(CASE, MASK) \
2599     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2600 
2601 #include "tcg-target-con-str.h"
2602 
2603 #undef REGS
2604 #undef CONST
2605                 default:
2606                     /* Typo in TCGTargetOpDef constraint. */
2607                     g_assert_not_reached();
2608                 }
2609             }
2610         }
2611 
2612         /* TCGTargetOpDef entry with too much information? */
2613         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2614 
2615         /* sort the constraints (XXX: this is just a heuristic) */
2616         sort_constraints(def, 0, def->nb_oargs);
2617         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2618     }
2619 }
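
/*
 * Illustrative sketch of the constraint strings parsed above (a
 * hypothetical entry, not taken from any real backend): for a
 * three-operand op, { "r", "0", "ri" } would allow any register for
 * output 0, force input 1 into the same register as output 0 (the digit
 * alias), and allow input 2 to be a register or an immediate ('i' sets
 * TCG_CT_CONST; 'r' is supplied by tcg-target-con-str.h via REGS()).
 */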
2620 
2621 void tcg_op_remove(TCGContext *s, TCGOp *op)
2622 {
2623     TCGLabel *label;
2624 
2625     switch (op->opc) {
2626     case INDEX_op_br:
2627         label = arg_label(op->args[0]);
2628         label->refs--;
2629         break;
2630     case INDEX_op_brcond_i32:
2631     case INDEX_op_brcond_i64:
2632         label = arg_label(op->args[3]);
2633         label->refs--;
2634         break;
2635     case INDEX_op_brcond2_i32:
2636         label = arg_label(op->args[5]);
2637         label->refs--;
2638         break;
2639     default:
2640         break;
2641     }
2642 
2643     QTAILQ_REMOVE(&s->ops, op, link);
2644     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2645     s->nb_ops--;
2646 
2647 #ifdef CONFIG_PROFILER
2648     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2649 #endif
2650 }
2651 
2652 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2653 {
2654     TCGContext *s = tcg_ctx;
2655     TCGOp *op;
2656 
2657     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2658         op = tcg_malloc(sizeof(TCGOp));
2659     } else {
2660         op = QTAILQ_FIRST(&s->free_ops);
2661         QTAILQ_REMOVE(&s->free_ops, op, link);
2662     }
2663     memset(op, 0, offsetof(TCGOp, link));
2664     op->opc = opc;
2665     s->nb_ops++;
2666 
2667     return op;
2668 }
2669 
2670 TCGOp *tcg_emit_op(TCGOpcode opc)
2671 {
2672     TCGOp *op = tcg_op_alloc(opc);
2673     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2674     return op;
2675 }
2676 
2677 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2678 {
2679     TCGOp *new_op = tcg_op_alloc(opc);
2680     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2681     return new_op;
2682 }
2683 
2684 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2685 {
2686     TCGOp *new_op = tcg_op_alloc(opc);
2687     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2688     return new_op;
2689 }
2690 
2691 /* Reachability analysis: remove unreachable code.  */
2692 static void reachable_code_pass(TCGContext *s)
2693 {
2694     TCGOp *op, *op_next;
2695     bool dead = false;
2696 
2697     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2698         bool remove = dead;
2699         TCGLabel *label;
2700         int call_flags;
2701 
2702         switch (op->opc) {
2703         case INDEX_op_set_label:
2704             label = arg_label(op->args[0]);
2705             if (label->refs == 0) {
2706                 /*
2707                  * While there is an occasional backward branch, virtually
2708                  * all branches generated by the translators are forward.
2709                  * Which means that generally we will already have removed
2710                  * every reference to this label that will ever exist, and
2711                  * there is little to be gained by iterating.
2712                  */
2713                 remove = true;
2714             } else {
2715                 /* Once we see a label, insns become live again.  */
2716                 dead = false;
2717                 remove = false;
2718 
2719                 /*
2720                  * Optimization can fold conditional branches to unconditional.
2721                  * If we find a label with one reference which is preceded by
2722                  * an unconditional branch to it, remove both.  This needed to
2723                  * wait until the dead code in between them was removed.
2724                  */
2725                 if (label->refs == 1) {
2726                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2727                     if (op_prev->opc == INDEX_op_br &&
2728                         label == arg_label(op_prev->args[0])) {
2729                         tcg_op_remove(s, op_prev);
2730                         remove = true;
2731                     }
2732                 }
2733             }
2734             break;
2735 
2736         case INDEX_op_br:
2737         case INDEX_op_exit_tb:
2738         case INDEX_op_goto_ptr:
2739             /* Unconditional branches; everything following is dead.  */
2740             dead = true;
2741             break;
2742 
2743         case INDEX_op_call:
2744             /* Notice noreturn helper calls, such as those raising exceptions.  */
2745             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2746             if (call_flags & TCG_CALL_NO_RETURN) {
2747                 dead = true;
2748             }
2749             break;
2750 
2751         case INDEX_op_insn_start:
2752             /* Never remove -- we need to keep these for unwind.  */
2753             remove = false;
2754             break;
2755 
2756         default:
2757             break;
2758         }
2759 
2760         if (remove) {
2761             tcg_op_remove(s, op);
2762         }
2763     }
2764 }
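
/*
 * Added example: after the optimizer folds a conditional branch to an
 * unconditional one, a block may look like
 *
 *     br $L0
 *     mov_i32 t0,t1      <-- dead == true, removed
 *     set_label $L0      <-- refs == 1 and preceded by br $L0: both removed
 *
 * so the surviving code simply falls through.
 */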
2765 
2766 #define TS_DEAD  1
2767 #define TS_MEM   2
2768 
2769 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2770 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2771 
2772 /* For liveness_pass_1, the register preferences for a given temp.  */
2773 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2774 {
2775     return ts->state_ptr;
2776 }
2777 
2778 /* For liveness_pass_1, reset the preferences for a given temp to the
2779  * maximal regset for its type.
2780  */
2781 static inline void la_reset_pref(TCGTemp *ts)
2782 {
2783     *la_temp_pref(ts)
2784         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2785 }
2786 
2787 /* liveness analysis: end of function: all temps are dead, and globals
2788    should be in memory. */
2789 static void la_func_end(TCGContext *s, int ng, int nt)
2790 {
2791     int i;
2792 
2793     for (i = 0; i < ng; ++i) {
2794         s->temps[i].state = TS_DEAD | TS_MEM;
2795         la_reset_pref(&s->temps[i]);
2796     }
2797     for (i = ng; i < nt; ++i) {
2798         s->temps[i].state = TS_DEAD;
2799         la_reset_pref(&s->temps[i]);
2800     }
2801 }
2802 
2803 /* liveness analysis: end of basic block: all temps are dead, globals
2804    and local temps should be in memory. */
2805 static void la_bb_end(TCGContext *s, int ng, int nt)
2806 {
2807     int i;
2808 
2809     for (i = 0; i < nt; ++i) {
2810         TCGTemp *ts = &s->temps[i];
2811         int state;
2812 
2813         switch (ts->kind) {
2814         case TEMP_FIXED:
2815         case TEMP_GLOBAL:
2816         case TEMP_LOCAL:
2817             state = TS_DEAD | TS_MEM;
2818             break;
2819         case TEMP_NORMAL:
2820         case TEMP_CONST:
2821             state = TS_DEAD;
2822             break;
2823         default:
2824             g_assert_not_reached();
2825         }
2826         ts->state = state;
2827         la_reset_pref(ts);
2828     }
2829 }
2830 
2831 /* liveness analysis: sync globals back to memory.  */
2832 static void la_global_sync(TCGContext *s, int ng)
2833 {
2834     int i;
2835 
2836     for (i = 0; i < ng; ++i) {
2837         int state = s->temps[i].state;
2838         s->temps[i].state = state | TS_MEM;
2839         if (state == TS_DEAD) {
2840             /* If the global was previously dead, reset prefs.  */
2841             la_reset_pref(&s->temps[i]);
2842         }
2843     }
2844 }
2845 
2846 /*
2847  * liveness analysis: conditional branch: all temps are dead,
2848  * globals and local temps should be synced.
2849  */
2850 static void la_bb_sync(TCGContext *s, int ng, int nt)
2851 {
2852     la_global_sync(s, ng);
2853 
2854     for (int i = ng; i < nt; ++i) {
2855         TCGTemp *ts = &s->temps[i];
2856         int state;
2857 
2858         switch (ts->kind) {
2859         case TEMP_LOCAL:
2860             state = ts->state;
2861             ts->state = state | TS_MEM;
2862             if (state != TS_DEAD) {
2863                 continue;
2864             }
2865             break;
2866         case TEMP_NORMAL:
2867             s->temps[i].state = TS_DEAD;
2868             break;
2869         case TEMP_CONST:
2870             continue;
2871         default:
2872             g_assert_not_reached();
2873         }
2874         la_reset_pref(&s->temps[i]);
2875     }
2876 }
2877 
2878 /* liveness analysis: sync globals back to memory and kill.  */
2879 static void la_global_kill(TCGContext *s, int ng)
2880 {
2881     int i;
2882 
2883     for (i = 0; i < ng; i++) {
2884         s->temps[i].state = TS_DEAD | TS_MEM;
2885         la_reset_pref(&s->temps[i]);
2886     }
2887 }
2888 
2889 /* liveness analysis: note live globals crossing calls.  */
2890 static void la_cross_call(TCGContext *s, int nt)
2891 {
2892     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2893     int i;
2894 
2895     for (i = 0; i < nt; i++) {
2896         TCGTemp *ts = &s->temps[i];
2897         if (!(ts->state & TS_DEAD)) {
2898             TCGRegSet *pset = la_temp_pref(ts);
2899             TCGRegSet set = *pset;
2900 
2901             set &= mask;
2902             /* If the combination is not possible, restart.  */
2903             if (set == 0) {
2904                 set = tcg_target_available_regs[ts->type] & mask;
2905             }
2906             *pset = set;
2907         }
2908     }
2909 }
2910 
2911 /* Liveness analysis: update the opc_arg_life array to tell whether a
2912    given input argument is dead.  Instructions updating dead
2913    temporaries are removed. */
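
/*
 * Added sketch of the life encoding (inferred from IS_DEAD_ARG /
 * NEED_SYNC_ARG above): for an op such as "add_i32 t0,t1,t2",
 *
 *     arg_life = (DEAD_ARG << 2) | (SYNC_ARG << 0);
 *
 * records that input t2 (argument index 2) dies here and that output t0
 * must be synced back to its memory slot.
 */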
2914 static void liveness_pass_1(TCGContext *s)
2915 {
2916     int nb_globals = s->nb_globals;
2917     int nb_temps = s->nb_temps;
2918     TCGOp *op, *op_prev;
2919     TCGRegSet *prefs;
2920     int i;
2921 
2922     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2923     for (i = 0; i < nb_temps; ++i) {
2924         s->temps[i].state_ptr = prefs + i;
2925     }
2926 
2927     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2928     la_func_end(s, nb_globals, nb_temps);
2929 
2930     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2931         int nb_iargs, nb_oargs;
2932         TCGOpcode opc_new, opc_new2;
2933         bool have_opc_new2;
2934         TCGLifeData arg_life = 0;
2935         TCGTemp *ts;
2936         TCGOpcode opc = op->opc;
2937         const TCGOpDef *def = &tcg_op_defs[opc];
2938 
2939         switch (opc) {
2940         case INDEX_op_call:
2941             {
2942                 int call_flags;
2943                 int nb_call_regs;
2944 
2945                 nb_oargs = TCGOP_CALLO(op);
2946                 nb_iargs = TCGOP_CALLI(op);
2947                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2948 
2949                 /* pure functions can be removed if their result is unused */
2950                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2951                     for (i = 0; i < nb_oargs; i++) {
2952                         ts = arg_temp(op->args[i]);
2953                         if (ts->state != TS_DEAD) {
2954                             goto do_not_remove_call;
2955                         }
2956                     }
2957                     goto do_remove;
2958                 }
2959             do_not_remove_call:
2960 
2961                 /* Output args are dead.  */
2962                 for (i = 0; i < nb_oargs; i++) {
2963                     ts = arg_temp(op->args[i]);
2964                     if (ts->state & TS_DEAD) {
2965                         arg_life |= DEAD_ARG << i;
2966                     }
2967                     if (ts->state & TS_MEM) {
2968                         arg_life |= SYNC_ARG << i;
2969                     }
2970                     ts->state = TS_DEAD;
2971                     la_reset_pref(ts);
2972 
2973                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2974                     op->output_pref[i] = 0;
2975                 }
2976 
2977                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2978                                     TCG_CALL_NO_READ_GLOBALS))) {
2979                     la_global_kill(s, nb_globals);
2980                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2981                     la_global_sync(s, nb_globals);
2982                 }
2983 
2984                 /* Record arguments that die in this helper.  */
2985                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2986                     ts = arg_temp(op->args[i]);
2987                     if (ts && ts->state & TS_DEAD) {
2988                         arg_life |= DEAD_ARG << i;
2989                     }
2990                 }
2991 
2992                 /* For all live registers, remove call-clobbered prefs.  */
2993                 la_cross_call(s, nb_temps);
2994 
2995                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2996 
2997                 /* Input arguments are live for preceding opcodes.  */
2998                 for (i = 0; i < nb_iargs; i++) {
2999                     ts = arg_temp(op->args[i + nb_oargs]);
3000                     if (ts && ts->state & TS_DEAD) {
3001                         /* For those arguments that die, and will be allocated
3002                          * in registers, clear the register set for that arg,
3003                          * to be filled in below.  For args that will be on
3004                          * the stack, reset to any available reg.
3005                          */
3006                         *la_temp_pref(ts)
3007                             = (i < nb_call_regs ? 0 :
3008                                tcg_target_available_regs[ts->type]);
3009                         ts->state &= ~TS_DEAD;
3010                     }
3011                 }
3012 
3013                 /* For each input argument, add its input register to prefs.
3014                    If a temp is used once, this produces a single set bit.  */
3015                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3016                     ts = arg_temp(op->args[i + nb_oargs]);
3017                     if (ts) {
3018                         tcg_regset_set_reg(*la_temp_pref(ts),
3019                                            tcg_target_call_iarg_regs[i]);
3020                     }
3021                 }
3022             }
3023             break;
3024         case INDEX_op_insn_start:
3025             break;
3026         case INDEX_op_discard:
3027             /* mark the temporary as dead */
3028             ts = arg_temp(op->args[0]);
3029             ts->state = TS_DEAD;
3030             la_reset_pref(ts);
3031             break;
3032 
3033         case INDEX_op_add2_i32:
3034             opc_new = INDEX_op_add_i32;
3035             goto do_addsub2;
3036         case INDEX_op_sub2_i32:
3037             opc_new = INDEX_op_sub_i32;
3038             goto do_addsub2;
3039         case INDEX_op_add2_i64:
3040             opc_new = INDEX_op_add_i64;
3041             goto do_addsub2;
3042         case INDEX_op_sub2_i64:
3043             opc_new = INDEX_op_sub_i64;
3044         do_addsub2:
3045             nb_iargs = 4;
3046             nb_oargs = 2;
3047             /* Test if the high part of the operation is dead, but not
3048                the low part.  The result can be optimized to a simple
3049                add or sub.  This happens often for an x86_64 guest when
3050                the cpu mode is set to 32 bits.  */
3051             if (arg_temp(op->args[1])->state == TS_DEAD) {
3052                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3053                     goto do_remove;
3054                 }
3055                 /* Replace the opcode and adjust the args in place,
3056                    leaving 3 unused args at the end.  */
3057                 op->opc = opc = opc_new;
3058                 op->args[1] = op->args[2];
3059                 op->args[2] = op->args[4];
3060                 /* Fall through and mark the single-word operation live.  */
3061                 nb_iargs = 2;
3062                 nb_oargs = 1;
3063             }
3064             goto do_not_remove;
3065 
3066         case INDEX_op_mulu2_i32:
3067             opc_new = INDEX_op_mul_i32;
3068             opc_new2 = INDEX_op_muluh_i32;
3069             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3070             goto do_mul2;
3071         case INDEX_op_muls2_i32:
3072             opc_new = INDEX_op_mul_i32;
3073             opc_new2 = INDEX_op_mulsh_i32;
3074             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3075             goto do_mul2;
3076         case INDEX_op_mulu2_i64:
3077             opc_new = INDEX_op_mul_i64;
3078             opc_new2 = INDEX_op_muluh_i64;
3079             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3080             goto do_mul2;
3081         case INDEX_op_muls2_i64:
3082             opc_new = INDEX_op_mul_i64;
3083             opc_new2 = INDEX_op_mulsh_i64;
3084             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3085             goto do_mul2;
3086         do_mul2:
3087             nb_iargs = 2;
3088             nb_oargs = 2;
3089             if (arg_temp(op->args[1])->state == TS_DEAD) {
3090                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3091                     /* Both parts of the operation are dead.  */
3092                     goto do_remove;
3093                 }
3094                 /* The high part of the operation is dead; generate the low. */
3095                 op->opc = opc = opc_new;
3096                 op->args[1] = op->args[2];
3097                 op->args[2] = op->args[3];
3098             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3099                 /* The low part of the operation is dead; generate the high. */
3100                 op->opc = opc = opc_new2;
3101                 op->args[0] = op->args[1];
3102                 op->args[1] = op->args[2];
3103                 op->args[2] = op->args[3];
3104             } else {
3105                 goto do_not_remove;
3106             }
3107             /* Mark the single-word operation live.  */
3108             nb_oargs = 1;
3109             goto do_not_remove;
3110 
3111         default:
3112             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3113             nb_iargs = def->nb_iargs;
3114             nb_oargs = def->nb_oargs;
3115 
3116             /* Test if the operation can be removed because all
3117                its outputs are dead. We assume that nb_oargs == 0
3118                implies side effects.  */
3119             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3120                 for (i = 0; i < nb_oargs; i++) {
3121                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3122                         goto do_not_remove;
3123                     }
3124                 }
3125                 goto do_remove;
3126             }
3127             goto do_not_remove;
3128 
3129         do_remove:
3130             tcg_op_remove(s, op);
3131             break;
3132 
3133         do_not_remove:
3134             for (i = 0; i < nb_oargs; i++) {
3135                 ts = arg_temp(op->args[i]);
3136 
3137                 /* Remember the preference of the uses that followed.  */
3138                 op->output_pref[i] = *la_temp_pref(ts);
3139 
3140                 /* Output args are dead.  */
3141                 if (ts->state & TS_DEAD) {
3142                     arg_life |= DEAD_ARG << i;
3143                 }
3144                 if (ts->state & TS_MEM) {
3145                     arg_life |= SYNC_ARG << i;
3146                 }
3147                 ts->state = TS_DEAD;
3148                 la_reset_pref(ts);
3149             }
3150 
3151             /* If end of basic block, update.  */
3152             if (def->flags & TCG_OPF_BB_EXIT) {
3153                 la_func_end(s, nb_globals, nb_temps);
3154             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3155                 la_bb_sync(s, nb_globals, nb_temps);
3156             } else if (def->flags & TCG_OPF_BB_END) {
3157                 la_bb_end(s, nb_globals, nb_temps);
3158             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3159                 la_global_sync(s, nb_globals);
3160                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3161                     la_cross_call(s, nb_temps);
3162                 }
3163             }
3164 
3165             /* Record arguments that die in this opcode.  */
3166             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3167                 ts = arg_temp(op->args[i]);
3168                 if (ts->state & TS_DEAD) {
3169                     arg_life |= DEAD_ARG << i;
3170                 }
3171             }
3172 
3173             /* Input arguments are live for preceding opcodes.  */
3174             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3175                 ts = arg_temp(op->args[i]);
3176                 if (ts->state & TS_DEAD) {
3177                     /* For operands that were dead, initially allow
3178                        all regs for the type.  */
3179                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3180                     ts->state &= ~TS_DEAD;
3181                 }
3182             }
3183 
3184             /* Incorporate constraints for this operand.  */
3185             switch (opc) {
3186             case INDEX_op_mov_i32:
3187             case INDEX_op_mov_i64:
3188                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3189                    have proper constraints.  That said, special case
3190                    moves to propagate preferences backward.  */
3191                 if (IS_DEAD_ARG(1)) {
3192                     *la_temp_pref(arg_temp(op->args[0]))
3193                         = *la_temp_pref(arg_temp(op->args[1]));
3194                 }
3195                 break;
3196 
3197             default:
3198                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3199                     const TCGArgConstraint *ct = &def->args_ct[i];
3200                     TCGRegSet set, *pset;
3201 
3202                     ts = arg_temp(op->args[i]);
3203                     pset = la_temp_pref(ts);
3204                     set = *pset;
3205 
3206                     set &= ct->regs;
3207                     if (ct->ialias) {
3208                         set &= op->output_pref[ct->alias_index];
3209                     }
3210                     /* If the combination is not possible, restart.  */
3211                     if (set == 0) {
3212                         set = ct->regs;
3213                     }
3214                     *pset = set;
3215                 }
3216                 break;
3217             }
3218             break;
3219         }
3220         op->life = arg_life;
3221     }
3222 }
3223 
3224 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
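
/*
 * Added context: an "indirect" global is one whose canonical home is
 * memory reached through another register rather than a host register of
 * its own (see indirect_reg).  This pass shadows each such global with a
 * direct temp and inserts explicit ld/st ops around its uses and defs, so
 * that the register allocator only ever sees direct temps.
 */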
3225 static bool liveness_pass_2(TCGContext *s)
3226 {
3227     int nb_globals = s->nb_globals;
3228     int nb_temps, i;
3229     bool changes = false;
3230     TCGOp *op, *op_next;
3231 
3232     /* Create a temporary for each indirect global.  */
3233     for (i = 0; i < nb_globals; ++i) {
3234         TCGTemp *its = &s->temps[i];
3235         if (its->indirect_reg) {
3236             TCGTemp *dts = tcg_temp_alloc(s);
3237             dts->type = its->type;
3238             dts->base_type = its->base_type;
3239             its->state_ptr = dts;
3240         } else {
3241             its->state_ptr = NULL;
3242         }
3243         /* All globals begin dead.  */
3244         its->state = TS_DEAD;
3245     }
3246     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3247         TCGTemp *its = &s->temps[i];
3248         its->state_ptr = NULL;
3249         its->state = TS_DEAD;
3250     }
3251 
3252     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3253         TCGOpcode opc = op->opc;
3254         const TCGOpDef *def = &tcg_op_defs[opc];
3255         TCGLifeData arg_life = op->life;
3256         int nb_iargs, nb_oargs, call_flags;
3257         TCGTemp *arg_ts, *dir_ts;
3258 
3259         if (opc == INDEX_op_call) {
3260             nb_oargs = TCGOP_CALLO(op);
3261             nb_iargs = TCGOP_CALLI(op);
3262             call_flags = op->args[nb_oargs + nb_iargs + 1];
3263         } else {
3264             nb_iargs = def->nb_iargs;
3265             nb_oargs = def->nb_oargs;
3266 
3267             /* Set flags similar to those that calls require.  */
3268             if (def->flags & TCG_OPF_COND_BRANCH) {
3269                 /* Like reading globals: sync_globals */
3270                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3271             } else if (def->flags & TCG_OPF_BB_END) {
3272                 /* Like writing globals: save_globals */
3273                 call_flags = 0;
3274             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3275                 /* Like reading globals: sync_globals */
3276                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3277             } else {
3278                 /* No effect on globals.  */
3279                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3280                               TCG_CALL_NO_WRITE_GLOBALS);
3281             }
3282         }
3283 
3284         /* Make sure that input arguments are available.  */
3285         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3286             arg_ts = arg_temp(op->args[i]);
3287             if (arg_ts) {
3288                 dir_ts = arg_ts->state_ptr;
3289                 if (dir_ts && arg_ts->state == TS_DEAD) {
3290                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3291                                       ? INDEX_op_ld_i32
3292                                       : INDEX_op_ld_i64);
3293                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3294 
3295                     lop->args[0] = temp_arg(dir_ts);
3296                     lop->args[1] = temp_arg(arg_ts->mem_base);
3297                     lop->args[2] = arg_ts->mem_offset;
3298 
3299                     /* Loaded, but synced with memory.  */
3300                     arg_ts->state = TS_MEM;
3301                 }
3302             }
3303         }
3304 
3305         /* Perform input replacement, and mark inputs that became dead.
3306            No action is required except keeping temp_state up to date
3307            so that we reload when needed.  */
3308         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3309             arg_ts = arg_temp(op->args[i]);
3310             if (arg_ts) {
3311                 dir_ts = arg_ts->state_ptr;
3312                 if (dir_ts) {
3313                     op->args[i] = temp_arg(dir_ts);
3314                     changes = true;
3315                     if (IS_DEAD_ARG(i)) {
3316                         arg_ts->state = TS_DEAD;
3317                     }
3318                 }
3319             }
3320         }
3321 
3322         /* Liveness analysis should ensure that the following are
3323            all correct, for call sites and basic block end points.  */
3324         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3325             /* Nothing to do */
3326         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3327             for (i = 0; i < nb_globals; ++i) {
3328                 /* Liveness should see that globals are synced back,
3329                    that is, either TS_DEAD or TS_MEM.  */
3330                 arg_ts = &s->temps[i];
3331                 tcg_debug_assert(arg_ts->state_ptr == 0
3332                                  || arg_ts->state != 0);
3333             }
3334         } else {
3335             for (i = 0; i < nb_globals; ++i) {
3336                 /* Liveness should see that globals are saved back,
3337                    that is, TS_DEAD, waiting to be reloaded.  */
3338                 arg_ts = &s->temps[i];
3339                 tcg_debug_assert(arg_ts->state_ptr == 0
3340                                  || arg_ts->state == TS_DEAD);
3341             }
3342         }
3343 
3344         /* Outputs become available.  */
3345         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3346             arg_ts = arg_temp(op->args[0]);
3347             dir_ts = arg_ts->state_ptr;
3348             if (dir_ts) {
3349                 op->args[0] = temp_arg(dir_ts);
3350                 changes = true;
3351 
3352                 /* The output is now live and modified.  */
3353                 arg_ts->state = 0;
3354 
3355                 if (NEED_SYNC_ARG(0)) {
3356                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3357                                       ? INDEX_op_st_i32
3358                                       : INDEX_op_st_i64);
3359                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3360                     TCGTemp *out_ts = dir_ts;
3361 
3362                     if (IS_DEAD_ARG(0)) {
3363                         out_ts = arg_temp(op->args[1]);
3364                         arg_ts->state = TS_DEAD;
3365                         tcg_op_remove(s, op);
3366                     } else {
3367                         arg_ts->state = TS_MEM;
3368                     }
3369 
3370                     sop->args[0] = temp_arg(out_ts);
3371                     sop->args[1] = temp_arg(arg_ts->mem_base);
3372                     sop->args[2] = arg_ts->mem_offset;
3373                 } else {
3374                     tcg_debug_assert(!IS_DEAD_ARG(0));
3375                 }
3376             }
3377         } else {
3378             for (i = 0; i < nb_oargs; i++) {
3379                 arg_ts = arg_temp(op->args[i]);
3380                 dir_ts = arg_ts->state_ptr;
3381                 if (!dir_ts) {
3382                     continue;
3383                 }
3384                 op->args[i] = temp_arg(dir_ts);
3385                 changes = true;
3386 
3387                 /* The output is now live and modified.  */
3388                 arg_ts->state = 0;
3389 
3390                 /* Sync outputs upon their last write.  */
3391                 if (NEED_SYNC_ARG(i)) {
3392                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3393                                       ? INDEX_op_st_i32
3394                                       : INDEX_op_st_i64);
3395                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3396 
3397                     sop->args[0] = temp_arg(dir_ts);
3398                     sop->args[1] = temp_arg(arg_ts->mem_base);
3399                     sop->args[2] = arg_ts->mem_offset;
3400 
3401                     arg_ts->state = TS_MEM;
3402                 }
3403                 /* Drop outputs that are dead.  */
3404                 if (IS_DEAD_ARG(i)) {
3405                     arg_ts->state = TS_DEAD;
3406                 }
3407             }
3408         }
3409     }
3410 
3411     return changes;
3412 }
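
/*
 * A sketch of the rewrite performed above, for an indirect global "g"
 * shadowed by a direct temp "d" and stored at offset off(g) from its
 * mem_base register:
 *
 *     add_i32 g, g, t           ld_i32  d, base(g), off(g)
 *                        -->    add_i32 d, d, t
 *                               st_i32  d, base(g), off(g)
 *
 * The load is emitted only when g is TS_DEAD at the use, and the store
 * only when liveness marked the output with NEED_SYNC_ARG; consecutive
 * uses keep operating on d directly.
 */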
3413 
3414 #ifdef CONFIG_DEBUG_TCG
3415 static void dump_regs(TCGContext *s)
3416 {
3417     TCGTemp *ts;
3418     int i;
3419     char buf[64];
3420 
3421     for (i = 0; i < s->nb_temps; i++) {
3422         ts = &s->temps[i];
3423         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3424         switch (ts->val_type) {
3425         case TEMP_VAL_REG:
3426             printf("%s", tcg_target_reg_names[ts->reg]);
3427             break;
3428         case TEMP_VAL_MEM:
3429             printf("%d(%s)", (int)ts->mem_offset,
3430                    tcg_target_reg_names[ts->mem_base->reg]);
3431             break;
3432         case TEMP_VAL_CONST:
3433             printf("$0x%" PRIx64, ts->val);
3434             break;
3435         case TEMP_VAL_DEAD:
3436             printf("D");
3437             break;
3438         default:
3439             printf("???");
3440             break;
3441         }
3442         printf("\n");
3443     }
3444 
3445     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3446         if (s->reg_to_temp[i] != NULL) {
3447             printf("%s: %s\n",
3448                    tcg_target_reg_names[i],
3449                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3450         }
3451     }
3452 }
3453 
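/*
 * Verify both directions of the register-mapping invariant: each
 * non-NULL reg_to_temp[] entry must name a temp that believes it is
 * live in that register, and each non-fixed temp in TEMP_VAL_REG
 * state must be the temp recorded for its register.
 */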
3454 static void check_regs(TCGContext *s)
3455 {
3456     int reg;
3457     int k;
3458     TCGTemp *ts;
3459     char buf[64];
3460 
3461     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3462         ts = s->reg_to_temp[reg];
3463         if (ts != NULL) {
3464             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3465                 printf("Inconsistency for register %s:\n",
3466                        tcg_target_reg_names[reg]);
3467                 goto fail;
3468             }
3469         }
3470     }
3471     for (k = 0; k < s->nb_temps; k++) {
3472         ts = &s->temps[k];
3473         if (ts->val_type == TEMP_VAL_REG
3474             && ts->kind != TEMP_FIXED
3475             && s->reg_to_temp[ts->reg] != ts) {
3476             printf("Inconsistency for temp %s:\n",
3477                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3478         fail:
3479             printf("reg state:\n");
3480             dump_regs(s);
3481             tcg_abort();
3482         }
3483     }
3484 }
3485 #endif
3486 
3487 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3488 {
3489 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3490     /* Sparc64 stack is accessed with offset of 2047 */
3491     s->current_frame_offset = (s->current_frame_offset +
3492                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3493         ~(sizeof(tcg_target_long) - 1);
3494 #endif
3495     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3496         s->frame_end) {
3497         tcg_abort();
3498     }
3499     ts->mem_offset = s->current_frame_offset;
3500     ts->mem_base = s->frame_temp;
3501     ts->mem_allocated = 1;
3502     s->current_frame_offset += sizeof(tcg_target_long);
3503 }
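
/*
 * The rounding above is the usual power-of-two alignment idiom,
 * (off + align - 1) & ~(align - 1): e.g. with an 8-byte
 * tcg_target_long, a current_frame_offset of 12 becomes
 * (12 + 7) & ~7 = 16 before the slot is handed out.
 */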
3504 
3505 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3506 
3507 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3508    mark it free; otherwise mark it dead.  */
3509 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3510 {
3511     TCGTempVal new_type;
3512 
3513     switch (ts->kind) {
3514     case TEMP_FIXED:
3515         return;
3516     case TEMP_GLOBAL:
3517     case TEMP_LOCAL:
3518         new_type = TEMP_VAL_MEM;
3519         break;
3520     case TEMP_NORMAL:
3521         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3522         break;
3523     case TEMP_CONST:
3524         new_type = TEMP_VAL_CONST;
3525         break;
3526     default:
3527         g_assert_not_reached();
3528     }
3529     if (ts->val_type == TEMP_VAL_REG) {
3530         s->reg_to_temp[ts->reg] = NULL;
3531     }
3532     ts->val_type = new_type;
3533 }
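
/*
 * Summarizing the kinds above: TEMP_FIXED is never released;
 * TEMP_GLOBAL and TEMP_LOCAL always fall back to their canonical
 * memory slot; TEMP_CONST reverts to TEMP_VAL_CONST; only TEMP_NORMAL
 * distinguishes "free" (value preserved in memory) from "dead"
 * (value discarded entirely).
 */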
3534 
3535 /* Mark a temporary as dead.  */
3536 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3537 {
3538     temp_free_or_dead(s, ts, 1);
3539 }
3540 
3541 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3542    register needs to be allocated to store a constant.  If 'free_or_dead'
3543    is non-zero, subsequently release the temporary; if it is positive, the
3544    temp is dead; if it is negative, the temp is free.  */
3545 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3546                       TCGRegSet preferred_regs, int free_or_dead)
3547 {
3548     if (!temp_readonly(ts) && !ts->mem_coherent) {
3549         if (!ts->mem_allocated) {
3550             temp_allocate_frame(s, ts);
3551         }
3552         switch (ts->val_type) {
3553         case TEMP_VAL_CONST:
3554             /* If we're going to free the temp immediately, then we won't
3555                require it later in a register, so attempt to store the
3556                constant to memory directly.  */
3557             if (free_or_dead
3558                 && tcg_out_sti(s, ts->type, ts->val,
3559                                ts->mem_base->reg, ts->mem_offset)) {
3560                 break;
3561             }
3562             temp_load(s, ts, tcg_target_available_regs[ts->type],
3563                       allocated_regs, preferred_regs);
3564             /* fallthrough */
3565 
3566         case TEMP_VAL_REG:
3567             tcg_out_st(s, ts->type, ts->reg,
3568                        ts->mem_base->reg, ts->mem_offset);
3569             break;
3570 
3571         case TEMP_VAL_MEM:
3572             break;
3573 
3574         case TEMP_VAL_DEAD:
3575         default:
3576             tcg_abort();
3577         }
3578         ts->mem_coherent = 1;
3579     }
3580     if (free_or_dead) {
3581         temp_free_or_dead(s, ts, free_or_dead);
3582     }
3583 }
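
/*
 * Note the TEMP_VAL_CONST shortcut above: when a dying constant must
 * be synced, tcg_out_sti() may be able to store the immediate
 * directly into the stack slot; only if the target cannot encode such
 * a store do we burn a scratch register via temp_load().
 */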
3584 
3585 /* free register 'reg' by spilling the corresponding temporary if necessary */
3586 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3587 {
3588     TCGTemp *ts = s->reg_to_temp[reg];
3589     if (ts != NULL) {
3590         temp_sync(s, ts, allocated_regs, 0, -1);
3591     }
3592 }
3593 
3594 /**
3595  * tcg_reg_alloc:
3596  * @required_regs: Set of registers in which we must allocate.
3597  * @allocated_regs: Set of registers which must be avoided.
3598  * @preferred_regs: Set of registers we should prefer.
3599  * @rev: True if we search the registers in "indirect" order.
3600  *
3601  * The allocated register must be in @required_regs & ~@allocated_regs,
3602  * but if we can put it in @preferred_regs we may save a move later.
3603  */
3604 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3605                             TCGRegSet allocated_regs,
3606                             TCGRegSet preferred_regs, bool rev)
3607 {
3608     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3609     TCGRegSet reg_ct[2];
3610     const int *order;
3611 
3612     reg_ct[1] = required_regs & ~allocated_regs;
3613     tcg_debug_assert(reg_ct[1] != 0);
3614     reg_ct[0] = reg_ct[1] & preferred_regs;
3615 
3616     /* Skip the preferred_regs option if it cannot be satisfied,
3617        or if the preference made no difference.  */
3618     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3619 
3620     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3621 
3622     /* Try free registers, preferences first.  */
3623     for (j = f; j < 2; j++) {
3624         TCGRegSet set = reg_ct[j];
3625 
3626         if (tcg_regset_single(set)) {
3627             /* One register in the set.  */
3628             TCGReg reg = tcg_regset_first(set);
3629             if (s->reg_to_temp[reg] == NULL) {
3630                 return reg;
3631             }
3632         } else {
3633             for (i = 0; i < n; i++) {
3634                 TCGReg reg = order[i];
3635                 if (s->reg_to_temp[reg] == NULL &&
3636                     tcg_regset_test_reg(set, reg)) {
3637                     return reg;
3638                 }
3639             }
3640         }
3641     }
3642 
3643     /* We must spill something.  */
3644     for (j = f; j < 2; j++) {
3645         TCGRegSet set = reg_ct[j];
3646 
3647         if (tcg_regset_single(set)) {
3648             /* One register in the set.  */
3649             TCGReg reg = tcg_regset_first(set);
3650             tcg_reg_free(s, reg, allocated_regs);
3651             return reg;
3652         } else {
3653             for (i = 0; i < n; i++) {
3654                 TCGReg reg = order[i];
3655                 if (tcg_regset_test_reg(set, reg)) {
3656                     tcg_reg_free(s, reg, allocated_regs);
3657                     return reg;
3658                 }
3659             }
3660         }
3661     }
3662 
3663     tcg_abort();
3664 }
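
/*
 * The search above is effectively four passes: free registers within
 * preferred_regs, free registers within required_regs, then the same
 * two sets again with spilling.  Starting at j = f skips the
 * preference passes entirely when the preference cannot be satisfied
 * or would make no difference.
 */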
3665 
3666 /* Make sure the temporary is in a register.  If needed, allocate the register
3667    from DESIRED while avoiding ALLOCATED.  */
3668 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3669                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3670 {
3671     TCGReg reg;
3672 
3673     switch (ts->val_type) {
3674     case TEMP_VAL_REG:
3675         return;
3676     case TEMP_VAL_CONST:
3677         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3678                             preferred_regs, ts->indirect_base);
3679         if (ts->type <= TCG_TYPE_I64) {
3680             tcg_out_movi(s, ts->type, reg, ts->val);
3681         } else {
3682             uint64_t val = ts->val;
3683             MemOp vece = MO_64;
3684 
3685             /*
3686              * Find the minimal vector element that matches the constant.
3687              * The targets will, in general, have to do this search anyway,
3688              * The targets will, in general, have to do this search anyway;
3689              * do it generically here.
3690             if (val == dup_const(MO_8, val)) {
3691                 vece = MO_8;
3692             } else if (val == dup_const(MO_16, val)) {
3693                 vece = MO_16;
3694             } else if (val == dup_const(MO_32, val)) {
3695                 vece = MO_32;
3696             }
3697 
3698             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3699         }
3700         ts->mem_coherent = 0;
3701         break;
3702     case TEMP_VAL_MEM:
3703         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3704                             preferred_regs, ts->indirect_base);
3705         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3706         ts->mem_coherent = 1;
3707         break;
3708     case TEMP_VAL_DEAD:
3709     default:
3710         tcg_abort();
3711     }
3712     ts->reg = reg;
3713     ts->val_type = TEMP_VAL_REG;
3714     s->reg_to_temp[reg] = ts;
3715 }
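
/*
 * An example of the dup_const() screening used above: the constant
 * 0x6666666666666666 already equals dup_const(MO_8, 0x66), so the
 * cheapest MO_8 dup suffices; 0x1234123412341234 first matches at
 * MO_16; a value with no such repetition falls through to MO_64.
 */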
3716 
3717 /* Save a temporary to memory. 'allocated_regs' is used in case a
3718    temporary register needs to be allocated to store a constant.  */
3719 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3720 {
3721     /* The liveness analysis already ensures that globals are back
3722        in memory. Keep a tcg_debug_assert for safety. */
3723     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3724 }
3725 
3726 /* save globals to their canonical location and assume they can be
3727    modified by the following code. 'allocated_regs' is used in case a
3728    temporary register needs to be allocated to store a constant. */
3729 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3730 {
3731     int i, n;
3732 
3733     for (i = 0, n = s->nb_globals; i < n; i++) {
3734         temp_save(s, &s->temps[i], allocated_regs);
3735     }
3736 }
3737 
3738 /* sync globals to their canonical location and assume they can be
3739    read by the following code. 'allocated_regs' is used in case a
3740    temporary register needs to be allocated to store a constant. */
3741 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3742 {
3743     int i, n;
3744 
3745     for (i = 0, n = s->nb_globals; i < n; i++) {
3746         TCGTemp *ts = &s->temps[i];
3747         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3748                          || ts->kind == TEMP_FIXED
3749                          || ts->mem_coherent);
3750     }
3751 }
3752 
3753 /* at the end of a basic block, we assume all temporaries are dead and
3754    all globals are stored at their canonical location. */
3755 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3756 {
3757     int i;
3758 
3759     for (i = s->nb_globals; i < s->nb_temps; i++) {
3760         TCGTemp *ts = &s->temps[i];
3761 
3762         switch (ts->kind) {
3763         case TEMP_LOCAL:
3764             temp_save(s, ts, allocated_regs);
3765             break;
3766         case TEMP_NORMAL:
3767             /* The liveness analysis already ensures that temps are dead.
3768                Keep a tcg_debug_assert for safety. */
3769             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3770             break;
3771         case TEMP_CONST:
3772             /* Similarly, we should have freed any allocated register. */
3773             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3774             break;
3775         default:
3776             g_assert_not_reached();
3777         }
3778     }
3779 
3780     save_globals(s, allocated_regs);
3781 }
3782 
3783 /*
3784  * At a conditional branch, we assume all temporaries are dead and
3785  * all globals and local temps are synced to their location.
3786  */
3787 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3788 {
3789     sync_globals(s, allocated_regs);
3790 
3791     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3792         TCGTemp *ts = &s->temps[i];
3793         /*
3794          * The liveness analysis already ensures that temps are dead.
3795          * Keep tcg_debug_asserts for safety.
3796          */
3797         switch (ts->kind) {
3798         case TEMP_LOCAL:
3799             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3800             break;
3801         case TEMP_NORMAL:
3802             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3803             break;
3804         case TEMP_CONST:
3805             break;
3806         default:
3807             g_assert_not_reached();
3808         }
3809     }
3810 }
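
/*
 * Contrast this with tcg_reg_alloc_bb_end(): at a true block end the
 * globals are saved and their register copies given up, whereas at a
 * conditional branch they are only synced, so a global may legally
 * stay cached in a register across the branch provided the memory
 * copy is already up to date.
 */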
3811 
3812 /*
3813  * Specialized code generation for INDEX_op_mov_* with a constant.
3814  */
3815 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3816                                   tcg_target_ulong val, TCGLifeData arg_life,
3817                                   TCGRegSet preferred_regs)
3818 {
3819     /* ENV should not be modified.  */
3820     tcg_debug_assert(!temp_readonly(ots));
3821 
3822     /* The movi is not explicitly generated here.  */
3823     if (ots->val_type == TEMP_VAL_REG) {
3824         s->reg_to_temp[ots->reg] = NULL;
3825     }
3826     ots->val_type = TEMP_VAL_CONST;
3827     ots->val = val;
3828     ots->mem_coherent = 0;
3829     if (NEED_SYNC_ARG(0)) {
3830         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3831     } else if (IS_DEAD_ARG(0)) {
3832         temp_dead(s, ots);
3833     }
3834 }
3835 
3836 /*
3837  * Specialized code generation for INDEX_op_mov_*.
3838  */
3839 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3840 {
3841     const TCGLifeData arg_life = op->life;
3842     TCGRegSet allocated_regs, preferred_regs;
3843     TCGTemp *ts, *ots;
3844     TCGType otype, itype;
3845 
3846     allocated_regs = s->reserved_regs;
3847     preferred_regs = op->output_pref[0];
3848     ots = arg_temp(op->args[0]);
3849     ts = arg_temp(op->args[1]);
3850 
3851     /* ENV should not be modified.  */
3852     tcg_debug_assert(!temp_readonly(ots));
3853 
3854     /* Note that otype != itype for no-op truncation.  */
3855     otype = ots->type;
3856     itype = ts->type;
3857 
3858     if (ts->val_type == TEMP_VAL_CONST) {
3859         /* propagate constant or generate sti */
3860         tcg_target_ulong val = ts->val;
3861         if (IS_DEAD_ARG(1)) {
3862             temp_dead(s, ts);
3863         }
3864         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3865         return;
3866     }
3867 
3868     /* If the source value is in memory we're going to be forced
3869        to have it in a register in order to perform the copy.  Copy
3870        the SOURCE value into its own register first; that way we
3871        don't have to reload SOURCE the next time it is used. */
3872     if (ts->val_type == TEMP_VAL_MEM) {
3873         temp_load(s, ts, tcg_target_available_regs[itype],
3874                   allocated_regs, preferred_regs);
3875     }
3876 
3877     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3878     if (IS_DEAD_ARG(0)) {
3879         /* mov to a non-saved dead register makes no sense (even with
3880            liveness analysis disabled). */
3881         tcg_debug_assert(NEED_SYNC_ARG(0));
3882         if (!ots->mem_allocated) {
3883             temp_allocate_frame(s, ots);
3884         }
3885         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3886         if (IS_DEAD_ARG(1)) {
3887             temp_dead(s, ts);
3888         }
3889         temp_dead(s, ots);
3890     } else {
3891         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3892             /* the mov can be suppressed */
3893             if (ots->val_type == TEMP_VAL_REG) {
3894                 s->reg_to_temp[ots->reg] = NULL;
3895             }
3896             ots->reg = ts->reg;
3897             temp_dead(s, ts);
3898         } else {
3899             if (ots->val_type != TEMP_VAL_REG) {
3900                 /* When allocating a new register, make sure to not spill the
3901                    input one. */
3902                 tcg_regset_set_reg(allocated_regs, ts->reg);
3903                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3904                                          allocated_regs, preferred_regs,
3905                                          ots->indirect_base);
3906             }
3907             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3908                 /*
3909                  * Cross register class move not supported.
3910                  * Store the source register into the destination slot
3911                  * and leave the destination temp as TEMP_VAL_MEM.
3912                  */
3913                 assert(!temp_readonly(ots));
3914                 if (!ots->mem_allocated) {
3915                     temp_allocate_frame(s, ots);
3916                 }
3917                 tcg_out_st(s, ts->type, ts->reg,
3918                            ots->mem_base->reg, ots->mem_offset);
3919                 ots->mem_coherent = 1;
3920                 temp_free_or_dead(s, ots, -1);
3921                 return;
3922             }
3923         }
3924         ots->val_type = TEMP_VAL_REG;
3925         ots->mem_coherent = 0;
3926         s->reg_to_temp[ots->reg] = ots;
3927         if (NEED_SYNC_ARG(0)) {
3928             temp_sync(s, ots, allocated_regs, 0, 0);
3929         }
3930     }
3931 }
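
/*
 * A sketch of the suppressed-mov case above: for "mov_i32 t1, t0"
 * where t0 dies at this opcode, t1 simply inherits t0's register and
 * no host instruction is emitted at all; only reg_to_temp[] and the
 * two temps' bookkeeping are updated.
 */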
3932 
3933 /*
3934  * Specialized code generation for INDEX_op_dup_vec.
3935  */
3936 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3937 {
3938     const TCGLifeData arg_life = op->life;
3939     TCGRegSet dup_out_regs, dup_in_regs;
3940     TCGTemp *its, *ots;
3941     TCGType itype, vtype;
3942     intptr_t endian_fixup;
3943     unsigned vece;
3944     bool ok;
3945 
3946     ots = arg_temp(op->args[0]);
3947     its = arg_temp(op->args[1]);
3948 
3949     /* ENV should not be modified.  */
3950     tcg_debug_assert(!temp_readonly(ots));
3951 
3952     itype = its->type;
3953     vece = TCGOP_VECE(op);
3954     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3955 
3956     if (its->val_type == TEMP_VAL_CONST) {
3957         /* Propagate constant via movi -> dupi.  */
3958         tcg_target_ulong val = its->val;
3959         if (IS_DEAD_ARG(1)) {
3960             temp_dead(s, its);
3961         }
3962         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3963         return;
3964     }
3965 
3966     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3967     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3968 
3969     /* Allocate the output register now.  */
3970     if (ots->val_type != TEMP_VAL_REG) {
3971         TCGRegSet allocated_regs = s->reserved_regs;
3972 
3973         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3974             /* Make sure to not spill the input register. */
3975             tcg_regset_set_reg(allocated_regs, its->reg);
3976         }
3977         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3978                                  op->output_pref[0], ots->indirect_base);
3979         ots->val_type = TEMP_VAL_REG;
3980         ots->mem_coherent = 0;
3981         s->reg_to_temp[ots->reg] = ots;
3982     }
3983 
3984     switch (its->val_type) {
3985     case TEMP_VAL_REG:
3986         /*
3987          * The dup constraints must be broad, covering all possible VECE.
3988          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3989          * to fail, indicating that extra moves are required for that case.
3990          */
3991         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3992             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3993                 goto done;
3994             }
3995             /* Try again from memory or a vector input register.  */
3996         }
3997         if (!its->mem_coherent) {
3998             /*
3999              * The input register is not synced, and so an extra store
4000              * would be required to use memory.  Attempt an integer-vector
4001              * register move first.  We do not have a TCGRegSet for this.
4002              */
4003             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4004                 break;
4005             }
4006             /* Sync the temp back to its slot and load from there.  */
4007             temp_sync(s, its, s->reserved_regs, 0, 0);
4008         }
4009         /* fall through */
4010 
4011     case TEMP_VAL_MEM:
4012 #ifdef HOST_WORDS_BIGENDIAN
4013         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4014         endian_fixup -= 1 << vece;
4015 #else
4016         endian_fixup = 0;
4017 #endif
4018         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4019                              its->mem_offset + endian_fixup)) {
4020             goto done;
4021         }
4022         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4023         break;
4024 
4025     default:
4026         g_assert_not_reached();
4027     }
4028 
4029     /* We now have a vector input register, so dup must succeed. */
4030     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4031     tcg_debug_assert(ok);
4032 
4033  done:
4034     if (IS_DEAD_ARG(1)) {
4035         temp_dead(s, its);
4036     }
4037     if (NEED_SYNC_ARG(0)) {
4038         temp_sync(s, ots, s->reserved_regs, 0, 0);
4039     }
4040     if (IS_DEAD_ARG(0)) {
4041         temp_dead(s, ots);
4042     }
4043 }
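
/*
 * A worked example of the endian_fixup above: duplicating the MO_8
 * element of an I64 input on a big-endian host must load from
 * mem_offset + 8 - 1, since the least significant byte lives at the
 * highest address there; on a little-endian host the low element is
 * at mem_offset itself, hence a fixup of 0.
 */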
4044 
4045 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4046 {
4047     const TCGLifeData arg_life = op->life;
4048     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4049     TCGRegSet i_allocated_regs;
4050     TCGRegSet o_allocated_regs;
4051     int i, k, nb_iargs, nb_oargs;
4052     TCGReg reg;
4053     TCGArg arg;
4054     const TCGArgConstraint *arg_ct;
4055     TCGTemp *ts;
4056     TCGArg new_args[TCG_MAX_OP_ARGS];
4057     int const_args[TCG_MAX_OP_ARGS];
4058 
4059     nb_oargs = def->nb_oargs;
4060     nb_iargs = def->nb_iargs;
4061 
4062     /* copy constants */
4063     memcpy(new_args + nb_oargs + nb_iargs,
4064            op->args + nb_oargs + nb_iargs,
4065            sizeof(TCGArg) * def->nb_cargs);
4066 
4067     i_allocated_regs = s->reserved_regs;
4068     o_allocated_regs = s->reserved_regs;
4069 
4070     /* satisfy input constraints */
4071     for (k = 0; k < nb_iargs; k++) {
4072         TCGRegSet i_preferred_regs, o_preferred_regs;
4073 
4074         i = def->args_ct[nb_oargs + k].sort_index;
4075         arg = op->args[i];
4076         arg_ct = &def->args_ct[i];
4077         ts = arg_temp(arg);
4078 
4079         if (ts->val_type == TEMP_VAL_CONST
4080             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4081             /* constant is OK for instruction */
4082             const_args[i] = 1;
4083             new_args[i] = ts->val;
4084             continue;
4085         }
4086 
4087         i_preferred_regs = o_preferred_regs = 0;
4088         if (arg_ct->ialias) {
4089             o_preferred_regs = op->output_pref[arg_ct->alias_index];
4090 
4091             /*
4092              * If the input is readonly, then it cannot also be an
4093              * output and aliased to itself.  If the input is not
4094              * dead after the instruction, we must allocate a new
4095              * register and move it.
4096              */
4097             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4098                 goto allocate_in_reg;
4099             }
4100 
4101             /*
4102              * Check if the current register has already been allocated
4103              * for another input aliased to an output.
4104              */
4105             if (ts->val_type == TEMP_VAL_REG) {
4106                 reg = ts->reg;
4107                 for (int k2 = 0; k2 < k; k2++) {
4108                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
4109                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4110                         goto allocate_in_reg;
4111                     }
4112                 }
4113             }
4114             i_preferred_regs = o_preferred_regs;
4115         }
4116 
4117         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4118         reg = ts->reg;
4119 
4120         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4121  allocate_in_reg:
4122             /*
4123              * Allocate a new register matching the constraint
4124              * and move the temporary register into it.
4125              */
4126             temp_load(s, ts, tcg_target_available_regs[ts->type],
4127                       i_allocated_regs, 0);
4128             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4129                                 o_preferred_regs, ts->indirect_base);
4130             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4131                 /*
4132                  * Cross register class move not supported.  Sync the
4133                  * temp back to its slot and load from there.
4134                  */
4135                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4136                 tcg_out_ld(s, ts->type, reg,
4137                            ts->mem_base->reg, ts->mem_offset);
4138             }
4139         }
4140         new_args[i] = reg;
4141         const_args[i] = 0;
4142         tcg_regset_set_reg(i_allocated_regs, reg);
4143     }
4144 
4145     /* mark dead temporaries and free the associated registers */
4146     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4147         if (IS_DEAD_ARG(i)) {
4148             temp_dead(s, arg_temp(op->args[i]));
4149         }
4150     }
4151 
4152     if (def->flags & TCG_OPF_COND_BRANCH) {
4153         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4154     } else if (def->flags & TCG_OPF_BB_END) {
4155         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4156     } else {
4157         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4158             /* XXX: permit generic clobber register list? */
4159             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4160                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4161                     tcg_reg_free(s, i, i_allocated_regs);
4162                 }
4163             }
4164         }
4165         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4166             /* sync globals if the op has side effects and might trigger
4167                an exception. */
4168             sync_globals(s, i_allocated_regs);
4169         }
4170 
4171         /* satisfy the output constraints */
4172         for (k = 0; k < nb_oargs; k++) {
4173             i = def->args_ct[k].sort_index;
4174             arg = op->args[i];
4175             arg_ct = &def->args_ct[i];
4176             ts = arg_temp(arg);
4177 
4178             /* ENV should not be modified.  */
4179             tcg_debug_assert(!temp_readonly(ts));
4180 
4181             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4182                 reg = new_args[arg_ct->alias_index];
4183             } else if (arg_ct->newreg) {
4184                 reg = tcg_reg_alloc(s, arg_ct->regs,
4185                                     i_allocated_regs | o_allocated_regs,
4186                                     op->output_pref[k], ts->indirect_base);
4187             } else {
4188                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4189                                     op->output_pref[k], ts->indirect_base);
4190             }
4191             tcg_regset_set_reg(o_allocated_regs, reg);
4192             if (ts->val_type == TEMP_VAL_REG) {
4193                 s->reg_to_temp[ts->reg] = NULL;
4194             }
4195             ts->val_type = TEMP_VAL_REG;
4196             ts->reg = reg;
4197             /*
4198              * Temp value is modified, so the value kept in memory is
4199              * potentially not the same.
4200              */
4201             ts->mem_coherent = 0;
4202             s->reg_to_temp[reg] = ts;
4203             new_args[i] = reg;
4204         }
4205     }
4206 
4207     /* emit instruction */
4208     if (def->flags & TCG_OPF_VECTOR) {
4209         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4210                        new_args, const_args);
4211     } else {
4212         tcg_out_op(s, op->opc, new_args, const_args);
4213     }
4214 
4215     /* move the outputs in the correct register if needed */
4216     for (i = 0; i < nb_oargs; i++) {
4217         ts = arg_temp(op->args[i]);
4218 
4219         /* ENV should not be modified.  */
4220         tcg_debug_assert(!temp_readonly(ts));
4221 
4222         if (NEED_SYNC_ARG(i)) {
4223             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4224         } else if (IS_DEAD_ARG(i)) {
4225             temp_dead(s, ts);
4226         }
4227     }
4228 }
4229 
4230 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4231 {
4232     const TCGLifeData arg_life = op->life;
4233     TCGTemp *ots, *itsl, *itsh;
4234     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4235 
4236     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4237     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4238     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4239 
4240     ots = arg_temp(op->args[0]);
4241     itsl = arg_temp(op->args[1]);
4242     itsh = arg_temp(op->args[2]);
4243 
4244     /* ENV should not be modified.  */
4245     tcg_debug_assert(!temp_readonly(ots));
4246 
4247     /* Allocate the output register now.  */
4248     if (ots->val_type != TEMP_VAL_REG) {
4249         TCGRegSet allocated_regs = s->reserved_regs;
4250         TCGRegSet dup_out_regs =
4251             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4252 
4253         /* Make sure to not spill the input registers. */
4254         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4255             tcg_regset_set_reg(allocated_regs, itsl->reg);
4256         }
4257         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4258             tcg_regset_set_reg(allocated_regs, itsh->reg);
4259         }
4260 
4261         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4262                                  op->output_pref[0], ots->indirect_base);
4263         ots->val_type = TEMP_VAL_REG;
4264         ots->mem_coherent = 0;
4265         s->reg_to_temp[ots->reg] = ots;
4266     }
4267 
4268     /* Promote dup2 of immediates to dupi_vec. */
4269     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4270         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4271         MemOp vece = MO_64;
4272 
4273         if (val == dup_const(MO_8, val)) {
4274             vece = MO_8;
4275         } else if (val == dup_const(MO_16, val)) {
4276             vece = MO_16;
4277         } else if (val == dup_const(MO_32, val)) {
4278             vece = MO_32;
4279         }
4280 
4281         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4282         goto done;
4283     }
4284 
4285     /* If the two inputs form one 64-bit value, try dupm_vec. */
4286     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4287         if (!itsl->mem_coherent) {
4288             temp_sync(s, itsl, s->reserved_regs, 0, 0);
4289         }
4290         if (!itsh->mem_coherent) {
4291             temp_sync(s, itsh, s->reserved_regs, 0, 0);
4292         }
4293 #ifdef HOST_WORDS_BIGENDIAN
4294         TCGTemp *its = itsh;
4295 #else
4296         TCGTemp *its = itsl;
4297 #endif
4298         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4299                              its->mem_base->reg, its->mem_offset)) {
4300             goto done;
4301         }
4302     }
4303 
4304     /* Fall back to generic expansion. */
4305     return false;
4306 
4307  done:
4308     if (IS_DEAD_ARG(1)) {
4309         temp_dead(s, itsl);
4310     }
4311     if (IS_DEAD_ARG(2)) {
4312         temp_dead(s, itsh);
4313     }
4314     if (NEED_SYNC_ARG(0)) {
4315         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4316     } else if (IS_DEAD_ARG(0)) {
4317         temp_dead(s, ots);
4318     }
4319     return true;
4320 }
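
/*
 * The deposit64() above just glues the two 32-bit halves together:
 * e.g. itsl->val = 0x89abcdef with itsh->val = 0x01234567 yields
 * 0x0123456789abcdef, which is then screened against the replicated
 * MO_8/MO_16/MO_32 patterns exactly as in temp_load().
 */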
4321 
4322 #ifdef TCG_TARGET_STACK_GROWSUP
4323 #define STACK_DIR(x) (-(x))
4324 #else
4325 #define STACK_DIR(x) (x)
4326 #endif
4327 
4328 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4329 {
4330     const int nb_oargs = TCGOP_CALLO(op);
4331     const int nb_iargs = TCGOP_CALLI(op);
4332     const TCGLifeData arg_life = op->life;
4333     int flags, nb_regs, i;
4334     TCGReg reg;
4335     TCGArg arg;
4336     TCGTemp *ts;
4337     intptr_t stack_offset;
4338     size_t call_stack_size;
4339     tcg_insn_unit *func_addr;
4340     int allocate_args;
4341     TCGRegSet allocated_regs;
4342 
4343     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4344     flags = op->args[nb_oargs + nb_iargs + 1];
4345 
4346     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4347     if (nb_regs > nb_iargs) {
4348         nb_regs = nb_iargs;
4349     }
4350 
4351     /* assign stack slots first */
4352     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4353     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4354         ~(TCG_TARGET_STACK_ALIGN - 1);
4355     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4356     if (allocate_args) {
4357         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4358            preallocate call stack */
4359         tcg_abort();
4360     }
4361 
4362     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4363     for (i = nb_regs; i < nb_iargs; i++) {
4364         arg = op->args[nb_oargs + i];
4365 #ifdef TCG_TARGET_STACK_GROWSUP
4366         stack_offset -= sizeof(tcg_target_long);
4367 #endif
4368         if (arg != TCG_CALL_DUMMY_ARG) {
4369             ts = arg_temp(arg);
4370             temp_load(s, ts, tcg_target_available_regs[ts->type],
4371                       s->reserved_regs, 0);
4372             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4373         }
4374 #ifndef TCG_TARGET_STACK_GROWSUP
4375         stack_offset += sizeof(tcg_target_long);
4376 #endif
4377     }
4378 
4379     /* assign input registers */
4380     allocated_regs = s->reserved_regs;
4381     for (i = 0; i < nb_regs; i++) {
4382         arg = op->args[nb_oargs + i];
4383         if (arg != TCG_CALL_DUMMY_ARG) {
4384             ts = arg_temp(arg);
4385             reg = tcg_target_call_iarg_regs[i];
4386 
4387             if (ts->val_type == TEMP_VAL_REG) {
4388                 if (ts->reg != reg) {
4389                     tcg_reg_free(s, reg, allocated_regs);
4390                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4391                         /*
4392                          * Cross register class move not supported.  Sync the
4393                          * temp back to its slot and load from there.
4394                          */
4395                         temp_sync(s, ts, allocated_regs, 0, 0);
4396                         tcg_out_ld(s, ts->type, reg,
4397                                    ts->mem_base->reg, ts->mem_offset);
4398                     }
4399                 }
4400             } else {
4401                 TCGRegSet arg_set = 0;
4402 
4403                 tcg_reg_free(s, reg, allocated_regs);
4404                 tcg_regset_set_reg(arg_set, reg);
4405                 temp_load(s, ts, arg_set, allocated_regs, 0);
4406             }
4407 
4408             tcg_regset_set_reg(allocated_regs, reg);
4409         }
4410     }
4411 
4412     /* mark dead temporaries and free the associated registers */
4413     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4414         if (IS_DEAD_ARG(i)) {
4415             temp_dead(s, arg_temp(op->args[i]));
4416         }
4417     }
4418 
4419     /* clobber call registers */
4420     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4421         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4422             tcg_reg_free(s, i, allocated_regs);
4423         }
4424     }
4425 
4426     /* Save globals if they might be written by the helper, sync them if
4427        they might be read. */
4428     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4429         /* Nothing to do */
4430     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4431         sync_globals(s, allocated_regs);
4432     } else {
4433         save_globals(s, allocated_regs);
4434     }
4435 
4436     tcg_out_call(s, func_addr);
4437 
4438     /* assign output registers and emit moves if needed */
4439     for (i = 0; i < nb_oargs; i++) {
4440         arg = op->args[i];
4441         ts = arg_temp(arg);
4442 
4443         /* ENV should not be modified.  */
4444         tcg_debug_assert(!temp_readonly(ts));
4445 
4446         reg = tcg_target_call_oarg_regs[i];
4447         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4448         if (ts->val_type == TEMP_VAL_REG) {
4449             s->reg_to_temp[ts->reg] = NULL;
4450         }
4451         ts->val_type = TEMP_VAL_REG;
4452         ts->reg = reg;
4453         ts->mem_coherent = 0;
4454         s->reg_to_temp[reg] = ts;
4455         if (NEED_SYNC_ARG(i)) {
4456             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4457         } else if (IS_DEAD_ARG(i)) {
4458             temp_dead(s, ts);
4459         }
4460     }
4461 }
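
/*
 * The ordering in tcg_reg_alloc_call() matters: stack arguments are
 * stored first, while every register is still available to
 * temp_load(); register arguments are then pinned one at a time, each
 * added to allocated_regs so a later argument cannot evict it; only
 * after that are the call-clobbered registers flushed and globals
 * saved or synced per the TCG_CALL_NO_* flags, just before the call
 * itself.
 */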
4462 
4463 #ifdef CONFIG_PROFILER
4464 
4465 /* avoid copy/paste errors */
4466 #define PROF_ADD(to, from, field)                       \
4467     do {                                                \
4468         (to)->field += qatomic_read(&((from)->field));  \
4469     } while (0)
4470 
4471 #define PROF_MAX(to, from, field)                                       \
4472     do {                                                                \
4473         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4474         if (val__ > (to)->field) {                                      \
4475             (to)->field = val__;                                        \
4476         }                                                               \
4477     } while (0)
4478 
4479 /* Pass in a zeroed @prof */
4480 static inline
4481 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4482 {
4483     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4484     unsigned int i;
4485 
4486     for (i = 0; i < n_ctxs; i++) {
4487         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4488         const TCGProfile *orig = &s->prof;
4489 
4490         if (counters) {
4491             PROF_ADD(prof, orig, cpu_exec_time);
4492             PROF_ADD(prof, orig, tb_count1);
4493             PROF_ADD(prof, orig, tb_count);
4494             PROF_ADD(prof, orig, op_count);
4495             PROF_MAX(prof, orig, op_count_max);
4496             PROF_ADD(prof, orig, temp_count);
4497             PROF_MAX(prof, orig, temp_count_max);
4498             PROF_ADD(prof, orig, del_op_count);
4499             PROF_ADD(prof, orig, code_in_len);
4500             PROF_ADD(prof, orig, code_out_len);
4501             PROF_ADD(prof, orig, search_out_len);
4502             PROF_ADD(prof, orig, interm_time);
4503             PROF_ADD(prof, orig, code_time);
4504             PROF_ADD(prof, orig, la_time);
4505             PROF_ADD(prof, orig, opt_time);
4506             PROF_ADD(prof, orig, restore_count);
4507             PROF_ADD(prof, orig, restore_time);
4508         }
4509         if (table) {
4510             int i;
4511 
4512             for (i = 0; i < NB_OPS; i++) {
4513                 PROF_ADD(prof, orig, table_op_count[i]);
4514             }
4515         }
4516     }
4517 }
4518 
4519 #undef PROF_ADD
4520 #undef PROF_MAX
4521 
4522 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4523 {
4524     tcg_profile_snapshot(prof, true, false);
4525 }
4526 
4527 static void tcg_profile_snapshot_table(TCGProfile *prof)
4528 {
4529     tcg_profile_snapshot(prof, false, true);
4530 }
4531 
4532 void tcg_dump_op_count(void)
4533 {
4534     TCGProfile prof = {};
4535     int i;
4536 
4537     tcg_profile_snapshot_table(&prof);
4538     for (i = 0; i < NB_OPS; i++) {
4539         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4540                     prof.table_op_count[i]);
4541     }
4542 }
4543 
4544 int64_t tcg_cpu_exec_time(void)
4545 {
4546     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4547     unsigned int i;
4548     int64_t ret = 0;
4549 
4550     for (i = 0; i < n_ctxs; i++) {
4551         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4552         const TCGProfile *prof = &s->prof;
4553 
4554         ret += qatomic_read(&prof->cpu_exec_time);
4555     }
4556     return ret;
4557 }
4558 #else
4559 void tcg_dump_op_count(void)
4560 {
4561     qemu_printf("[TCG profiler not compiled]\n");
4562 }
4563 
4564 int64_t tcg_cpu_exec_time(void)
4565 {
4566     error_report("%s: TCG profiler not compiled", __func__);
4567     exit(EXIT_FAILURE);
4568 }
4569 #endif
4570 
4571 
4572 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4573 {
4574 #ifdef CONFIG_PROFILER
4575     TCGProfile *prof = &s->prof;
4576 #endif
4577     int i, num_insns;
4578     TCGOp *op;
4579 
4580 #ifdef CONFIG_PROFILER
4581     {
4582         int n = 0;
4583 
4584         QTAILQ_FOREACH(op, &s->ops, link) {
4585             n++;
4586         }
4587         qatomic_set(&prof->op_count, prof->op_count + n);
4588         if (n > prof->op_count_max) {
4589             qatomic_set(&prof->op_count_max, n);
4590         }
4591 
4592         n = s->nb_temps;
4593         qatomic_set(&prof->temp_count, prof->temp_count + n);
4594         if (n > prof->temp_count_max) {
4595             qatomic_set(&prof->temp_count_max, n);
4596         }
4597     }
4598 #endif
4599 
4600 #ifdef DEBUG_DISAS
4601     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4602                  && qemu_log_in_addr_range(tb->pc))) {
4603         FILE *logfile = qemu_log_lock();
4604         qemu_log("OP:\n");
4605         tcg_dump_ops(s, false);
4606         qemu_log("\n");
4607         qemu_log_unlock(logfile);
4608     }
4609 #endif
4610 
4611 #ifdef CONFIG_DEBUG_TCG
4612     /* Ensure all labels referenced have been emitted.  */
4613     {
4614         TCGLabel *l;
4615         bool error = false;
4616 
4617         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4618             if (unlikely(!l->present) && l->refs) {
4619                 qemu_log_mask(CPU_LOG_TB_OP,
4620                               "$L%d referenced but not present.\n", l->id);
4621                 error = true;
4622             }
4623         }
4624         assert(!error);
4625     }
4626 #endif
4627 
4628 #ifdef CONFIG_PROFILER
4629     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4630 #endif
4631 
4632 #ifdef USE_TCG_OPTIMIZATIONS
4633     tcg_optimize(s);
4634 #endif
4635 
4636 #ifdef CONFIG_PROFILER
4637     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4638     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4639 #endif
4640 
4641     reachable_code_pass(s);
4642     liveness_pass_1(s);
4643 
4644     if (s->nb_indirects > 0) {
4645 #ifdef DEBUG_DISAS
4646         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4647                      && qemu_log_in_addr_range(tb->pc))) {
4648             FILE *logfile = qemu_log_lock();
4649             qemu_log("OP before indirect lowering:\n");
4650             tcg_dump_ops(s, false);
4651             qemu_log("\n");
4652             qemu_log_unlock(logfile);
4653         }
4654 #endif
4655         /* Replace indirect temps with direct temps.  */
4656         if (liveness_pass_2(s)) {
4657             /* If changes were made, re-run liveness.  */
4658             liveness_pass_1(s);
4659         }
4660     }
4661 
4662 #ifdef CONFIG_PROFILER
4663     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4664 #endif
4665 
4666 #ifdef DEBUG_DISAS
4667     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4668                  && qemu_log_in_addr_range(tb->pc))) {
4669         FILE *logfile = qemu_log_lock();
4670         qemu_log("OP after optimization and liveness analysis:\n");
4671         tcg_dump_ops(s, true);
4672         qemu_log("\n");
4673         qemu_log_unlock(logfile);
4674     }
4675 #endif
4676 
4677     tcg_reg_alloc_start(s);
4678 
4679     /*
4680      * Reset the buffer pointers when restarting after overflow.
4681      * TODO: Move this into translate-all.c with the rest of the
4682      * buffer management.  Having only this done here is confusing.
4683      */
4684     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4685     s->code_ptr = s->code_buf;
4686 
4687 #ifdef TCG_TARGET_NEED_LDST_LABELS
4688     QSIMPLEQ_INIT(&s->ldst_labels);
4689 #endif
4690 #ifdef TCG_TARGET_NEED_POOL_LABELS
4691     s->pool_labels = NULL;
4692 #endif
4693 
4694     num_insns = -1;
4695     QTAILQ_FOREACH(op, &s->ops, link) {
4696         TCGOpcode opc = op->opc;
4697 
4698 #ifdef CONFIG_PROFILER
4699         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4700 #endif
4701 
4702         switch (opc) {
4703         case INDEX_op_mov_i32:
4704         case INDEX_op_mov_i64:
4705         case INDEX_op_mov_vec:
4706             tcg_reg_alloc_mov(s, op);
4707             break;
4708         case INDEX_op_dup_vec:
4709             tcg_reg_alloc_dup(s, op);
4710             break;
4711         case INDEX_op_insn_start:
4712             if (num_insns >= 0) {
4713                 size_t off = tcg_current_code_size(s);
4714                 s->gen_insn_end_off[num_insns] = off;
4715                 /* Assert that we do not overflow our stored offset.  */
4716                 assert(s->gen_insn_end_off[num_insns] == off);
4717             }
4718             num_insns++;
4719             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4720                 target_ulong a;
4721 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4722                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4723 #else
4724                 a = op->args[i];
4725 #endif
4726                 s->gen_insn_data[num_insns][i] = a;
4727             }
4728             break;
4729         case INDEX_op_discard:
4730             temp_dead(s, arg_temp(op->args[0]));
4731             break;
4732         case INDEX_op_set_label:
4733             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4734             tcg_out_label(s, arg_label(op->args[0]));
4735             break;
4736         case INDEX_op_call:
4737             tcg_reg_alloc_call(s, op);
4738             break;
4739         case INDEX_op_dup2_vec:
4740             if (tcg_reg_alloc_dup2(s, op)) {
4741                 break;
4742             }
4743             /* fall through */
4744         default:
4745             /* Sanity check that we've not introduced any unhandled opcodes. */
4746             tcg_debug_assert(tcg_op_supported(opc));
4747             /* Note: it would be much faster to have specialized
4748                register allocator functions for some common argument
4749                patterns. */
4750             tcg_reg_alloc_op(s, op);
4751             break;
4752         }
4753 #ifdef CONFIG_DEBUG_TCG
4754         check_regs(s);
4755 #endif
4756         /* Test for (pending) buffer overflow.  The assumption is that any
4757            one operation beginning below the high water mark cannot overrun
4758            the buffer completely.  Thus we can test for overflow after
4759            generating code without having to check during generation.  */
4760         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4761             return -1;
4762         }
4763         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4764         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4765             return -2;
4766         }
4767     }
4768     tcg_debug_assert(num_insns >= 0);
4769     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4770 
4771     /* Generate TB finalization at the end of block */
4772 #ifdef TCG_TARGET_NEED_LDST_LABELS
4773     i = tcg_out_ldst_finalize(s);
4774     if (i < 0) {
4775         return i;
4776     }
4777 #endif
4778 #ifdef TCG_TARGET_NEED_POOL_LABELS
4779     i = tcg_out_pool_finalize(s);
4780     if (i < 0) {
4781         return i;
4782     }
4783 #endif
4784     if (!tcg_resolve_relocs(s)) {
4785         return -2;
4786     }
4787 
4788 #ifndef CONFIG_TCG_INTERPRETER
4789     /* Flush the icache; with a split-wx buffer both the RX and RW views are needed.  */
4790     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4791                         (uintptr_t)s->code_buf,
4792                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4793 #endif
4794 
4795     return tcg_current_code_size(s);
4796 }
4797 
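/*
 * Hedged sketch (disabled, illustration only) of how a caller can
 * consume the return protocol above: a negative result means the TB
 * must be retranslated, -1 because the host code buffer crossed its
 * high water mark, -2 because the TB outgrew what gen_insn_end_off
 * or the relocations can represent.  The function name below is an
 * assumption for illustration, not the real call site.
 */
#if 0
static int example_translate(TCGContext *s, TranslationBlock *tb)
{
    int gen_size = tcg_gen_code(s, tb);

    if (unlikely(gen_size < 0)) {
        /* -1: flush the code buffer and retry; -2: retry the
           translation with a smaller instruction budget.  */
        return gen_size;
    }
    return gen_size;   /* number of host code bytes emitted */
}
#endif
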
4798 #ifdef CONFIG_PROFILER
4799 void tcg_dump_info(void)
4800 {
4801     TCGProfile prof = {};
4802     const TCGProfile *s;
4803     int64_t tb_count;
4804     int64_t tb_div_count;
4805     int64_t tot;
4806 
4807     tcg_profile_snapshot_counters(&prof);
4808     s = &prof;
4809     tb_count = s->tb_count;
4810     tb_div_count = tb_count ? tb_count : 1;
4811     tot = s->interm_time + s->code_time;
4812 
4813     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4814                 tot, tot / 2.4e9);
4815     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4816                 " %0.1f%%)\n",
4817                 tb_count, s->tb_count1 - tb_count,
4818                 (double)(s->tb_count1 - s->tb_count)
4819                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4820     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4821                 (double)s->op_count / tb_div_count, s->op_count_max);
4822     qemu_printf("deleted ops/TB      %0.2f\n",
4823                 (double)s->del_op_count / tb_div_count);
4824     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4825                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4826     qemu_printf("avg host code/TB    %0.1f\n",
4827                 (double)s->code_out_len / tb_div_count);
4828     qemu_printf("avg search data/TB  %0.1f\n",
4829                 (double)s->search_out_len / tb_div_count);
4830 
4831     qemu_printf("cycles/op           %0.1f\n",
4832                 s->op_count ? (double)tot / s->op_count : 0);
4833     qemu_printf("cycles/in byte      %0.1f\n",
4834                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4835     qemu_printf("cycles/out byte     %0.1f\n",
4836                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4837     qemu_printf("cycles/search byte     %0.1f\n",
4838                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4839     if (tot == 0) {
4840         tot = 1;
4841     }
4842     qemu_printf("  gen_interm time   %0.1f%%\n",
4843                 (double)s->interm_time / tot * 100.0);
4844     qemu_printf("  gen_code time     %0.1f%%\n",
4845                 (double)s->code_time / tot * 100.0);
4846     qemu_printf("optim./code time    %0.1f%%\n",
4847                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4848                 * 100.0);
4849     qemu_printf("liveness/code time  %0.1f%%\n",
4850                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4851     qemu_printf("cpu_restore count   %" PRId64 "\n",
4852                 s->restore_count);
4853     qemu_printf("  avg cycles        %0.1f\n",
4854                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4855 }
4856 #else
4857 void tcg_dump_info(void)
4858 {
4859     qemu_printf("[TCG profiler not compiled]\n");
4860 }
4861 #endif
4862 
4863 #ifdef ELF_HOST_MACHINE
4864 /* In order to use this feature, the backend needs to do three things:
4865 
4866    (1) Define ELF_HOST_MACHINE to indicate both what value to
4867        put into the ELF image and to indicate support for the feature.
4868 
4869    (2) Define tcg_register_jit.  This should create a buffer containing
4870        the contents of a .debug_frame section that describes the post-
4871        prologue unwind info for the tcg machine.
4872 
4873    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4874 */
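
/*
 * Hedged sketch of steps (2) and (3) for a hypothetical backend.  The
 * real versions live in tcg/<host>/tcg-target.c.inc and fill the CIE
 * and FDE with host-specific register rules; only the shape of the
 * call is shown here.
 */
#if 0
static const DebugFrameHeader debug_frame = {
    /* ... CIE and FDE fields describing the post-prologue frame ... */
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif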
4875 
4876 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4877 typedef enum {
4878     JIT_NOACTION = 0,
4879     JIT_REGISTER_FN,
4880     JIT_UNREGISTER_FN
4881 } jit_actions_t;
4882 
4883 struct jit_code_entry {
4884     struct jit_code_entry *next_entry;
4885     struct jit_code_entry *prev_entry;
4886     const void *symfile_addr;
4887     uint64_t symfile_size;
4888 };
4889 
4890 struct jit_descriptor {
4891     uint32_t version;
4892     uint32_t action_flag;
4893     struct jit_code_entry *relevant_entry;
4894     struct jit_code_entry *first_entry;
4895 };
4896 
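/* GDB plants a breakpoint in this function; the noinline attribute and
   the empty asm statement keep the compiler from eliding its body.  */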
4897 void __jit_debug_register_code(void) __attribute__((noinline));
4898 void __jit_debug_register_code(void)
4899 {
4900     asm("");
4901 }
4902 
4903 /* Must statically initialize the version, because GDB may check
4904    the version before we can set it.  */
4905 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4906 
4907 /* End GDB interface.  */
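
/* Per the GDB JIT interface, the debugger re-reads __jit_debug_descriptor
   each time its breakpoint in __jit_debug_register_code() fires, and then
   loads or unloads the symfile named by relevant_entry.  */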
4908 
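/* Return the byte offset of STR within the string table STRTAB.  The
   caller must pass a string known to be present: there is deliberately
   no not-found exit from the loop.  */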
4909 static int find_string(const char *strtab, const char *str)
4910 {
4911     const char *p = strtab + 1;
4912 
4913     while (1) {
4914         if (strcmp(p, str) == 0) {
4915             return p - strtab;
4916         }
4917         p += strlen(p) + 1;
4918     }
4919 }
4920 
4921 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4922                                  const void *debug_frame,
4923                                  size_t debug_frame_size)
4924 {
4925     struct __attribute__((packed)) DebugInfo {
4926         uint32_t  len;
4927         uint16_t  version;
4928         uint32_t  abbrev;
4929         uint8_t   ptr_size;
4930         uint8_t   cu_die;
4931         uint16_t  cu_lang;
4932         uintptr_t cu_low_pc;
4933         uintptr_t cu_high_pc;
4934         uint8_t   fn_die;
4935         char      fn_name[16];
4936         uintptr_t fn_low_pc;
4937         uintptr_t fn_high_pc;
4938         uint8_t   cu_eoc;
4939     };
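
    /* The struct above is the byte-exact image of the .debug_info
       section: one compile-unit DIE and one subprogram DIE, encoded
       against abbreviations 1 and 2 from img_template.da below.  */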
4940 
4941     struct ElfImage {
4942         ElfW(Ehdr) ehdr;
4943         ElfW(Phdr) phdr;
4944         ElfW(Shdr) shdr[7];
4945         ElfW(Sym)  sym[2];
4946         struct DebugInfo di;
4947         uint8_t    da[24];
4948         char       str[80];
4949     };
4950 
4951     struct ElfImage *img;
4952 
4953     static const struct ElfImage img_template = {
4954         .ehdr = {
4955             .e_ident[EI_MAG0] = ELFMAG0,
4956             .e_ident[EI_MAG1] = ELFMAG1,
4957             .e_ident[EI_MAG2] = ELFMAG2,
4958             .e_ident[EI_MAG3] = ELFMAG3,
4959             .e_ident[EI_CLASS] = ELF_CLASS,
4960             .e_ident[EI_DATA] = ELF_DATA,
4961             .e_ident[EI_VERSION] = EV_CURRENT,
4962             .e_type = ET_EXEC,
4963             .e_machine = ELF_HOST_MACHINE,
4964             .e_version = EV_CURRENT,
4965             .e_phoff = offsetof(struct ElfImage, phdr),
4966             .e_shoff = offsetof(struct ElfImage, shdr),
4967             .e_ehsize = sizeof(ElfW(Ehdr)),
4968             .e_phentsize = sizeof(ElfW(Phdr)),
4969             .e_phnum = 1,
4970             .e_shentsize = sizeof(ElfW(Shdr)),
4971             .e_shnum = ARRAY_SIZE(img->shdr),
4972             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4973 #ifdef ELF_HOST_FLAGS
4974             .e_flags = ELF_HOST_FLAGS,
4975 #endif
4976 #ifdef ELF_OSABI
4977             .e_ident[EI_OSABI] = ELF_OSABI,
4978 #endif
4979         },
4980         .phdr = {
4981             .p_type = PT_LOAD,
4982             .p_flags = PF_X,
4983         },
4984         .shdr = {
4985             [0] = { .sh_type = SHT_NULL },
4986             /* Trick: The contents of code_gen_buffer are not present in
4987                this fake ELF file; that got allocated elsewhere.  Therefore
4988                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4989                will not look for contents.  We can record any address.  */
4990             [1] = { /* .text */
4991                 .sh_type = SHT_NOBITS,
4992                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4993             },
4994             [2] = { /* .debug_info */
4995                 .sh_type = SHT_PROGBITS,
4996                 .sh_offset = offsetof(struct ElfImage, di),
4997                 .sh_size = sizeof(struct DebugInfo),
4998             },
4999             [3] = { /* .debug_abbrev */
5000                 .sh_type = SHT_PROGBITS,
5001                 .sh_offset = offsetof(struct ElfImage, da),
5002                 .sh_size = sizeof(img->da),
5003             },
5004             [4] = { /* .debug_frame */
5005                 .sh_type = SHT_PROGBITS,
5006                 .sh_offset = sizeof(struct ElfImage),
5007             },
5008             [5] = { /* .symtab */
5009                 .sh_type = SHT_SYMTAB,
5010                 .sh_offset = offsetof(struct ElfImage, sym),
5011                 .sh_size = sizeof(img->sym),
5012                 .sh_info = 1,
5013                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5014                 .sh_entsize = sizeof(ElfW(Sym)),
5015             },
5016             [6] = { /* .strtab */
5017                 .sh_type = SHT_STRTAB,
5018                 .sh_offset = offsetof(struct ElfImage, str),
5019                 .sh_size = sizeof(img->str),
5020             }
5021         },
5022         .sym = {
5023             [1] = { /* code_gen_buffer */
5024                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5025                 .st_shndx = 1,
5026             }
5027         },
5028         .di = {
5029             .len = sizeof(struct DebugInfo) - 4,
5030             .version = 2,
5031             .ptr_size = sizeof(void *),
5032             .cu_die = 1,
5033             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5034             .fn_die = 2,
5035             .fn_name = "code_gen_buffer"
5036         },
5037         .da = {
5038             1,          /* abbrev number (the cu) */
5039             0x11, 1,    /* DW_TAG_compile_unit, has children */
5040             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5041             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5042             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5043             0, 0,       /* end of abbrev */
5044             2,          /* abbrev number (the fn) */
5045             0x2e, 0,    /* DW_TAG_subprogram, no children */
5046             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5047             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5048             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5049             0, 0,       /* end of abbrev */
5050             0           /* no more abbrev */
5051         },
5052         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5053                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5054     };
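
    /* Worked example of the string-table offsets implied by .str above:
       find_string returns 1 for ".text", 7 for ".debug_info", 19 for
       ".debug_abbrev", 33 for ".debug_frame", 46 for ".symtab", 54 for
       ".strtab", and 62 for "code_gen_buffer".  */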
5055 
5056     /* We only need a single jit entry; statically allocate it.  */
5057     static struct jit_code_entry one_entry;
5058 
5059     uintptr_t buf = (uintptr_t)buf_ptr;
5060     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5061     DebugFrameHeader *dfh;
5062 
5063     img = g_malloc(img_size);
5064     *img = img_template;
5065 
5066     img->phdr.p_vaddr = buf;
5067     img->phdr.p_paddr = buf;
5068     img->phdr.p_memsz = buf_size;
5069 
5070     img->shdr[1].sh_name = find_string(img->str, ".text");
5071     img->shdr[1].sh_addr = buf;
5072     img->shdr[1].sh_size = buf_size;
5073 
5074     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5075     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5076 
5077     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5078     img->shdr[4].sh_size = debug_frame_size;
5079 
5080     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5081     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5082 
5083     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5084     img->sym[1].st_value = buf;
5085     img->sym[1].st_size = buf_size;
5086 
5087     img->di.cu_low_pc = buf;
5088     img->di.cu_high_pc = buf + buf_size;
5089     img->di.fn_low_pc = buf;
5090     img->di.fn_high_pc = buf + buf_size;
5091 
5092     dfh = (DebugFrameHeader *)(img + 1);
5093     memcpy(dfh, debug_frame, debug_frame_size);
5094     dfh->fde.func_start = buf;
5095     dfh->fde.func_len = buf_size;
5096 
5097 #ifdef DEBUG_JIT
5098     /* Define DEBUG_JIT to dump the constructed ELF image to a file,
5099        which can then be inspected with readelf, objdump, or similar.  */
5100     {
5101         FILE *f = fopen("/tmp/qemu.jit", "w+b");
5102         if (f) {
5103             if (fwrite(img, img_size, 1, f) != 1) {
5104                 /* Result ignored; the if only silences fwrite's unused-result warning.  */
5105             }
5106             fclose(f);
5107         }
5108     }
5109 #endif
5110 
5111     one_entry.symfile_addr = img;
5112     one_entry.symfile_size = img_size;
5113 
5114     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5115     __jit_debug_descriptor.relevant_entry = &one_entry;
5116     __jit_debug_descriptor.first_entry = &one_entry;
5117     __jit_debug_register_code();
5118 }
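
/*
 * Hedged sketch (QEMU registers exactly once and never tears down):
 * per the GDB JIT protocol, unregistering an entry would unlink it
 * from the list and signal JIT_UNREGISTER_FN before calling the hook.
 */
#if 0
static void example_unregister(struct jit_code_entry *entry)
{
    if (entry->prev_entry) {
        entry->prev_entry->next_entry = entry->next_entry;
    } else {
        __jit_debug_descriptor.first_entry = entry->next_entry;
    }
    if (entry->next_entry) {
        entry->next_entry->prev_entry = entry->prev_entry;
    }
    __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
    __jit_debug_descriptor.relevant_entry = entry;
    __jit_debug_register_code();
}
#endif
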
5119 #else
5120 /* No support for the feature.  Provide the entry point expected by exec.c,
5121    and implement the internal function we declared earlier.  */
5122 
5123 static void tcg_register_jit_int(const void *buf, size_t size,
5124                                  const void *debug_frame,
5125                                  size_t debug_frame_size)
5126 {
5127 }
5128 
5129 void tcg_register_jit(const void *buf, size_t buf_size)
5130 {
5131 }
5132 #endif /* ELF_HOST_MACHINE */
5133 
5134 #if !TCG_TARGET_MAYBE_vec
5135 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5136 {
5137     g_assert_not_reached();
5138 }
5139 #endif
5140