/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
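
/*
 * For illustration (editor's sketch, implied by the fields above, not part
 * of the original source): with three regions and one guard page (G) per
 * region, the buffer looks like
 *
 *   start            start_aligned + stride     start_aligned + 2 * stride
 *   |-- region 0 --|G|  |----- region 1 ----|G|   |----- region 2 ----|G|
 *
 * so region i (for i > 0) begins at start_aligned + i * region.stride and
 * offers region.size usable bytes, followed by its guard page.
 */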

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
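
/*
 * Illustrative note (editor's addition, not from the original source): on a
 * host whose TCG_TARGET_INSN_UNIT_SIZE is 1 (a byte-oriented encoding such
 * as x86), tcg_out32() takes the memcpy path and advances code_ptr by four
 * units; on a host with 4-byte units (a fixed-width RISC encoding), the
 * same call stores a single insn unit directly.  Since the unit size is a
 * compile-time constant, the compiler folds each comparison above and only
 * one branch survives per target.
 */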

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
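
/*
 * Typical front-end usage, shown here for illustration only (editor's
 * sketch; the variable names are hypothetical):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_gen_brcondi_i32(TCG_COND_EQ, val, 0, l);   forward branch
 *     ...                                            skipped when val == 0
 *     gen_set_label(l);                              binds l to this point
 *
 * Branches emitted before the label is bound are recorded via
 * tcg_out_reloc() and patched afterwards by tcg_resolve_relocs().
 */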

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have their .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
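
/*
 * Worked example (editor's illustration with made-up values): with
 * region.stride == 16 MiB and region.n == 8, a pointer 40 MiB past
 * start_aligned yields offset / stride == 2, i.e. the tree for region 2.
 * Pointers in the first region's unaligned head (p < start_aligned) and
 * past the last stride are clamped to trees 0 and n - 1 respectively.
 */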

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
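
/*
 * Worked example (editor's illustration with made-up numbers): with a
 * 512 MiB code_gen_buffer and max_cpus == 8, the first loop iteration
 * (i == 8) gives 512 MiB / 64 == 8 MiB per region, which already satisfies
 * the 2 MiB minimum, so 64 regions are used.  With a 16 MiB buffer the
 * loop falls through to i == 1 (16 MiB / 8 == 2 MiB) and 8 regions are
 * used.
 */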

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
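
/*
 * Worked example (editor's illustration with made-up numbers): with 8
 * regions, a 4 KiB guard page per region and the 1 KiB TCG_HIGHWATER, the
 * capacity reported is the whole [start, end + guard) span minus
 * 8 * (4096 + 1024) bytes, since neither guard pages nor the per-region
 * high-water slack ever hold translated code.
 */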

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
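
/*
 * Usage note (editor's addition): callers normally go through the
 * tcg_malloc() fast path (inline in tcg.h), which falls back to this
 * function only when the current chunk is exhausted; see e.g. the
 * TCGRelocation allocation in tcg_out_reloc() above.  Pool memory is never
 * freed individually; it is reclaimed in bulk by tcg_pool_reset(), called
 * from tcg_func_start() at the start of each translation.
 */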

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
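
/*
 * Illustrative example (editor's addition, made-up addresses): with a
 * 64-byte icache line and code_gen_ptr at 0x1008, the TB descriptor is
 * placed at 0x1040 and code generation resumes at the next 64-byte
 * boundary past the descriptor, so the TB struct and its translated code
 * never share a cache line.
 */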

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
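
/*
 * Illustrative example (editor's addition; "reg" and the offsets are
 * hypothetical): on a little-endian 32-bit host, registering a 64-bit
 * guest register at env offset 0x20 under the name "reg" creates two I32
 * halves, "reg_0" at offset 0x20 and "reg_1" at offset 0x24; on a
 * big-endian host the two offsets are swapped, per the bigendian
 * arithmetic above.
 */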

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
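
/*
 * Usage sketch (editor's addition; dst and src are hypothetical): these
 * helpers are shorthand for tcg_temp_new_*() followed by a movi, so the
 * result is an ordinary temporary that the caller still owns and must
 * free:
 *
 *     TCGv_i32 four = tcg_const_i32(4);
 *     tcg_gen_add_i32(dst, src, four);
 *     tcg_temp_free_i32(four);
 */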

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
/* Note: we convert the 64-bit args to 32-bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
1661 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1662 {
1663     int i, real_args, nb_rets, pi;
1664     unsigned sizemask, flags;
1665     TCGHelperInfo *info;
1666     TCGOp *op;
1667 
1668     info = g_hash_table_lookup(helper_table, (gpointer)func);
1669     flags = info->flags;
1670     sizemask = info->sizemask;
1671 
1672 #if defined(__sparc__) && !defined(__arch64__) \
1673     && !defined(CONFIG_TCG_INTERPRETER)
1674     /* We have 64-bit values in one register, but need to pass as two
1675        separate parameters.  Split them.  */
1676     int orig_sizemask = sizemask;
1677     int orig_nargs = nargs;
1678     TCGv_i64 retl, reth;
1679     TCGTemp *split_args[MAX_OPC_PARAM];
1680 
1681     retl = NULL;
1682     reth = NULL;
1683     if (sizemask != 0) {
1684         for (i = real_args = 0; i < nargs; ++i) {
1685             int is_64bit = sizemask & (1 << (i+1)*2);
1686             if (is_64bit) {
1687                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1688                 TCGv_i32 h = tcg_temp_new_i32();
1689                 TCGv_i32 l = tcg_temp_new_i32();
1690                 tcg_gen_extr_i64_i32(l, h, orig);
1691                 split_args[real_args++] = tcgv_i32_temp(h);
1692                 split_args[real_args++] = tcgv_i32_temp(l);
1693             } else {
1694                 split_args[real_args++] = args[i];
1695             }
1696         }
1697         nargs = real_args;
1698         args = split_args;
1699         sizemask = 0;
1700     }
1701 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1702     for (i = 0; i < nargs; ++i) {
1703         int is_64bit = sizemask & (1 << (i+1)*2);
1704         int is_signed = sizemask & (2 << (i+1)*2);
1705         if (!is_64bit) {
1706             TCGv_i64 temp = tcg_temp_new_i64();
1707             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1708             if (is_signed) {
1709                 tcg_gen_ext32s_i64(temp, orig);
1710             } else {
1711                 tcg_gen_ext32u_i64(temp, orig);
1712             }
1713             args[i] = tcgv_i64_temp(temp);
1714         }
1715     }
1716 #endif /* TCG_TARGET_EXTEND_ARGS */
1717 
1718     op = tcg_emit_op(INDEX_op_call);
1719 
1720     pi = 0;
1721     if (ret != NULL) {
1722 #if defined(__sparc__) && !defined(__arch64__) \
1723     && !defined(CONFIG_TCG_INTERPRETER)
1724         if (orig_sizemask & 1) {
1725             /* The 32-bit ABI is going to return the 64-bit value in
1726                the %o0/%o1 register pair.  Prepare for this by using
1727                two return temporaries, and reassemble below.  */
1728             retl = tcg_temp_new_i64();
1729             reth = tcg_temp_new_i64();
1730             op->args[pi++] = tcgv_i64_arg(reth);
1731             op->args[pi++] = tcgv_i64_arg(retl);
1732             nb_rets = 2;
1733         } else {
1734             op->args[pi++] = temp_arg(ret);
1735             nb_rets = 1;
1736         }
1737 #else
1738         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1739 #ifdef HOST_WORDS_BIGENDIAN
1740             op->args[pi++] = temp_arg(ret + 1);
1741             op->args[pi++] = temp_arg(ret);
1742 #else
1743             op->args[pi++] = temp_arg(ret);
1744             op->args[pi++] = temp_arg(ret + 1);
1745 #endif
1746             nb_rets = 2;
1747         } else {
1748             op->args[pi++] = temp_arg(ret);
1749             nb_rets = 1;
1750         }
1751 #endif
1752     } else {
1753         nb_rets = 0;
1754     }
1755     TCGOP_CALLO(op) = nb_rets;
1756 
1757     real_args = 0;
1758     for (i = 0; i < nargs; i++) {
1759         int is_64bit = sizemask & (1 << (i+1)*2);
1760         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1761 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1762             /* some targets want aligned 64-bit args */
1763             if (real_args & 1) {
1764                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1765                 real_args++;
1766             }
1767 #endif
1768            /* If stack grows up, then we will be placing successive
1769               arguments at lower addresses, which means we need to
1770               reverse the order compared to how we would normally
1771               treat either big or little-endian.  For those arguments
1772               that will wind up in registers, this still works for
1773               HPPA (the only current STACK_GROWSUP target) since the
1774               argument registers are *also* allocated in decreasing
1775               order.  If another such target is added, this logic may
1776               have to get more complicated to differentiate between
1777               stack arguments and register arguments.  */
1778 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1779             op->args[pi++] = temp_arg(args[i] + 1);
1780             op->args[pi++] = temp_arg(args[i]);
1781 #else
1782             op->args[pi++] = temp_arg(args[i]);
1783             op->args[pi++] = temp_arg(args[i] + 1);
1784 #endif
1785             real_args += 2;
1786             continue;
1787         }
1788 
1789         op->args[pi++] = temp_arg(args[i]);
1790         real_args++;
1791     }
1792     op->args[pi++] = (uintptr_t)func;
1793     op->args[pi++] = flags;
1794     TCGOP_CALLI(op) = real_args;
1795 
1796     /* Make sure the fields didn't overflow.  */
1797     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1798     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1799 
1800 #if defined(__sparc__) && !defined(__arch64__) \
1801     && !defined(CONFIG_TCG_INTERPRETER)
1802     /* Free all of the parts we allocated above.  */
1803     for (i = real_args = 0; i < orig_nargs; ++i) {
1804         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1805         if (is_64bit) {
1806             tcg_temp_free_internal(args[real_args++]);
1807             tcg_temp_free_internal(args[real_args++]);
1808         } else {
1809             real_args++;
1810         }
1811     }
1812     if (orig_sizemask & 1) {
1813         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1814            Note that describing these as TCGv_i64 eliminates an unnecessary
1815            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1816         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1817         tcg_temp_free_i64(retl);
1818         tcg_temp_free_i64(reth);
1819     }
1820 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1821     for (i = 0; i < nargs; ++i) {
1822         int is_64bit = sizemask & (1 << (i+1)*2);
1823         if (!is_64bit) {
1824             tcg_temp_free_internal(args[i]);
1825         }
1826     }
1827 #endif /* TCG_TARGET_EXTEND_ARGS */
1828 }
1829 
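/* Reset the per-temp allocation state before generating code: fixed
   globals sit in their registers, other globals and local temps start
   out in memory, and plain temps start out dead.  */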
1830 static void tcg_reg_alloc_start(TCGContext *s)
1831 {
1832     int i, n;
1833     TCGTemp *ts;
1834 
1835     for (i = 0, n = s->nb_globals; i < n; i++) {
1836         ts = &s->temps[i];
1837         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1838     }
1839     for (n = s->nb_temps; i < n; i++) {
1840         ts = &s->temps[i];
1841         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1842         ts->mem_allocated = 0;
1843         ts->fixed_reg = 0;
1844     }
1845 
1846     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1847 }
1848 
1849 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1850                                  TCGTemp *ts)
1851 {
1852     int idx = temp_idx(ts);
1853 
1854     if (ts->temp_global) {
1855         pstrcpy(buf, buf_size, ts->name);
1856     } else if (ts->temp_local) {
1857         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1858     } else {
1859         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1860     }
1861     return buf;
1862 }
1863 
1864 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1865                              int buf_size, TCGArg arg)
1866 {
1867     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1868 }
1869 
1870 /* Find helper name.  */
1871 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1872 {
1873     const char *ret = NULL;
1874     if (helper_table) {
1875         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1876         if (info) {
1877             ret = info->name;
1878         }
1879     }
1880     return ret;
1881 }
1882 
1883 static const char * const cond_name[] =
1884 {
1885     [TCG_COND_NEVER] = "never",
1886     [TCG_COND_ALWAYS] = "always",
1887     [TCG_COND_EQ] = "eq",
1888     [TCG_COND_NE] = "ne",
1889     [TCG_COND_LT] = "lt",
1890     [TCG_COND_GE] = "ge",
1891     [TCG_COND_LE] = "le",
1892     [TCG_COND_GT] = "gt",
1893     [TCG_COND_LTU] = "ltu",
1894     [TCG_COND_GEU] = "geu",
1895     [TCG_COND_LEU] = "leu",
1896     [TCG_COND_GTU] = "gtu"
1897 };
1898 
1899 static const char * const ldst_name[] =
1900 {
1901     [MO_UB]   = "ub",
1902     [MO_SB]   = "sb",
1903     [MO_LEUW] = "leuw",
1904     [MO_LESW] = "lesw",
1905     [MO_LEUL] = "leul",
1906     [MO_LESL] = "lesl",
1907     [MO_LEQ]  = "leq",
1908     [MO_BEUW] = "beuw",
1909     [MO_BESW] = "besw",
1910     [MO_BEUL] = "beul",
1911     [MO_BESL] = "besl",
1912     [MO_BEQ]  = "beq",
1913 };
1914 
1915 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1916 #ifdef ALIGNED_ONLY
1917     [MO_UNALN >> MO_ASHIFT]    = "un+",
1918     [MO_ALIGN >> MO_ASHIFT]    = "",
1919 #else
1920     [MO_UNALN >> MO_ASHIFT]    = "",
1921     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1922 #endif
1923     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1924     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1925     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1926     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1927     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1928     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1929 };
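/*
 * Example of how the two tables above combine: tcg_dump_ops below renders
 * a memop such as (MO_LEUL | MO_ALIGN_4) as "al4+leul" -- the alignment
 * prefix followed by the load/store name.
 */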
1930 
1931 static inline bool tcg_regset_single(TCGRegSet d)
1932 {
1933     return (d & (d - 1)) == 0;
1934 }
1935 
1936 static inline TCGReg tcg_regset_first(TCGRegSet d)
1937 {
1938     if (TCG_TARGET_NB_REGS <= 32) {
1939         return ctz32(d);
1940     } else {
1941         return ctz64(d);
1942     }
1943 }
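/*
 * E.g. tcg_regset_single(0x10) is true and tcg_regset_first(0x10) is
 * register 4.  Note that the "single" test also accepts the empty set,
 * so tcg_regset_first must only be applied to a known non-empty set.
 */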
1944 
1945 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1946 {
1947     char buf[128];
1948     TCGOp *op;
1949 
1950     QTAILQ_FOREACH(op, &s->ops, link) {
1951         int i, k, nb_oargs, nb_iargs, nb_cargs;
1952         const TCGOpDef *def;
1953         TCGOpcode c;
1954         int col = 0;
1955 
1956         c = op->opc;
1957         def = &tcg_op_defs[c];
1958 
1959         if (c == INDEX_op_insn_start) {
1960             nb_oargs = 0;
1961             col += qemu_log("\n ----");
1962 
1963             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1964                 target_ulong a;
1965 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1966                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1967 #else
1968                 a = op->args[i];
1969 #endif
1970                 col += qemu_log(" " TARGET_FMT_lx, a);
1971             }
1972         } else if (c == INDEX_op_call) {
1973             /* variable number of arguments */
1974             nb_oargs = TCGOP_CALLO(op);
1975             nb_iargs = TCGOP_CALLI(op);
1976             nb_cargs = def->nb_cargs;
1977 
1978             /* function name, flags, out args */
1979             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1980                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1981                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1982             for (i = 0; i < nb_oargs; i++) {
1983                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1984                                                        op->args[i]));
1985             }
1986             for (i = 0; i < nb_iargs; i++) {
1987                 TCGArg arg = op->args[nb_oargs + i];
1988                 const char *t = "<dummy>";
1989                 if (arg != TCG_CALL_DUMMY_ARG) {
1990                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1991                 }
1992                 col += qemu_log(",%s", t);
1993             }
1994         } else {
1995             col += qemu_log(" %s ", def->name);
1996 
1997             nb_oargs = def->nb_oargs;
1998             nb_iargs = def->nb_iargs;
1999             nb_cargs = def->nb_cargs;
2000 
2001             if (def->flags & TCG_OPF_VECTOR) {
2002                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2003                                 8 << TCGOP_VECE(op));
2004             }
2005 
2006             k = 0;
2007             for (i = 0; i < nb_oargs; i++) {
2008                 if (k != 0) {
2009                     col += qemu_log(",");
2010                 }
2011                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2012                                                       op->args[k++]));
2013             }
2014             for (i = 0; i < nb_iargs; i++) {
2015                 if (k != 0) {
2016                     col += qemu_log(",");
2017                 }
2018                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2019                                                       op->args[k++]));
2020             }
2021             switch (c) {
2022             case INDEX_op_brcond_i32:
2023             case INDEX_op_setcond_i32:
2024             case INDEX_op_movcond_i32:
2025             case INDEX_op_brcond2_i32:
2026             case INDEX_op_setcond2_i32:
2027             case INDEX_op_brcond_i64:
2028             case INDEX_op_setcond_i64:
2029             case INDEX_op_movcond_i64:
2030             case INDEX_op_cmp_vec:
2031                 if (op->args[k] < ARRAY_SIZE(cond_name)
2032                     && cond_name[op->args[k]]) {
2033                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2034                 } else {
2035                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2036                 }
2037                 i = 1;
2038                 break;
2039             case INDEX_op_qemu_ld_i32:
2040             case INDEX_op_qemu_st_i32:
2041             case INDEX_op_qemu_ld_i64:
2042             case INDEX_op_qemu_st_i64:
2043                 {
2044                     TCGMemOpIdx oi = op->args[k++];
2045                     TCGMemOp mop = get_memop(oi);  /* avoid shadowing 'op' above */
2046                     unsigned ix = get_mmuidx(oi);
2047 
2048                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2049                         col += qemu_log(",$0x%x,%u", mop, ix);
2050                     } else {
2051                         const char *s_al, *s_op;
2052                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2053                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2054                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2055                     }
2056                     i = 1;
2057                 }
2058                 break;
2059             default:
2060                 i = 0;
2061                 break;
2062             }
2063             switch (c) {
2064             case INDEX_op_set_label:
2065             case INDEX_op_br:
2066             case INDEX_op_brcond_i32:
2067             case INDEX_op_brcond_i64:
2068             case INDEX_op_brcond2_i32:
2069                 col += qemu_log("%s$L%d", k ? "," : "",
2070                                 arg_label(op->args[k])->id);
2071                 i++, k++;
2072                 break;
2073             default:
2074                 break;
2075             }
2076             for (; i < nb_cargs; i++, k++) {
2077                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2078             }
2079         }
2080 
2081         if (have_prefs || op->life) {
2082             for (; col < 40; ++col) {
2083                 putc(' ', qemu_logfile);
2084             }
2085         }
2086 
2087         if (op->life) {
2088             unsigned life = op->life;
2089 
2090             if (life & (SYNC_ARG * 3)) {
2091                 qemu_log("  sync:");
2092                 for (i = 0; i < 2; ++i) {
2093                     if (life & (SYNC_ARG << i)) {
2094                         qemu_log(" %d", i);
2095                     }
2096                 }
2097             }
2098             life /= DEAD_ARG;
2099             if (life) {
2100                 qemu_log("  dead:");
2101                 for (i = 0; life; ++i, life >>= 1) {
2102                     if (life & 1) {
2103                         qemu_log(" %d", i);
2104                     }
2105                 }
2106             }
2107         }
2108 
2109         if (have_prefs) {
2110             for (i = 0; i < nb_oargs; ++i) {
2111                 TCGRegSet set = op->output_pref[i];
2112 
2113                 if (i == 0) {
2114                     qemu_log("  pref=");
2115                 } else {
2116                     qemu_log(",");
2117                 }
2118                 if (set == 0) {
2119                     qemu_log("none");
2120                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2121                     qemu_log("all");
2122 #ifdef CONFIG_DEBUG_TCG
2123                 } else if (tcg_regset_single(set)) {
2124                     TCGReg reg = tcg_regset_first(set);
2125                     qemu_log("%s", tcg_target_reg_names[reg]);
2126 #endif
2127                 } else if (TCG_TARGET_NB_REGS <= 32) {
2128                     qemu_log("%#x", (uint32_t)set);
2129                 } else {
2130                     qemu_log("%#" PRIx64, (uint64_t)set);
2131                 }
2132             }
2133         }
2134 
2135         qemu_log("\n");
2136     }
2137 }
2138 
2139 /* We give more priority to constraints with fewer registers. */
2140 static int get_constraint_priority(const TCGOpDef *def, int k)
2141 {
2142     const TCGArgConstraint *arg_ct;
2143 
2144     int i, n;
2145     arg_ct = &def->args_ct[k];
2146     if (arg_ct->ct & TCG_CT_ALIAS) {
2147         /* an alias is equivalent to a single register */
2148         n = 1;
2149     } else {
2150         if (!(arg_ct->ct & TCG_CT_REG))
2151             return 0;
2152         n = 0;
2153         for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2154             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2155                 n++;
2156         }
2157     }
2158     return TCG_TARGET_NB_REGS - n + 1;
2159 }
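/*
 * E.g. with 16 target registers, a constraint accepting all 16 registers
 * gets priority 1, a single-register constraint gets priority 16, and an
 * alias (counted as one register) sorts equally high.
 */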
2160 
2161 /* sort from highest priority to lowest */
2162 static void sort_constraints(TCGOpDef *def, int start, int n)
2163 {
2164     int i, j, p1, p2, tmp;
2165 
2166     for (i = 0; i < n; i++)
2167         def->sorted_args[start + i] = start + i;
2168     if (n <= 1)
2169         return;
2170     for (i = 0; i < n - 1; i++) {
2171         for (j = i + 1; j < n; j++) {
2172             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2173             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2174             if (p1 < p2) {
2175                 tmp = def->sorted_args[start + i];
2176                 def->sorted_args[start + i] = def->sorted_args[start + j];
2177                 def->sorted_args[start + j] = tmp;
2178             }
2179         }
2180     }
2181 }
2182 
2183 static void process_op_defs(TCGContext *s)
2184 {
2185     TCGOpcode op;
2186 
2187     for (op = 0; op < NB_OPS; op++) {
2188         TCGOpDef *def = &tcg_op_defs[op];
2189         const TCGTargetOpDef *tdefs;
2190         TCGType type;
2191         int i, nb_args;
2192 
2193         if (def->flags & TCG_OPF_NOT_PRESENT) {
2194             continue;
2195         }
2196 
2197         nb_args = def->nb_iargs + def->nb_oargs;
2198         if (nb_args == 0) {
2199             continue;
2200         }
2201 
2202         tdefs = tcg_target_op_def(op);
2203         /* Missing TCGTargetOpDef entry. */
2204         tcg_debug_assert(tdefs != NULL);
2205 
2206         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2207         for (i = 0; i < nb_args; i++) {
2208             const char *ct_str = tdefs->args_ct_str[i];
2209             /* Incomplete TCGTargetOpDef entry. */
2210             tcg_debug_assert(ct_str != NULL);
2211 
2212             def->args_ct[i].u.regs = 0;
2213             def->args_ct[i].ct = 0;
2214             while (*ct_str != '\0') {
2215                 switch (*ct_str) {
2216                 case '0' ... '9':
2217                     {
2218                         int oarg = *ct_str - '0';
2219                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2220                         tcg_debug_assert(oarg < def->nb_oargs);
2221                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2222                         /* TCG_CT_ALIAS is for the output arguments.
2223                            The input is tagged with TCG_CT_IALIAS. */
2224                         def->args_ct[i] = def->args_ct[oarg];
2225                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2226                         def->args_ct[oarg].alias_index = i;
2227                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2228                         def->args_ct[i].alias_index = oarg;
2229                     }
2230                     ct_str++;
2231                     break;
2232                 case '&':
2233                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2234                     ct_str++;
2235                     break;
2236                 case 'i':
2237                     def->args_ct[i].ct |= TCG_CT_CONST;
2238                     ct_str++;
2239                     break;
2240                 default:
2241                     ct_str = target_parse_constraint(&def->args_ct[i],
2242                                                      ct_str, type);
2243                     /* Typo in TCGTargetOpDef constraint. */
2244                     tcg_debug_assert(ct_str != NULL);
2245                 }
2246             }
2247         }
2248 
2249         /* TCGTargetOpDef entry with too much information? */
2250         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2251 
2252         /* sort the constraints (XXX: this is just a heuristic) */
2253         sort_constraints(def, 0, def->nb_oargs);
2254         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2255     }
2256 }
2257 
2258 void tcg_op_remove(TCGContext *s, TCGOp *op)
2259 {
2260     TCGLabel *label;
2261 
2262     switch (op->opc) {
2263     case INDEX_op_br:
2264         label = arg_label(op->args[0]);
2265         label->refs--;
2266         break;
2267     case INDEX_op_brcond_i32:
2268     case INDEX_op_brcond_i64:
2269         label = arg_label(op->args[3]);
2270         label->refs--;
2271         break;
2272     case INDEX_op_brcond2_i32:
2273         label = arg_label(op->args[5]);
2274         label->refs--;
2275         break;
2276     default:
2277         break;
2278     }
2279 
2280     QTAILQ_REMOVE(&s->ops, op, link);
2281     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2282     s->nb_ops--;
2283 
2284 #ifdef CONFIG_PROFILER
2285     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2286 #endif
2287 }
2288 
2289 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2290 {
2291     TCGContext *s = tcg_ctx;
2292     TCGOp *op;
2293 
2294     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2295         op = tcg_malloc(sizeof(TCGOp));
2296     } else {
2297         op = QTAILQ_FIRST(&s->free_ops);
2298         QTAILQ_REMOVE(&s->free_ops, op, link);
2299     }
2300     memset(op, 0, offsetof(TCGOp, link));
2301     op->opc = opc;
2302     s->nb_ops++;
2303 
2304     return op;
2305 }
2306 
2307 TCGOp *tcg_emit_op(TCGOpcode opc)
2308 {
2309     TCGOp *op = tcg_op_alloc(opc);
2310     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2311     return op;
2312 }
2313 
2314 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2315 {
2316     TCGOp *new_op = tcg_op_alloc(opc);
2317     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2318     return new_op;
2319 }
2320 
2321 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2322 {
2323     TCGOp *new_op = tcg_op_alloc(opc);
2324     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2325     return new_op;
2326 }
2327 
2328 /* Reachability analysis: remove unreachable code.  */
2329 static void reachable_code_pass(TCGContext *s)
2330 {
2331     TCGOp *op, *op_next;
2332     bool dead = false;
2333 
2334     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2335         bool remove = dead;
2336         TCGLabel *label;
2337         int call_flags;
2338 
2339         switch (op->opc) {
2340         case INDEX_op_set_label:
2341             label = arg_label(op->args[0]);
2342             if (label->refs == 0) {
2343                 /*
2344                  * While there is an occasional backward branch, virtually
2345                  * all branches generated by the translators are forward.
2346                  * This means that generally we will have already removed
2347                  * all references to a label by the time it is defined, and
2348                  * there is little to be gained by iterating.
2349                  */
2350                 remove = true;
2351             } else {
2352                 /* Once we see a label, insns become live again.  */
2353                 dead = false;
2354                 remove = false;
2355 
2356                 /*
2357                  * Optimization can fold conditional branches to unconditional.
2358                  * If we find a label with one reference which is preceded by
2359                  * an unconditional branch to it, remove both.  This has to
2360                  * wait until the dead code between them has been removed.
2361                  */
2362                 if (label->refs == 1) {
2363                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2364                     if (op_prev->opc == INDEX_op_br &&
2365                         label == arg_label(op_prev->args[0])) {
2366                         tcg_op_remove(s, op_prev);
2367                         remove = true;
2368                     }
2369                 }
2370             }
2371             break;
2372 
2373         case INDEX_op_br:
2374         case INDEX_op_exit_tb:
2375         case INDEX_op_goto_ptr:
2376             /* Unconditional branches; everything following is dead.  */
2377             dead = true;
2378             break;
2379 
2380         case INDEX_op_call:
2381             /* Notice noreturn helper calls, raising exceptions.  */
2382             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2383             if (call_flags & TCG_CALL_NO_RETURN) {
2384                 dead = true;
2385             }
2386             break;
2387 
2388         case INDEX_op_insn_start:
2389             /* Never remove -- we need to keep these for unwind.  */
2390             remove = false;
2391             break;
2392 
2393         default:
2394             break;
2395         }
2396 
2397         if (remove) {
2398             tcg_op_remove(s, op);
2399         }
2400     }
2401 }
2402 
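/* Per-temp liveness state: TS_DEAD means the value is not needed by any
   later opcode; TS_MEM means the value must also be present in its
   canonical memory slot.  */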
2403 #define TS_DEAD  1
2404 #define TS_MEM   2
2405 
2406 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2407 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
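/* An op's arg_life packs these per-argument flags: SYNC_ARG bits mark
   outputs that must be synced back to memory, DEAD_ARG bits mark
   arguments whose temp dies at this op (decoded in tcg_dump_ops).  */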
2408 
2409 /* For liveness_pass_1, the register preferences for a given temp.  */
2410 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2411 {
2412     return ts->state_ptr;
2413 }
2414 
2415 /* For liveness_pass_1, reset the preferences for a given temp to the
2416  * maximal regset for its type.
2417  */
2418 static inline void la_reset_pref(TCGTemp *ts)
2419 {
2420     *la_temp_pref(ts)
2421         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2422 }
2423 
2424 /* liveness analysis: end of function: all temps are dead, and globals
2425    should be in memory. */
2426 static void la_func_end(TCGContext *s, int ng, int nt)
2427 {
2428     int i;
2429 
2430     for (i = 0; i < ng; ++i) {
2431         s->temps[i].state = TS_DEAD | TS_MEM;
2432         la_reset_pref(&s->temps[i]);
2433     }
2434     for (i = ng; i < nt; ++i) {
2435         s->temps[i].state = TS_DEAD;
2436         la_reset_pref(&s->temps[i]);
2437     }
2438 }
2439 
2440 /* liveness analysis: end of basic block: all temps are dead, globals
2441    and local temps should be in memory. */
2442 static void la_bb_end(TCGContext *s, int ng, int nt)
2443 {
2444     int i;
2445 
2446     for (i = 0; i < ng; ++i) {
2447         s->temps[i].state = TS_DEAD | TS_MEM;
2448         la_reset_pref(&s->temps[i]);
2449     }
2450     for (i = ng; i < nt; ++i) {
2451         s->temps[i].state = (s->temps[i].temp_local
2452                              ? TS_DEAD | TS_MEM
2453                              : TS_DEAD);
2454         la_reset_pref(&s->temps[i]);
2455     }
2456 }
2457 
2458 /* liveness analysis: sync globals back to memory.  */
2459 static void la_global_sync(TCGContext *s, int ng)
2460 {
2461     int i;
2462 
2463     for (i = 0; i < ng; ++i) {
2464         int state = s->temps[i].state;
2465         s->temps[i].state = state | TS_MEM;
2466         if (state == TS_DEAD) {
2467             /* If the global was previously dead, reset prefs.  */
2468             la_reset_pref(&s->temps[i]);
2469         }
2470     }
2471 }
2472 
2473 /* liveness analysis: sync globals back to memory and kill.  */
2474 static void la_global_kill(TCGContext *s, int ng)
2475 {
2476     int i;
2477 
2478     for (i = 0; i < ng; i++) {
2479         s->temps[i].state = TS_DEAD | TS_MEM;
2480         la_reset_pref(&s->temps[i]);
2481     }
2482 }
2483 
2484 /* liveness analysis: drop call-clobbered regs from prefs of temps live across a call.  */
2485 static void la_cross_call(TCGContext *s, int nt)
2486 {
2487     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2488     int i;
2489 
2490     for (i = 0; i < nt; i++) {
2491         TCGTemp *ts = &s->temps[i];
2492         if (!(ts->state & TS_DEAD)) {
2493             TCGRegSet *pset = la_temp_pref(ts);
2494             TCGRegSet set = *pset;
2495 
2496             set &= mask;
2497             /* If the combination is not possible, restart.  */
2498             if (set == 0) {
2499                 set = tcg_target_available_regs[ts->type] & mask;
2500             }
2501             *pset = set;
2502         }
2503     }
2504 }
2505 
2506 /* Liveness analysis: update the opc_arg_life array to tell whether a
2507    given input argument is dead. Instructions updating dead
2508    temporaries are removed. */
2509 static void liveness_pass_1(TCGContext *s)
2510 {
2511     int nb_globals = s->nb_globals;
2512     int nb_temps = s->nb_temps;
2513     TCGOp *op, *op_prev;
2514     TCGRegSet *prefs;
2515     int i;
2516 
2517     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2518     for (i = 0; i < nb_temps; ++i) {
2519         s->temps[i].state_ptr = prefs + i;
2520     }
2521 
2522     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2523     la_func_end(s, nb_globals, nb_temps);
2524 
2525     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2526         int nb_iargs, nb_oargs;
2527         TCGOpcode opc_new, opc_new2;
2528         bool have_opc_new2;
2529         TCGLifeData arg_life = 0;
2530         TCGTemp *ts;
2531         TCGOpcode opc = op->opc;
2532         const TCGOpDef *def = &tcg_op_defs[opc];
2533 
2534         switch (opc) {
2535         case INDEX_op_call:
2536             {
2537                 int call_flags;
2538                 int nb_call_regs;
2539 
2540                 nb_oargs = TCGOP_CALLO(op);
2541                 nb_iargs = TCGOP_CALLI(op);
2542                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2543 
2544                 /* pure functions can be removed if their result is unused */
2545                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2546                     for (i = 0; i < nb_oargs; i++) {
2547                         ts = arg_temp(op->args[i]);
2548                         if (ts->state != TS_DEAD) {
2549                             goto do_not_remove_call;
2550                         }
2551                     }
2552                     goto do_remove;
2553                 }
2554             do_not_remove_call:
2555 
2556                 /* Output args are dead.  */
2557                 for (i = 0; i < nb_oargs; i++) {
2558                     ts = arg_temp(op->args[i]);
2559                     if (ts->state & TS_DEAD) {
2560                         arg_life |= DEAD_ARG << i;
2561                     }
2562                     if (ts->state & TS_MEM) {
2563                         arg_life |= SYNC_ARG << i;
2564                     }
2565                     ts->state = TS_DEAD;
2566                     la_reset_pref(ts);
2567 
2568                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2569                     op->output_pref[i] = 0;
2570                 }
2571 
2572                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2573                                     TCG_CALL_NO_READ_GLOBALS))) {
2574                     la_global_kill(s, nb_globals);
2575                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2576                     la_global_sync(s, nb_globals);
2577                 }
2578 
2579                 /* Record arguments that die in this helper.  */
2580                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2581                     ts = arg_temp(op->args[i]);
2582                     if (ts && ts->state & TS_DEAD) {
2583                         arg_life |= DEAD_ARG << i;
2584                     }
2585                 }
2586 
2587                 /* For all live registers, remove call-clobbered prefs.  */
2588                 la_cross_call(s, nb_temps);
2589 
2590                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2591 
2592                 /* Input arguments are live for preceding opcodes.  */
2593                 for (i = 0; i < nb_iargs; i++) {
2594                     ts = arg_temp(op->args[i + nb_oargs]);
2595                     if (ts && ts->state & TS_DEAD) {
2596                         /* For those arguments that die, and will be allocated
2597                          * in registers, clear the register set for that arg,
2598                          * to be filled in below.  For args that will be on
2599                          * the stack, reset to any available reg.
2600                          */
2601                         *la_temp_pref(ts)
2602                             = (i < nb_call_regs ? 0 :
2603                                tcg_target_available_regs[ts->type]);
2604                         ts->state &= ~TS_DEAD;
2605                     }
2606                 }
2607 
2608                 /* For each input argument, add its input register to prefs.
2609                    If a temp is used once, this produces a single set bit.  */
2610                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2611                     ts = arg_temp(op->args[i + nb_oargs]);
2612                     if (ts) {
2613                         tcg_regset_set_reg(*la_temp_pref(ts),
2614                                            tcg_target_call_iarg_regs[i]);
2615                     }
2616                 }
2617             }
2618             break;
2619         case INDEX_op_insn_start:
2620             break;
2621         case INDEX_op_discard:
2622             /* mark the temporary as dead */
2623             ts = arg_temp(op->args[0]);
2624             ts->state = TS_DEAD;
2625             la_reset_pref(ts);
2626             break;
2627 
2628         case INDEX_op_add2_i32:
2629             opc_new = INDEX_op_add_i32;
2630             goto do_addsub2;
2631         case INDEX_op_sub2_i32:
2632             opc_new = INDEX_op_sub_i32;
2633             goto do_addsub2;
2634         case INDEX_op_add2_i64:
2635             opc_new = INDEX_op_add_i64;
2636             goto do_addsub2;
2637         case INDEX_op_sub2_i64:
2638             opc_new = INDEX_op_sub_i64;
2639         do_addsub2:
2640             nb_iargs = 4;
2641             nb_oargs = 2;
2642             /* Test if the high part of the operation is dead, but not
2643                the low part.  The result can be optimized to a simple
2644                add or sub.  This happens often for an x86_64 guest when
2645                the CPU is in 32-bit mode.  */
2646             if (arg_temp(op->args[1])->state == TS_DEAD) {
2647                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2648                     goto do_remove;
2649                 }
2650                 /* Replace the opcode and adjust the args in place,
2651                    leaving 3 unused args at the end.  */
2652                 op->opc = opc = opc_new;
2653                 op->args[1] = op->args[2];
2654                 op->args[2] = op->args[4];
2655                 /* Fall through and mark the single-word operation live.  */
2656                 nb_iargs = 2;
2657                 nb_oargs = 1;
2658             }
2659             goto do_not_remove;
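            /*
             * E.g. "add2_i32 rl,rh,al,ah,bl,bh" with rh dead is rewritten
             * in place to "add_i32 rl,al,bl"; the three trailing args are
             * simply left unused.
             */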
2660 
2661         case INDEX_op_mulu2_i32:
2662             opc_new = INDEX_op_mul_i32;
2663             opc_new2 = INDEX_op_muluh_i32;
2664             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2665             goto do_mul2;
2666         case INDEX_op_muls2_i32:
2667             opc_new = INDEX_op_mul_i32;
2668             opc_new2 = INDEX_op_mulsh_i32;
2669             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2670             goto do_mul2;
2671         case INDEX_op_mulu2_i64:
2672             opc_new = INDEX_op_mul_i64;
2673             opc_new2 = INDEX_op_muluh_i64;
2674             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2675             goto do_mul2;
2676         case INDEX_op_muls2_i64:
2677             opc_new = INDEX_op_mul_i64;
2678             opc_new2 = INDEX_op_mulsh_i64;
2679             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2680             goto do_mul2;
2681         do_mul2:
2682             nb_iargs = 2;
2683             nb_oargs = 2;
2684             if (arg_temp(op->args[1])->state == TS_DEAD) {
2685                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2686                     /* Both parts of the operation are dead.  */
2687                     goto do_remove;
2688                 }
2689                 /* The high part of the operation is dead; generate the low. */
2690                 op->opc = opc = opc_new;
2691                 op->args[1] = op->args[2];
2692                 op->args[2] = op->args[3];
2693             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2694                 /* The low part of the operation is dead; generate the high. */
2695                 op->opc = opc = opc_new2;
2696                 op->args[0] = op->args[1];
2697                 op->args[1] = op->args[2];
2698                 op->args[2] = op->args[3];
2699             } else {
2700                 goto do_not_remove;
2701             }
2702             /* Mark the single-word operation live.  */
2703             nb_oargs = 1;
2704             goto do_not_remove;
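            /*
             * Likewise "mulu2_i32 lo,hi,a,b" with hi dead becomes
             * "mul_i32 lo,a,b", and with lo dead (and muluh available)
             * it becomes "muluh_i32 hi,a,b".
             */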
2705 
2706         default:
2707             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2708             nb_iargs = def->nb_iargs;
2709             nb_oargs = def->nb_oargs;
2710 
2711             /* Test if the operation can be removed because all
2712                its outputs are dead.  We assume that nb_oargs == 0
2713                implies side effects.  */
2714             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2715                 for (i = 0; i < nb_oargs; i++) {
2716                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2717                         goto do_not_remove;
2718                     }
2719                 }
2720                 goto do_remove;
2721             }
2722             goto do_not_remove;
2723 
2724         do_remove:
2725             tcg_op_remove(s, op);
2726             break;
2727 
2728         do_not_remove:
2729             for (i = 0; i < nb_oargs; i++) {
2730                 ts = arg_temp(op->args[i]);
2731 
2732                 /* Remember the preference of the uses that followed.  */
2733                 op->output_pref[i] = *la_temp_pref(ts);
2734 
2735                 /* Output args are dead.  */
2736                 if (ts->state & TS_DEAD) {
2737                     arg_life |= DEAD_ARG << i;
2738                 }
2739                 if (ts->state & TS_MEM) {
2740                     arg_life |= SYNC_ARG << i;
2741                 }
2742                 ts->state = TS_DEAD;
2743                 la_reset_pref(ts);
2744             }
2745 
2746             /* If end of basic block, update.  */
2747             if (def->flags & TCG_OPF_BB_EXIT) {
2748                 la_func_end(s, nb_globals, nb_temps);
2749             } else if (def->flags & TCG_OPF_BB_END) {
2750                 la_bb_end(s, nb_globals, nb_temps);
2751             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2752                 la_global_sync(s, nb_globals);
2753                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2754                     la_cross_call(s, nb_temps);
2755                 }
2756             }
2757 
2758             /* Record arguments that die in this opcode.  */
2759             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2760                 ts = arg_temp(op->args[i]);
2761                 if (ts->state & TS_DEAD) {
2762                     arg_life |= DEAD_ARG << i;
2763                 }
2764             }
2765 
2766             /* Input arguments are live for preceding opcodes.  */
2767             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2768                 ts = arg_temp(op->args[i]);
2769                 if (ts->state & TS_DEAD) {
2770                     /* For operands that were dead, initially allow
2771                        all regs for the type.  */
2772                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2773                     ts->state &= ~TS_DEAD;
2774                 }
2775             }
2776 
2777             /* Incorporate constraints for this operand.  */
2778             switch (opc) {
2779             case INDEX_op_mov_i32:
2780             case INDEX_op_mov_i64:
2781                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2782                    have proper constraints.  That said, special case
2783                    moves to propagate preferences backward.  */
2784                 if (IS_DEAD_ARG(1)) {
2785                     *la_temp_pref(arg_temp(op->args[0]))
2786                         = *la_temp_pref(arg_temp(op->args[1]));
2787                 }
2788                 break;
2789 
2790             default:
2791                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2792                     const TCGArgConstraint *ct = &def->args_ct[i];
2793                     TCGRegSet set, *pset;
2794 
2795                     ts = arg_temp(op->args[i]);
2796                     pset = la_temp_pref(ts);
2797                     set = *pset;
2798 
2799                     set &= ct->u.regs;
2800                     if (ct->ct & TCG_CT_IALIAS) {
2801                         set &= op->output_pref[ct->alias_index];
2802                     }
2803                     /* If the combination is not possible, restart.  */
2804                     if (set == 0) {
2805                         set = ct->u.regs;
2806                     }
2807                     *pset = set;
2808                 }
2809                 break;
2810             }
2811             break;
2812         }
2813         op->life = arg_life;
2814     }
2815 }
2816 
2817 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
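/* Each indirect global gets a shadow direct temp: a use while the shadow
   is dead is preceded by a load from the canonical memory slot, and an
   output that must stay visible is followed by a store back.  */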
2818 static bool liveness_pass_2(TCGContext *s)
2819 {
2820     int nb_globals = s->nb_globals;
2821     int nb_temps, i;
2822     bool changes = false;
2823     TCGOp *op, *op_next;
2824 
2825     /* Create a temporary for each indirect global.  */
2826     for (i = 0; i < nb_globals; ++i) {
2827         TCGTemp *its = &s->temps[i];
2828         if (its->indirect_reg) {
2829             TCGTemp *dts = tcg_temp_alloc(s);
2830             dts->type = its->type;
2831             dts->base_type = its->base_type;
2832             its->state_ptr = dts;
2833         } else {
2834             its->state_ptr = NULL;
2835         }
2836         /* All globals begin dead.  */
2837         its->state = TS_DEAD;
2838     }
2839     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2840         TCGTemp *its = &s->temps[i];
2841         its->state_ptr = NULL;
2842         its->state = TS_DEAD;
2843     }
2844 
2845     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2846         TCGOpcode opc = op->opc;
2847         const TCGOpDef *def = &tcg_op_defs[opc];
2848         TCGLifeData arg_life = op->life;
2849         int nb_iargs, nb_oargs, call_flags;
2850         TCGTemp *arg_ts, *dir_ts;
2851 
2852         if (opc == INDEX_op_call) {
2853             nb_oargs = TCGOP_CALLO(op);
2854             nb_iargs = TCGOP_CALLI(op);
2855             call_flags = op->args[nb_oargs + nb_iargs + 1];
2856         } else {
2857             nb_iargs = def->nb_iargs;
2858             nb_oargs = def->nb_oargs;
2859 
2860             /* Set flags similar to those that calls require.  */
2861             if (def->flags & TCG_OPF_BB_END) {
2862                 /* Like writing globals: save_globals */
2863                 call_flags = 0;
2864             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2865                 /* Like reading globals: sync_globals */
2866                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2867             } else {
2868                 /* No effect on globals.  */
2869                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2870                               TCG_CALL_NO_WRITE_GLOBALS);
2871             }
2872         }
2873 
2874         /* Make sure that input arguments are available.  */
2875         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2876             arg_ts = arg_temp(op->args[i]);
2877             if (arg_ts) {
2878                 dir_ts = arg_ts->state_ptr;
2879                 if (dir_ts && arg_ts->state == TS_DEAD) {
2880                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2881                                       ? INDEX_op_ld_i32
2882                                       : INDEX_op_ld_i64);
2883                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2884 
2885                     lop->args[0] = temp_arg(dir_ts);
2886                     lop->args[1] = temp_arg(arg_ts->mem_base);
2887                     lop->args[2] = arg_ts->mem_offset;
2888 
2889                     /* Loaded, but synced with memory.  */
2890                     arg_ts->state = TS_MEM;
2891                 }
2892             }
2893         }
2894 
2895         /* Perform input replacement, and mark inputs that became dead.
2896            No action is required except keeping temp_state up to date
2897            so that we reload when needed.  */
2898         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2899             arg_ts = arg_temp(op->args[i]);
2900             if (arg_ts) {
2901                 dir_ts = arg_ts->state_ptr;
2902                 if (dir_ts) {
2903                     op->args[i] = temp_arg(dir_ts);
2904                     changes = true;
2905                     if (IS_DEAD_ARG(i)) {
2906                         arg_ts->state = TS_DEAD;
2907                     }
2908                 }
2909             }
2910         }
2911 
2912         /* Liveness analysis should ensure that the following are
2913            all correct, for call sites and basic block end points.  */
2914         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2915             /* Nothing to do */
2916         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2917             for (i = 0; i < nb_globals; ++i) {
2918                 /* Liveness should see that globals are synced back,
2919                    that is, either TS_DEAD or TS_MEM.  */
2920                 arg_ts = &s->temps[i];
2921                 tcg_debug_assert(arg_ts->state_ptr == 0
2922                                  || arg_ts->state != 0);
2923             }
2924         } else {
2925             for (i = 0; i < nb_globals; ++i) {
2926                 /* Liveness should see that globals are saved back,
2927                    that is, TS_DEAD, waiting to be reloaded.  */
2928                 arg_ts = &s->temps[i];
2929                 tcg_debug_assert(arg_ts->state_ptr == 0
2930                                  || arg_ts->state == TS_DEAD);
2931             }
2932         }
2933 
2934         /* Outputs become available.  */
2935         for (i = 0; i < nb_oargs; i++) {
2936             arg_ts = arg_temp(op->args[i]);
2937             dir_ts = arg_ts->state_ptr;
2938             if (!dir_ts) {
2939                 continue;
2940             }
2941             op->args[i] = temp_arg(dir_ts);
2942             changes = true;
2943 
2944             /* The output is now live and modified.  */
2945             arg_ts->state = 0;
2946 
2947             /* Sync outputs upon their last write.  */
2948             if (NEED_SYNC_ARG(i)) {
2949                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2950                                   ? INDEX_op_st_i32
2951                                   : INDEX_op_st_i64);
2952                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2953 
2954                 sop->args[0] = temp_arg(dir_ts);
2955                 sop->args[1] = temp_arg(arg_ts->mem_base);
2956                 sop->args[2] = arg_ts->mem_offset;
2957 
2958                 arg_ts->state = TS_MEM;
2959             }
2960             /* Drop outputs that are dead.  */
2961             if (IS_DEAD_ARG(i)) {
2962                 arg_ts->state = TS_DEAD;
2963             }
2964         }
2965     }
2966 
2967     return changes;
2968 }
2969 
2970 #ifdef CONFIG_DEBUG_TCG
2971 static void dump_regs(TCGContext *s)
2972 {
2973     TCGTemp *ts;
2974     int i;
2975     char buf[64];
2976 
2977     for (i = 0; i < s->nb_temps; i++) {
2978         ts = &s->temps[i];
2979         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2980         switch (ts->val_type) {
2981         case TEMP_VAL_REG:
2982             printf("%s", tcg_target_reg_names[ts->reg]);
2983             break;
2984         case TEMP_VAL_MEM:
2985             printf("%d(%s)", (int)ts->mem_offset,
2986                    tcg_target_reg_names[ts->mem_base->reg]);
2987             break;
2988         case TEMP_VAL_CONST:
2989             printf("$0x%" TCG_PRIlx, ts->val);
2990             break;
2991         case TEMP_VAL_DEAD:
2992             printf("D");
2993             break;
2994         default:
2995             printf("???");
2996             break;
2997         }
2998         printf("\n");
2999     }
3000 
3001     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3002         if (s->reg_to_temp[i] != NULL) {
3003             printf("%s: %s\n",
3004                    tcg_target_reg_names[i],
3005                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3006         }
3007     }
3008 }
3009 
3010 static void check_regs(TCGContext *s)
3011 {
3012     int reg;
3013     int k;
3014     TCGTemp *ts;
3015     char buf[64];
3016 
3017     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3018         ts = s->reg_to_temp[reg];
3019         if (ts != NULL) {
3020             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3021                 printf("Inconsistency for register %s:\n",
3022                        tcg_target_reg_names[reg]);
3023                 goto fail;
3024             }
3025         }
3026     }
3027     for (k = 0; k < s->nb_temps; k++) {
3028         ts = &s->temps[k];
3029         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3030             && s->reg_to_temp[ts->reg] != ts) {
3031             printf("Inconsistency for temp %s:\n",
3032                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3033         fail:
3034             printf("reg state:\n");
3035             dump_regs(s);
3036             tcg_abort();
3037         }
3038     }
3039 }
3040 #endif
3041 
3042 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3043 {
3044 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3045     /* Sparc64 stack is accessed with offset of 2047 */
3046     s->current_frame_offset = (s->current_frame_offset +
3047                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3048         ~(sizeof(tcg_target_long) - 1);
3049 #endif
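    /* The rounding above is the usual (off + align - 1) & ~(align - 1)
       power-of-two alignment: e.g. a current offset of 13 with an 8-byte
       tcg_target_long rounds up to 16 before the slot is assigned.  */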
3050     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3051         s->frame_end) {
3052         tcg_abort();
3053     }
3054     ts->mem_offset = s->current_frame_offset;
3055     ts->mem_base = s->frame_temp;
3056     ts->mem_allocated = 1;
3057     s->current_frame_offset += sizeof(tcg_target_long);
3058 }
3059 
3060 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3061 
3062 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3063    mark it free; otherwise mark it dead.  */
3064 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3065 {
3066     if (ts->fixed_reg) {
3067         return;
3068     }
3069     if (ts->val_type == TEMP_VAL_REG) {
3070         s->reg_to_temp[ts->reg] = NULL;
3071     }
3072     ts->val_type = (free_or_dead < 0
3073                     || ts->temp_local
3074                     || ts->temp_global
3075                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3076 }
3077 
3078 /* Mark a temporary as dead.  */
3079 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3080 {
3081     temp_free_or_dead(s, ts, 1);
3082 }
3083 
3084 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3085    register needs to be allocated to store a constant.  If 'free_or_dead'
3086    is non-zero, subsequently release the temporary; if it is positive, the
3087    temp is dead; if it is negative, the temp is free.  */
3088 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3089                       TCGRegSet preferred_regs, int free_or_dead)
3090 {
3091     if (ts->fixed_reg) {
3092         return;
3093     }
3094     if (!ts->mem_coherent) {
3095         if (!ts->mem_allocated) {
3096             temp_allocate_frame(s, ts);
3097         }
3098         switch (ts->val_type) {
3099         case TEMP_VAL_CONST:
3100             /* If we're going to free the temp immediately, then we won't
3101                require it later in a register, so attempt to store the
3102                constant to memory directly.  */
3103             if (free_or_dead
3104                 && tcg_out_sti(s, ts->type, ts->val,
3105                                ts->mem_base->reg, ts->mem_offset)) {
3106                 break;
3107             }
3108             temp_load(s, ts, tcg_target_available_regs[ts->type],
3109                       allocated_regs, preferred_regs);
3110             /* fallthrough */
3111 
3112         case TEMP_VAL_REG:
3113             tcg_out_st(s, ts->type, ts->reg,
3114                        ts->mem_base->reg, ts->mem_offset);
3115             break;
3116 
3117         case TEMP_VAL_MEM:
3118             break;
3119 
3120         case TEMP_VAL_DEAD:
3121         default:
3122             tcg_abort();
3123         }
3124         ts->mem_coherent = 1;
3125     }
3126     if (free_or_dead) {
3127         temp_free_or_dead(s, ts, free_or_dead);
3128     }
3129 }
3130 
3131 /* free register 'reg' by spilling the corresponding temporary if necessary */
3132 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3133 {
3134     TCGTemp *ts = s->reg_to_temp[reg];
3135     if (ts != NULL) {
3136         temp_sync(s, ts, allocated_regs, 0, -1);
3137     }
3138 }
3139 
3140 /**
3141  * tcg_reg_alloc:
3142  * @required_regs: Set of registers in which we must allocate.
3143  * @allocated_regs: Set of registers which must be avoided.
3144  * @preferred_regs: Set of registers we should prefer.
3145  * @rev: True if we search the registers in "indirect" order.
3146  *
3147  * The allocated register must be in @required_regs & ~@allocated_regs,
3148  * but if we can put it in @preferred_regs we may save a move later.
3149  */
3150 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3151                             TCGRegSet allocated_regs,
3152                             TCGRegSet preferred_regs, bool rev)
3153 {
3154     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3155     TCGRegSet reg_ct[2];
3156     const int *order;
3157 
3158     reg_ct[1] = required_regs & ~allocated_regs;
3159     tcg_debug_assert(reg_ct[1] != 0);
3160     reg_ct[0] = reg_ct[1] & preferred_regs;
3161 
3162     /* Skip the preferred_regs option if it cannot be satisfied,
3163        or if the preference made no difference.  */
3164     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3165 
3166     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3167 
3168     /* Try free registers, preferences first.  */
3169     for (j = f; j < 2; j++) {
3170         TCGRegSet set = reg_ct[j];
3171 
3172         if (tcg_regset_single(set)) {
3173             /* One register in the set.  */
3174             TCGReg reg = tcg_regset_first(set);
3175             if (s->reg_to_temp[reg] == NULL) {
3176                 return reg;
3177             }
3178         } else {
3179             for (i = 0; i < n; i++) {
3180                 TCGReg reg = order[i];
3181                 if (s->reg_to_temp[reg] == NULL &&
3182                     tcg_regset_test_reg(set, reg)) {
3183                     return reg;
3184                 }
3185             }
3186         }
3187     }
3188 
3189     /* We must spill something.  */
3190     for (j = f; j < 2; j++) {
3191         TCGRegSet set = reg_ct[j];
3192 
3193         if (tcg_regset_single(set)) {
3194             /* One register in the set.  */
3195             TCGReg reg = tcg_regset_first(set);
3196             tcg_reg_free(s, reg, allocated_regs);
3197             return reg;
3198         } else {
3199             for (i = 0; i < n; i++) {
3200                 TCGReg reg = order[i];
3201                 if (tcg_regset_test_reg(set, reg)) {
3202                     tcg_reg_free(s, reg, allocated_regs);
3203                     return reg;
3204                 }
3205             }
3206         }
3207     }
3208 
3209     tcg_abort();
3210 }
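
     /* Example: with required_regs = {r0, r1}, allocated_regs = {r0} and
        no applicable preference, the only candidate is r1.  The first
        pass above returns r1 at once if it is unused; otherwise the
        second pass spills whatever temp currently occupies r1 via
        tcg_reg_free().  */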
3211 
3212 /* Make sure the temporary is in a register.  If needed, allocate the register
3213    from DESIRED while avoiding ALLOCATED, preferring PREFERRED.  */
3214 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3215                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3216 {
3217     TCGReg reg;
3218 
3219     switch (ts->val_type) {
3220     case TEMP_VAL_REG:
3221         return;
3222     case TEMP_VAL_CONST:
3223         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3224                             preferred_regs, ts->indirect_base);
3225         tcg_out_movi(s, ts->type, reg, ts->val);
3226         ts->mem_coherent = 0;
3227         break;
3228     case TEMP_VAL_MEM:
3229         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3230                             preferred_regs, ts->indirect_base);
3231         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3232         ts->mem_coherent = 1;
3233         break;
3234     case TEMP_VAL_DEAD:
3235     default:
3236         tcg_abort();
3237     }
3238     ts->reg = reg;
3239     ts->val_type = TEMP_VAL_REG;
3240     s->reg_to_temp[reg] = ts;
3241 }
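
     /* Note on coherency above: a constant materialized with
        tcg_out_movi() starts out with mem_coherent = 0 (its backing
        slot, if any, is stale), while a value reloaded from its slot
        with tcg_out_ld() starts coherent and need not be spilled again
        while it stays clean.  */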
3242 
3243 /* Save a temporary to memory. 'allocated_regs' is used in case a
3244    temporary register needs to be allocated to store a constant.  */
3245 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3246 {
3247     /* The liveness analysis already ensures that globals are back
3248        in memory. Keep a tcg_debug_assert for safety. */
3249     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3250 }
3251 
3252 /* save globals to their canonical location and assume they can be
3253    modified by the following code. 'allocated_regs' is used in case a
3254    temporary register needs to be allocated to store a constant. */
3255 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3256 {
3257     int i, n;
3258 
3259     for (i = 0, n = s->nb_globals; i < n; i++) {
3260         temp_save(s, &s->temps[i], allocated_regs);
3261     }
3262 }
3263 
3264 /* sync globals to their canonical location and assume they can be
3265    read by the following code. 'allocated_regs' is used in case a
3266    temporary register needs to be allocated to store a constant. */
3267 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3268 {
3269     int i, n;
3270 
3271     for (i = 0, n = s->nb_globals; i < n; i++) {
3272         TCGTemp *ts = &s->temps[i];
3273         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3274                          || ts->fixed_reg
3275                          || ts->mem_coherent);
3276     }
3277 }
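
     /* The asymmetry between save_globals() and sync_globals() mirrors
        helper semantics: saving forces globals out of registers entirely
        (the callee may read and write them in memory), while syncing
        only requires the memory copy to be up to date, so read-only
        helpers can leave register copies live.  */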
3278 
3279 /* at the end of a basic block, we assume all temporaries are dead and
3280    all globals are stored at their canonical location. */
3281 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3282 {
3283     int i;
3284 
3285     for (i = s->nb_globals; i < s->nb_temps; i++) {
3286         TCGTemp *ts = &s->temps[i];
3287         if (ts->temp_local) {
3288             temp_save(s, ts, allocated_regs);
3289         } else {
3290             /* The liveness analysis already ensures that temps are dead.
3291                Keep a tcg_debug_assert for safety. */
3292             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3293         }
3294     }
3295 
3296     save_globals(s, allocated_regs);
3297 }
3298 
3299 /*
3300  * Specialized code generation for INDEX_op_movi_*.
3301  */
3302 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3303                                   tcg_target_ulong val, TCGLifeData arg_life,
3304                                   TCGRegSet preferred_regs)
3305 {
3306     /* ENV should not be modified.  */
3307     tcg_debug_assert(!ots->fixed_reg);
3308 
3309     /* The movi is not explicitly generated here.  */
3310     if (ots->val_type == TEMP_VAL_REG) {
3311         s->reg_to_temp[ots->reg] = NULL;
3312     }
3313     ots->val_type = TEMP_VAL_CONST;
3314     ots->val = val;
3315     ots->mem_coherent = 0;
3316     if (NEED_SYNC_ARG(0)) {
3317         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3318     } else if (IS_DEAD_ARG(0)) {
3319         temp_dead(s, ots);
3320     }
3321 }
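
     /* Note that the above need not emit any host code: the constant is
        merely recorded in the temp and is materialized lazily by
        temp_load() or temp_sync() when, and if, it is actually used.  */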
3322 
3323 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3324 {
3325     TCGTemp *ots = arg_temp(op->args[0]);
3326     tcg_target_ulong val = op->args[1];
3327 
3328     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3329 }
3330 
3331 /*
3332  * Specialized code generation for INDEX_op_mov_*.
3333  */
3334 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3335 {
3336     const TCGLifeData arg_life = op->life;
3337     TCGRegSet allocated_regs, preferred_regs;
3338     TCGTemp *ts, *ots;
3339     TCGType otype, itype;
3340 
3341     allocated_regs = s->reserved_regs;
3342     preferred_regs = op->output_pref[0];
3343     ots = arg_temp(op->args[0]);
3344     ts = arg_temp(op->args[1]);
3345 
3346     /* ENV should not be modified.  */
3347     tcg_debug_assert(!ots->fixed_reg);
3348 
3349     /* Note that otype != itype for no-op truncation.  */
3350     otype = ots->type;
3351     itype = ts->type;
3352 
3353     if (ts->val_type == TEMP_VAL_CONST) {
3354         /* propagate constant or generate sti */
3355         tcg_target_ulong val = ts->val;
3356         if (IS_DEAD_ARG(1)) {
3357             temp_dead(s, ts);
3358         }
3359         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3360         return;
3361     }
3362 
3363     /* If the source value is in memory we're going to be forced
3364        to have it in a register in order to perform the copy.  Copy
3365        the SOURCE value into its own register first, so that we
3366        don't have to reload SOURCE the next time it is used. */
3367     if (ts->val_type == TEMP_VAL_MEM) {
3368         temp_load(s, ts, tcg_target_available_regs[itype],
3369                   allocated_regs, preferred_regs);
3370     }
3371 
3372     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3373     if (IS_DEAD_ARG(0)) {
3374         /* mov to a non-saved dead register makes no sense (even with
3375            liveness analysis disabled). */
3376         tcg_debug_assert(NEED_SYNC_ARG(0));
3377         if (!ots->mem_allocated) {
3378             temp_allocate_frame(s, ots);
3379         }
3380         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3381         if (IS_DEAD_ARG(1)) {
3382             temp_dead(s, ts);
3383         }
3384         temp_dead(s, ots);
3385     } else {
3386         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3387             /* the mov can be suppressed */
3388             if (ots->val_type == TEMP_VAL_REG) {
3389                 s->reg_to_temp[ots->reg] = NULL;
3390             }
3391             ots->reg = ts->reg;
3392             temp_dead(s, ts);
3393         } else {
3394             if (ots->val_type != TEMP_VAL_REG) {
3395                 /* When allocating a new register, make sure to not spill the
3396                    input one. */
3397                 tcg_regset_set_reg(allocated_regs, ts->reg);
3398                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3399                                          allocated_regs, preferred_regs,
3400                                          ots->indirect_base);
3401             }
3402             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3403                 /*
3404                  * Cross register class move not supported.
3405                  * Store the source register into the destination slot
3406                  * and leave the destination temp as TEMP_VAL_MEM.
3407                  */
3408                 assert(!ots->fixed_reg);
3409                 if (!ots->mem_allocated) {
3410                     temp_allocate_frame(s, ots);
3411                 }
3412                 tcg_out_st(s, ts->type, ts->reg,
3413                            ots->mem_base->reg, ots->mem_offset);
3414                 ots->mem_coherent = 1;
3415                 temp_free_or_dead(s, ots, -1);
3416                 return;
3417             }
3418         }
3419         ots->val_type = TEMP_VAL_REG;
3420         ots->mem_coherent = 0;
3421         s->reg_to_temp[ots->reg] = ots;
3422         if (NEED_SYNC_ARG(0)) {
3423             temp_sync(s, ots, allocated_regs, 0, 0);
3424         }
3425     }
3426 }
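
     /* In summary, a mov resolves to one of: constant propagation via
        tcg_reg_alloc_do_movi(), a plain store when the output is dead
        but must be synced, a register rename when the input dies here,
        or a real tcg_out_mov() into a freshly allocated register.  */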
3427 
3428 /*
3429  * Specialized code generation for INDEX_op_dup_vec.
3430  */
3431 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3432 {
3433     const TCGLifeData arg_life = op->life;
3434     TCGRegSet dup_out_regs, dup_in_regs;
3435     TCGTemp *its, *ots;
3436     TCGType itype, vtype;
3437     intptr_t endian_fixup;
3438     unsigned vece;
3439     bool ok;
3440 
3441     ots = arg_temp(op->args[0]);
3442     its = arg_temp(op->args[1]);
3443 
3444     /* ENV should not be modified.  */
3445     tcg_debug_assert(!ots->fixed_reg);
3446 
3447     itype = its->type;
3448     vece = TCGOP_VECE(op);
3449     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3450 
3451     if (its->val_type == TEMP_VAL_CONST) {
3452         /* Propagate constant via movi -> dupi.  */
3453         tcg_target_ulong val = its->val;
3454         if (IS_DEAD_ARG(1)) {
3455             temp_dead(s, its);
3456         }
3457         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3458         return;
3459     }
3460 
3461     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3462     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3463 
3464     /* Allocate the output register now.  */
3465     if (ots->val_type != TEMP_VAL_REG) {
3466         TCGRegSet allocated_regs = s->reserved_regs;
3467 
3468         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3469             /* Make sure to not spill the input register. */
3470             tcg_regset_set_reg(allocated_regs, its->reg);
3471         }
3472         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3473                                  op->output_pref[0], ots->indirect_base);
3474         ots->val_type = TEMP_VAL_REG;
3475         ots->mem_coherent = 0;
3476         s->reg_to_temp[ots->reg] = ots;
3477     }
3478 
3479     switch (its->val_type) {
3480     case TEMP_VAL_REG:
3481         /*
3482          * The dup constraints must be broad, covering all possible VECE.
3483          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3484          * to fail, indicating that extra moves are required for that case.
3485          */
3486         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3487             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3488                 goto done;
3489             }
3490             /* Try again from memory or a vector input register.  */
3491         }
3492         if (!its->mem_coherent) {
3493             /*
3494              * The input register is not synced, and so an extra store
3495              * would be required to use memory.  Attempt an integer-vector
3496              * register move first.  We do not have a TCGRegSet for this.
3497              */
3498             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3499                 break;
3500             }
3501             /* Sync the temp back to its slot and load from there.  */
3502             temp_sync(s, its, s->reserved_regs, 0, 0);
3503         }
3504         /* fall through */
3505 
3506     case TEMP_VAL_MEM:
3507 #ifdef HOST_WORDS_BIGENDIAN
3508         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3509         endian_fixup -= 1 << vece;
3510 #else
3511         endian_fixup = 0;
3512 #endif
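             /* E.g. on a big-endian host, duplicating the low byte
                (VECE == MO_8) of an I64 slot uses fixup 8 - (1 << 0) = 7,
                the offset of the least significant byte.  */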
3513         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3514                              its->mem_offset + endian_fixup)) {
3515             goto done;
3516         }
3517         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3518         break;
3519 
3520     default:
3521         g_assert_not_reached();
3522     }
3523 
3524     /* We now have a vector input register, so dup must succeed. */
3525     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3526     tcg_debug_assert(ok);
3527 
3528  done:
3529     if (IS_DEAD_ARG(1)) {
3530         temp_dead(s, its);
3531     }
3532     if (NEED_SYNC_ARG(0)) {
3533         temp_sync(s, ots, s->reserved_regs, 0, 0);
3534     }
3535     if (IS_DEAD_ARG(0)) {
3536         temp_dead(s, ots);
3537     }
3538 }
3539 
3540 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3541 {
3542     const TCGLifeData arg_life = op->life;
3543     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3544     TCGRegSet i_allocated_regs;
3545     TCGRegSet o_allocated_regs;
3546     int i, k, nb_iargs, nb_oargs;
3547     TCGReg reg;
3548     TCGArg arg;
3549     const TCGArgConstraint *arg_ct;
3550     TCGTemp *ts;
3551     TCGArg new_args[TCG_MAX_OP_ARGS];
3552     int const_args[TCG_MAX_OP_ARGS];
3553 
3554     nb_oargs = def->nb_oargs;
3555     nb_iargs = def->nb_iargs;
3556 
3557     /* copy constants */
3558     memcpy(new_args + nb_oargs + nb_iargs,
3559            op->args + nb_oargs + nb_iargs,
3560            sizeof(TCGArg) * def->nb_cargs);
3561 
3562     i_allocated_regs = s->reserved_regs;
3563     o_allocated_regs = s->reserved_regs;
3564 
3565     /* satisfy input constraints */
3566     for (k = 0; k < nb_iargs; k++) {
3567         TCGRegSet i_preferred_regs, o_preferred_regs;
3568 
3569         i = def->sorted_args[nb_oargs + k];
3570         arg = op->args[i];
3571         arg_ct = &def->args_ct[i];
3572         ts = arg_temp(arg);
3573 
3574         if (ts->val_type == TEMP_VAL_CONST
3575             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3576             /* constant is OK for instruction */
3577             const_args[i] = 1;
3578             new_args[i] = ts->val;
3579             continue;
3580         }
3581 
3582         i_preferred_regs = o_preferred_regs = 0;
3583         if (arg_ct->ct & TCG_CT_IALIAS) {
3584             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3585             if (ts->fixed_reg) {
3586                 /* if fixed register, we must allocate a new register
3587                    if the alias is not the same register */
3588                 if (arg != op->args[arg_ct->alias_index]) {
3589                     goto allocate_in_reg;
3590                 }
3591             } else {
3592                 /* if the input is aliased to an output and if it is
3593                    not dead after the instruction, we must allocate
3594                    a new register and move it */
3595                 if (!IS_DEAD_ARG(i)) {
3596                     goto allocate_in_reg;
3597                 }
3598 
3599                 /* check if the current register has already been allocated
3600                    for another input aliased to an output */
3601                 if (ts->val_type == TEMP_VAL_REG) {
3602                     int k2, i2;
3603                     reg = ts->reg;
3604                     for (k2 = 0 ; k2 < k ; k2++) {
3605                         i2 = def->sorted_args[nb_oargs + k2];
3606                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3607                             reg == new_args[i2]) {
3608                             goto allocate_in_reg;
3609                         }
3610                     }
3611                 }
3612                 i_preferred_regs = o_preferred_regs;
3613             }
3614         }
3615 
3616         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3617         reg = ts->reg;
3618 
3619         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3620             /* nothing to do: the constraint is satisfied */
3621         } else {
3622         allocate_in_reg:
3623             /* allocate a new register matching the constraint
3624                and move the temporary register into it */
3625             temp_load(s, ts, tcg_target_available_regs[ts->type],
3626                       i_allocated_regs, 0);
3627             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3628                                 o_preferred_regs, ts->indirect_base);
3629             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3630                 /*
3631                  * Cross register class move not supported.  Sync the
3632                  * temp back to its slot and load from there.
3633                  */
3634                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3635                 tcg_out_ld(s, ts->type, reg,
3636                            ts->mem_base->reg, ts->mem_offset);
3637             }
3638         }
3639         new_args[i] = reg;
3640         const_args[i] = 0;
3641         tcg_regset_set_reg(i_allocated_regs, reg);
3642     }
3643 
3644     /* mark dead temporaries and free the associated registers */
3645     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3646         if (IS_DEAD_ARG(i)) {
3647             temp_dead(s, arg_temp(op->args[i]));
3648         }
3649     }
3650 
3651     if (def->flags & TCG_OPF_BB_END) {
3652         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3653     } else {
3654         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3655             /* XXX: permit generic clobber register list? */
3656             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3657                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3658                     tcg_reg_free(s, i, i_allocated_regs);
3659                 }
3660             }
3661         }
3662         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3663             /* sync globals if the op has side effects and might trigger
3664                an exception. */
3665             sync_globals(s, i_allocated_regs);
3666         }
3667 
3668         /* satisfy the output constraints */
3669         for (k = 0; k < nb_oargs; k++) {
3670             i = def->sorted_args[k];
3671             arg = op->args[i];
3672             arg_ct = &def->args_ct[i];
3673             ts = arg_temp(arg);
3674 
3675             /* ENV should not be modified.  */
3676             tcg_debug_assert(!ts->fixed_reg);
3677 
3678             if ((arg_ct->ct & TCG_CT_ALIAS)
3679                 && !const_args[arg_ct->alias_index]) {
3680                 reg = new_args[arg_ct->alias_index];
3681             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3682                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3683                                     i_allocated_regs | o_allocated_regs,
3684                                     op->output_pref[k], ts->indirect_base);
3685             } else {
3686                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3687                                     op->output_pref[k], ts->indirect_base);
3688             }
3689             tcg_regset_set_reg(o_allocated_regs, reg);
3690             if (ts->val_type == TEMP_VAL_REG) {
3691                 s->reg_to_temp[ts->reg] = NULL;
3692             }
3693             ts->val_type = TEMP_VAL_REG;
3694             ts->reg = reg;
3695             /*
3696              * Temp value is modified, so the value kept in memory is
3697              * potentially not the same.
3698              */
3699             ts->mem_coherent = 0;
3700             s->reg_to_temp[reg] = ts;
3701             new_args[i] = reg;
3702         }
3703     }
3704 
3705     /* emit instruction */
3706     if (def->flags & TCG_OPF_VECTOR) {
3707         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3708                        new_args, const_args);
3709     } else {
3710         tcg_out_op(s, op->opc, new_args, const_args);
3711     }
3712 
3713     /* move the outputs into the correct registers if needed */
3714     for (i = 0; i < nb_oargs; i++) {
3715         ts = arg_temp(op->args[i]);
3716 
3717         /* ENV should not be modified.  */
3718         tcg_debug_assert(!ts->fixed_reg);
3719 
3720         if (NEED_SYNC_ARG(i)) {
3721             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3722         } else if (IS_DEAD_ARG(i)) {
3723             temp_dead(s, ts);
3724         }
3725     }
3726 }
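
     /* tcg_reg_alloc_op() is deliberately generic: input constraints are
        satisfied first, call-clobbered registers are freed for ops that
        clobber, outputs are placed (possibly aliasing an input), and
        only then is the single host instruction emitted.  */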
3727 
3728 #ifdef TCG_TARGET_STACK_GROWSUP
3729 #define STACK_DIR(x) (-(x))
3730 #else
3731 #define STACK_DIR(x) (x)
3732 #endif
3733 
3734 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3735 {
3736     const int nb_oargs = TCGOP_CALLO(op);
3737     const int nb_iargs = TCGOP_CALLI(op);
3738     const TCGLifeData arg_life = op->life;
3739     int flags, nb_regs, i;
3740     TCGReg reg;
3741     TCGArg arg;
3742     TCGTemp *ts;
3743     intptr_t stack_offset;
3744     size_t call_stack_size;
3745     tcg_insn_unit *func_addr;
3746     int allocate_args;
3747     TCGRegSet allocated_regs;
3748 
3749     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3750     flags = op->args[nb_oargs + nb_iargs + 1];
3751 
3752     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3753     if (nb_regs > nb_iargs) {
3754         nb_regs = nb_iargs;
3755     }
3756 
3757     /* assign stack slots first */
3758     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3759     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3760         ~(TCG_TARGET_STACK_ALIGN - 1);
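         /* E.g. three stack arguments of 8 bytes each with a 16-byte
            TCG_TARGET_STACK_ALIGN: 24 bytes rounds up to 32.  */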
3761     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3762     if (allocate_args) {
3763         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3764            preallocate call stack */
3765         tcg_abort();
3766     }
3767 
3768     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3769     for (i = nb_regs; i < nb_iargs; i++) {
3770         arg = op->args[nb_oargs + i];
3771 #ifdef TCG_TARGET_STACK_GROWSUP
3772         stack_offset -= sizeof(tcg_target_long);
3773 #endif
3774         if (arg != TCG_CALL_DUMMY_ARG) {
3775             ts = arg_temp(arg);
3776             temp_load(s, ts, tcg_target_available_regs[ts->type],
3777                       s->reserved_regs, 0);
3778             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3779         }
3780 #ifndef TCG_TARGET_STACK_GROWSUP
3781         stack_offset += sizeof(tcg_target_long);
3782 #endif
3783     }
3784 
3785     /* assign input registers */
3786     allocated_regs = s->reserved_regs;
3787     for (i = 0; i < nb_regs; i++) {
3788         arg = op->args[nb_oargs + i];
3789         if (arg != TCG_CALL_DUMMY_ARG) {
3790             ts = arg_temp(arg);
3791             reg = tcg_target_call_iarg_regs[i];
3792 
3793             if (ts->val_type == TEMP_VAL_REG) {
3794                 if (ts->reg != reg) {
3795                     tcg_reg_free(s, reg, allocated_regs);
3796                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3797                         /*
3798                          * Cross register class move not supported.  Sync the
3799                          * temp back to its slot and load from there.
3800                          */
3801                         temp_sync(s, ts, allocated_regs, 0, 0);
3802                         tcg_out_ld(s, ts->type, reg,
3803                                    ts->mem_base->reg, ts->mem_offset);
3804                     }
3805                 }
3806             } else {
3807                 TCGRegSet arg_set = 0;
3808 
3809                 tcg_reg_free(s, reg, allocated_regs);
3810                 tcg_regset_set_reg(arg_set, reg);
3811                 temp_load(s, ts, arg_set, allocated_regs, 0);
3812             }
3813 
3814             tcg_regset_set_reg(allocated_regs, reg);
3815         }
3816     }
3817 
3818     /* mark dead temporaries and free the associated registers */
3819     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3820         if (IS_DEAD_ARG(i)) {
3821             temp_dead(s, arg_temp(op->args[i]));
3822         }
3823     }
3824 
3825     /* clobber call registers */
3826     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3827         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3828             tcg_reg_free(s, i, allocated_regs);
3829         }
3830     }
3831 
3832     /* Save globals if they might be written by the helper, sync them if
3833        they might be read. */
3834     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3835         /* Nothing to do */
3836     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3837         sync_globals(s, allocated_regs);
3838     } else {
3839         save_globals(s, allocated_regs);
3840     }
3841 
3842     tcg_out_call(s, func_addr);
3843 
3844     /* assign output registers and emit moves if needed */
3845     for (i = 0; i < nb_oargs; i++) {
3846         arg = op->args[i];
3847         ts = arg_temp(arg);
3848 
3849         /* ENV should not be modified.  */
3850         tcg_debug_assert(!ts->fixed_reg);
3851 
3852         reg = tcg_target_call_oarg_regs[i];
3853         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3854         if (ts->val_type == TEMP_VAL_REG) {
3855             s->reg_to_temp[ts->reg] = NULL;
3856         }
3857         ts->val_type = TEMP_VAL_REG;
3858         ts->reg = reg;
3859         ts->mem_coherent = 0;
3860         s->reg_to_temp[reg] = ts;
3861         if (NEED_SYNC_ARG(i)) {
3862             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3863         } else if (IS_DEAD_ARG(i)) {
3864             temp_dead(s, ts);
3865         }
3866     }
3867 }
3868 
3869 #ifdef CONFIG_PROFILER
3870 
3871 /* avoid copy/paste errors */
3872 #define PROF_ADD(to, from, field)                       \
3873     do {                                                \
3874         (to)->field += atomic_read(&((from)->field));   \
3875     } while (0)
3876 
3877 #define PROF_MAX(to, from, field)                                       \
3878     do {                                                                \
3879         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3880         if (val__ > (to)->field) {                                      \
3881             (to)->field = val__;                                        \
3882         }                                                               \
3883     } while (0)
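
     /* Example expansion: PROF_ADD(prof, orig, tb_count) reads as
        prof->tb_count += atomic_read(&orig->tb_count);  */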
3884 
3885 /* Pass in a zeroed @prof */
3886 static inline
3887 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3888 {
3889     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3890     unsigned int i;
3891 
3892     for (i = 0; i < n_ctxs; i++) {
3893         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3894         const TCGProfile *orig = &s->prof;
3895 
3896         if (counters) {
3897             PROF_ADD(prof, orig, cpu_exec_time);
3898             PROF_ADD(prof, orig, tb_count1);
3899             PROF_ADD(prof, orig, tb_count);
3900             PROF_ADD(prof, orig, op_count);
3901             PROF_MAX(prof, orig, op_count_max);
3902             PROF_ADD(prof, orig, temp_count);
3903             PROF_MAX(prof, orig, temp_count_max);
3904             PROF_ADD(prof, orig, del_op_count);
3905             PROF_ADD(prof, orig, code_in_len);
3906             PROF_ADD(prof, orig, code_out_len);
3907             PROF_ADD(prof, orig, search_out_len);
3908             PROF_ADD(prof, orig, interm_time);
3909             PROF_ADD(prof, orig, code_time);
3910             PROF_ADD(prof, orig, la_time);
3911             PROF_ADD(prof, orig, opt_time);
3912             PROF_ADD(prof, orig, restore_count);
3913             PROF_ADD(prof, orig, restore_time);
3914         }
3915         if (table) {
3916             int i;
3917 
3918             for (i = 0; i < NB_OPS; i++) {
3919                 PROF_ADD(prof, orig, table_op_count[i]);
3920             }
3921         }
3922     }
3923 }
3924 
3925 #undef PROF_ADD
3926 #undef PROF_MAX
3927 
3928 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3929 {
3930     tcg_profile_snapshot(prof, true, false);
3931 }
3932 
3933 static void tcg_profile_snapshot_table(TCGProfile *prof)
3934 {
3935     tcg_profile_snapshot(prof, false, true);
3936 }
3937 
3938 void tcg_dump_op_count(void)
3939 {
3940     TCGProfile prof = {};
3941     int i;
3942 
3943     tcg_profile_snapshot_table(&prof);
3944     for (i = 0; i < NB_OPS; i++) {
3945         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3946                     prof.table_op_count[i]);
3947     }
3948 }
3949 
3950 int64_t tcg_cpu_exec_time(void)
3951 {
3952     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3953     unsigned int i;
3954     int64_t ret = 0;
3955 
3956     for (i = 0; i < n_ctxs; i++) {
3957         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3958         const TCGProfile *prof = &s->prof;
3959 
3960         ret += atomic_read(&prof->cpu_exec_time);
3961     }
3962     return ret;
3963 }
3964 #else
3965 void tcg_dump_op_count(void)
3966 {
3967     qemu_printf("[TCG profiler not compiled]\n");
3968 }
3969 
3970 int64_t tcg_cpu_exec_time(void)
3971 {
3972     error_report("%s: TCG profiler not compiled", __func__);
3973     exit(EXIT_FAILURE);
3974 }
3975 #endif
3976 
3977 
3978 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3979 {
3980 #ifdef CONFIG_PROFILER
3981     TCGProfile *prof = &s->prof;
3982 #endif
3983     int i, num_insns;
3984     TCGOp *op;
3985 
3986 #ifdef CONFIG_PROFILER
3987     {
3988         int n = 0;
3989 
3990         QTAILQ_FOREACH(op, &s->ops, link) {
3991             n++;
3992         }
3993         atomic_set(&prof->op_count, prof->op_count + n);
3994         if (n > prof->op_count_max) {
3995             atomic_set(&prof->op_count_max, n);
3996         }
3997 
3998         n = s->nb_temps;
3999         atomic_set(&prof->temp_count, prof->temp_count + n);
4000         if (n > prof->temp_count_max) {
4001             atomic_set(&prof->temp_count_max, n);
4002         }
4003     }
4004 #endif
4005 
4006 #ifdef DEBUG_DISAS
4007     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4008                  && qemu_log_in_addr_range(tb->pc))) {
4009         qemu_log_lock();
4010         qemu_log("OP:\n");
4011         tcg_dump_ops(s, false);
4012         qemu_log("\n");
4013         qemu_log_unlock();
4014     }
4015 #endif
4016 
4017 #ifdef CONFIG_DEBUG_TCG
4018     /* Ensure all labels referenced have been emitted.  */
4019     {
4020         TCGLabel *l;
4021         bool error = false;
4022 
4023         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4024             if (unlikely(!l->present) && l->refs) {
4025                 qemu_log_mask(CPU_LOG_TB_OP,
4026                               "$L%d referenced but not present.\n", l->id);
4027                 error = true;
4028             }
4029         }
4030         assert(!error);
4031     }
4032 #endif
4033 
4034 #ifdef CONFIG_PROFILER
4035     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4036 #endif
4037 
4038 #ifdef USE_TCG_OPTIMIZATIONS
4039     tcg_optimize(s);
4040 #endif
4041 
4042 #ifdef CONFIG_PROFILER
4043     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4044     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4045 #endif
4046 
4047     reachable_code_pass(s);
4048     liveness_pass_1(s);
4049 
4050     if (s->nb_indirects > 0) {
4051 #ifdef DEBUG_DISAS
4052         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4053                      && qemu_log_in_addr_range(tb->pc))) {
4054             qemu_log_lock();
4055             qemu_log("OP before indirect lowering:\n");
4056             tcg_dump_ops(s, false);
4057             qemu_log("\n");
4058             qemu_log_unlock();
4059         }
4060 #endif
4061         /* Replace indirect temps with direct temps.  */
4062         if (liveness_pass_2(s)) {
4063             /* If changes were made, re-run liveness.  */
4064             liveness_pass_1(s);
4065         }
4066     }
4067 
4068 #ifdef CONFIG_PROFILER
4069     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4070 #endif
4071 
4072 #ifdef DEBUG_DISAS
4073     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4074                  && qemu_log_in_addr_range(tb->pc))) {
4075         qemu_log_lock();
4076         qemu_log("OP after optimization and liveness analysis:\n");
4077         tcg_dump_ops(s, true);
4078         qemu_log("\n");
4079         qemu_log_unlock();
4080     }
4081 #endif
4082 
4083     tcg_reg_alloc_start(s);
4084 
4085     s->code_buf = tb->tc.ptr;
4086     s->code_ptr = tb->tc.ptr;
4087 
4088 #ifdef TCG_TARGET_NEED_LDST_LABELS
4089     QSIMPLEQ_INIT(&s->ldst_labels);
4090 #endif
4091 #ifdef TCG_TARGET_NEED_POOL_LABELS
4092     s->pool_labels = NULL;
4093 #endif
4094 
4095     num_insns = -1;
4096     QTAILQ_FOREACH(op, &s->ops, link) {
4097         TCGOpcode opc = op->opc;
4098 
4099 #ifdef CONFIG_PROFILER
4100         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4101 #endif
4102 
4103         switch (opc) {
4104         case INDEX_op_mov_i32:
4105         case INDEX_op_mov_i64:
4106         case INDEX_op_mov_vec:
4107             tcg_reg_alloc_mov(s, op);
4108             break;
4109         case INDEX_op_movi_i32:
4110         case INDEX_op_movi_i64:
4111         case INDEX_op_dupi_vec:
4112             tcg_reg_alloc_movi(s, op);
4113             break;
4114         case INDEX_op_dup_vec:
4115             tcg_reg_alloc_dup(s, op);
4116             break;
4117         case INDEX_op_insn_start:
4118             if (num_insns >= 0) {
4119                 size_t off = tcg_current_code_size(s);
4120                 s->gen_insn_end_off[num_insns] = off;
4121                 /* Assert that we do not overflow our stored offset.  */
4122                 assert(s->gen_insn_end_off[num_insns] == off);
4123             }
4124             num_insns++;
4125             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4126                 target_ulong a;
4127 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4128                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4129 #else
4130                 a = op->args[i];
4131 #endif
4132                 s->gen_insn_data[num_insns][i] = a;
4133             }
4134             break;
4135         case INDEX_op_discard:
4136             temp_dead(s, arg_temp(op->args[0]));
4137             break;
4138         case INDEX_op_set_label:
4139             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4140             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4141             break;
4142         case INDEX_op_call:
4143             tcg_reg_alloc_call(s, op);
4144             break;
4145         default:
4146             /* Sanity check that we've not introduced any unhandled opcodes. */
4147             tcg_debug_assert(tcg_op_supported(opc));
4148             /* Note: it would be much faster to have specialized
4149                register allocator functions for some common argument
4150                patterns. */
4151             tcg_reg_alloc_op(s, op);
4152             break;
4153         }
4154 #ifdef CONFIG_DEBUG_TCG
4155         check_regs(s);
4156 #endif
4157         /* Test for (pending) buffer overflow.  The assumption is that any
4158            one operation beginning below the high water mark cannot overrun
4159            the buffer completely.  Thus we can test for overflow after
4160            generating code without having to check during generation.  */
4161         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4162             return -1;
4163         }
4164         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4165         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4166             return -2;
4167         }
4168     }
4169     tcg_debug_assert(num_insns >= 0);
4170     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4171 
4172     /* Generate TB finalization at the end of the block */
4173 #ifdef TCG_TARGET_NEED_LDST_LABELS
4174     i = tcg_out_ldst_finalize(s);
4175     if (i < 0) {
4176         return i;
4177     }
4178 #endif
4179 #ifdef TCG_TARGET_NEED_POOL_LABELS
4180     i = tcg_out_pool_finalize(s);
4181     if (i < 0) {
4182         return i;
4183     }
4184 #endif
4185     if (!tcg_resolve_relocs(s)) {
4186         return -2;
4187     }
4188 
4189     /* flush instruction cache */
4190     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4191 
4192     return tcg_current_code_size(s);
4193 }
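
     /* tcg_gen_code() thus returns the size of the generated code on
        success, or a negative value on overflow: -1 once the code buffer
        high-water mark is passed, -2 when the searchable insn data or a
        relocation overflows.  The caller is expected to retry the
        translation with a fresh buffer.  */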
4194 
4195 #ifdef CONFIG_PROFILER
4196 void tcg_dump_info(void)
4197 {
4198     TCGProfile prof = {};
4199     const TCGProfile *s;
4200     int64_t tb_count;
4201     int64_t tb_div_count;
4202     int64_t tot;
4203 
4204     tcg_profile_snapshot_counters(&prof);
4205     s = &prof;
4206     tb_count = s->tb_count;
4207     tb_div_count = tb_count ? tb_count : 1;
4208     tot = s->interm_time + s->code_time;
4209 
4210     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4211                 tot, tot / 2.4e9);
4212     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4213                 " %0.1f%%)\n",
4214                 tb_count, s->tb_count1 - tb_count,
4215                 (double)(s->tb_count1 - s->tb_count)
4216                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4217     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4218                 (double)s->op_count / tb_div_count, s->op_count_max);
4219     qemu_printf("deleted ops/TB      %0.2f\n",
4220                 (double)s->del_op_count / tb_div_count);
4221     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4222                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4223     qemu_printf("avg host code/TB    %0.1f\n",
4224                 (double)s->code_out_len / tb_div_count);
4225     qemu_printf("avg search data/TB  %0.1f\n",
4226                 (double)s->search_out_len / tb_div_count);
4227 
4228     qemu_printf("cycles/op           %0.1f\n",
4229                 s->op_count ? (double)tot / s->op_count : 0);
4230     qemu_printf("cycles/in byte      %0.1f\n",
4231                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4232     qemu_printf("cycles/out byte     %0.1f\n",
4233                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4234     qemu_printf("cycles/search byte  %0.1f\n",
4235                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4236     if (tot == 0) {
4237         tot = 1;
4238     }
4239     qemu_printf("  gen_interm time   %0.1f%%\n",
4240                 (double)s->interm_time / tot * 100.0);
4241     qemu_printf("  gen_code time     %0.1f%%\n",
4242                 (double)s->code_time / tot * 100.0);
4243     qemu_printf("optim./code time    %0.1f%%\n",
4244                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4245                 * 100.0);
4246     qemu_printf("liveness/code time  %0.1f%%\n",
4247                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4248     qemu_printf("cpu_restore count   %" PRId64 "\n",
4249                 s->restore_count);
4250     qemu_printf("  avg cycles        %0.1f\n",
4251                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4252 }
4253 #else
4254 void tcg_dump_info(void)
4255 {
4256     qemu_printf("[TCG profiler not compiled]\n");
4257 }
4258 #endif
4259 
4260 #ifdef ELF_HOST_MACHINE
4261 /* In order to use this feature, the backend needs to do three things:
4262 
4263    (1) Define ELF_HOST_MACHINE to indicate both what value to
4264        put into the ELF image and to indicate support for the feature.
4265 
4266    (2) Define tcg_register_jit.  This should create a buffer containing
4267        the contents of a .debug_frame section that describes the post-
4268        prologue unwind info for the tcg machine.
4269 
4270    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4271 */
4272 
4273 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4274 typedef enum {
4275     JIT_NOACTION = 0,
4276     JIT_REGISTER_FN,
4277     JIT_UNREGISTER_FN
4278 } jit_actions_t;
4279 
4280 struct jit_code_entry {
4281     struct jit_code_entry *next_entry;
4282     struct jit_code_entry *prev_entry;
4283     const void *symfile_addr;
4284     uint64_t symfile_size;
4285 };
4286 
4287 struct jit_descriptor {
4288     uint32_t version;
4289     uint32_t action_flag;
4290     struct jit_code_entry *relevant_entry;
4291     struct jit_code_entry *first_entry;
4292 };
4293 
4294 void __jit_debug_register_code(void) __attribute__((noinline));
4295 void __jit_debug_register_code(void)
4296 {
4297     asm("");
4298 }
4299 
4300 /* Must statically initialize the version, because GDB may check
4301    the version before we can set it.  */
4302 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4303 
4304 /* End GDB interface.  */
4305 
4306 static int find_string(const char *strtab, const char *str)
4307 {
4308     const char *p = strtab + 1;
4309 
4310     while (1) {
4311         if (strcmp(p, str) == 0) {
4312             return p - strtab;
4313         }
4314         p += strlen(p) + 1;
4315     }
4316 }
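
     /* Note that find_string() never fails; it is only called with
        strings known to be present in the table.  E.g. with the img->str
        layout below, find_string(img->str, ".text") returns offset 1.  */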
4317 
4318 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4319                                  const void *debug_frame,
4320                                  size_t debug_frame_size)
4321 {
4322     struct __attribute__((packed)) DebugInfo {
4323         uint32_t  len;
4324         uint16_t  version;
4325         uint32_t  abbrev;
4326         uint8_t   ptr_size;
4327         uint8_t   cu_die;
4328         uint16_t  cu_lang;
4329         uintptr_t cu_low_pc;
4330         uintptr_t cu_high_pc;
4331         uint8_t   fn_die;
4332         char      fn_name[16];
4333         uintptr_t fn_low_pc;
4334         uintptr_t fn_high_pc;
4335         uint8_t   cu_eoc;
4336     };
4337 
4338     struct ElfImage {
4339         ElfW(Ehdr) ehdr;
4340         ElfW(Phdr) phdr;
4341         ElfW(Shdr) shdr[7];
4342         ElfW(Sym)  sym[2];
4343         struct DebugInfo di;
4344         uint8_t    da[24];
4345         char       str[80];
4346     };
4347 
4348     struct ElfImage *img;
4349 
4350     static const struct ElfImage img_template = {
4351         .ehdr = {
4352             .e_ident[EI_MAG0] = ELFMAG0,
4353             .e_ident[EI_MAG1] = ELFMAG1,
4354             .e_ident[EI_MAG2] = ELFMAG2,
4355             .e_ident[EI_MAG3] = ELFMAG3,
4356             .e_ident[EI_CLASS] = ELF_CLASS,
4357             .e_ident[EI_DATA] = ELF_DATA,
4358             .e_ident[EI_VERSION] = EV_CURRENT,
4359             .e_type = ET_EXEC,
4360             .e_machine = ELF_HOST_MACHINE,
4361             .e_version = EV_CURRENT,
4362             .e_phoff = offsetof(struct ElfImage, phdr),
4363             .e_shoff = offsetof(struct ElfImage, shdr),
4364             .e_ehsize = sizeof(ElfW(Ehdr)),
4365             .e_phentsize = sizeof(ElfW(Phdr)),
4366             .e_phnum = 1,
4367             .e_shentsize = sizeof(ElfW(Shdr)),
4368             .e_shnum = ARRAY_SIZE(img->shdr),
4369             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4370 #ifdef ELF_HOST_FLAGS
4371             .e_flags = ELF_HOST_FLAGS,
4372 #endif
4373 #ifdef ELF_OSABI
4374             .e_ident[EI_OSABI] = ELF_OSABI,
4375 #endif
4376         },
4377         .phdr = {
4378             .p_type = PT_LOAD,
4379             .p_flags = PF_X,
4380         },
4381         .shdr = {
4382             [0] = { .sh_type = SHT_NULL },
4383             /* Trick: The contents of code_gen_buffer are not present in
4384                this fake ELF file; that got allocated elsewhere.  Therefore
4385                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4386                will not look for contents.  We can record any address.  */
4387             [1] = { /* .text */
4388                 .sh_type = SHT_NOBITS,
4389                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4390             },
4391             [2] = { /* .debug_info */
4392                 .sh_type = SHT_PROGBITS,
4393                 .sh_offset = offsetof(struct ElfImage, di),
4394                 .sh_size = sizeof(struct DebugInfo),
4395             },
4396             [3] = { /* .debug_abbrev */
4397                 .sh_type = SHT_PROGBITS,
4398                 .sh_offset = offsetof(struct ElfImage, da),
4399                 .sh_size = sizeof(img->da),
4400             },
4401             [4] = { /* .debug_frame */
4402                 .sh_type = SHT_PROGBITS,
4403                 .sh_offset = sizeof(struct ElfImage),
4404             },
4405             [5] = { /* .symtab */
4406                 .sh_type = SHT_SYMTAB,
4407                 .sh_offset = offsetof(struct ElfImage, sym),
4408                 .sh_size = sizeof(img->sym),
4409                 .sh_info = 1,
4410                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4411                 .sh_entsize = sizeof(ElfW(Sym)),
4412             },
4413             [6] = { /* .strtab */
4414                 .sh_type = SHT_STRTAB,
4415                 .sh_offset = offsetof(struct ElfImage, str),
4416                 .sh_size = sizeof(img->str),
4417             }
4418         },
4419         .sym = {
4420             [1] = { /* code_gen_buffer */
4421                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4422                 .st_shndx = 1,
4423             }
4424         },
4425         .di = {
4426             .len = sizeof(struct DebugInfo) - 4,
4427             .version = 2,
4428             .ptr_size = sizeof(void *),
4429             .cu_die = 1,
4430             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4431             .fn_die = 2,
4432             .fn_name = "code_gen_buffer"
4433         },
4434         .da = {
4435             1,          /* abbrev number (the cu) */
4436             0x11, 1,    /* DW_TAG_compile_unit, has children */
4437             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4438             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4439             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4440             0, 0,       /* end of abbrev */
4441             2,          /* abbrev number (the fn) */
4442             0x2e, 0,    /* DW_TAG_subprogram, no children */
4443             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4444             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4445             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4446             0, 0,       /* end of abbrev */
4447             0           /* no more abbrev */
4448         },
4449         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4450                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4451     };
4452 
4453     /* We only need a single jit entry; statically allocate it.  */
4454     static struct jit_code_entry one_entry;
4455 
4456     uintptr_t buf = (uintptr_t)buf_ptr;
4457     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4458     DebugFrameHeader *dfh;
4459 
4460     img = g_malloc(img_size);
4461     *img = img_template;
4462 
4463     img->phdr.p_vaddr = buf;
4464     img->phdr.p_paddr = buf;
4465     img->phdr.p_memsz = buf_size;
4466 
4467     img->shdr[1].sh_name = find_string(img->str, ".text");
4468     img->shdr[1].sh_addr = buf;
4469     img->shdr[1].sh_size = buf_size;
4470 
4471     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4472     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4473 
4474     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4475     img->shdr[4].sh_size = debug_frame_size;
4476 
4477     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4478     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4479 
4480     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4481     img->sym[1].st_value = buf;
4482     img->sym[1].st_size = buf_size;
4483 
4484     img->di.cu_low_pc = buf;
4485     img->di.cu_high_pc = buf + buf_size;
4486     img->di.fn_low_pc = buf;
4487     img->di.fn_high_pc = buf + buf_size;
4488 
4489     dfh = (DebugFrameHeader *)(img + 1);
4490     memcpy(dfh, debug_frame, debug_frame_size);
4491     dfh->fde.func_start = buf;
4492     dfh->fde.func_len = buf_size;
4493 
4494 #ifdef DEBUG_JIT
4495     /* Enable this block to be able to debug the ELF image file creation.
4496        One can use readelf, objdump, or other inspection utilities.  */
4497     {
4498         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4499         if (f) {
4500             if (fwrite(img, img_size, 1, f) != 1) {
4501                 /* Avoid stupid unused return value warning for fwrite.  */
4502             }
4503             fclose(f);
4504         }
4505     }
4506 #endif
4507 
4508     one_entry.symfile_addr = img;
4509     one_entry.symfile_size = img_size;
4510 
4511     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4512     __jit_debug_descriptor.relevant_entry = &one_entry;
4513     __jit_debug_descriptor.first_entry = &one_entry;
4514     __jit_debug_register_code();
4515 }
4516 #else
4517 /* No support for the feature.  Provide the entry point expected by exec.c,
4518    and implement the internal function we declared earlier.  */
4519 
4520 static void tcg_register_jit_int(void *buf, size_t size,
4521                                  const void *debug_frame,
4522                                  size_t debug_frame_size)
4523 {
4524 }
4525 
4526 void tcg_register_jit(void *buf, size_t buf_size)
4527 {
4528 }
4529 #endif /* ELF_HOST_MACHINE */
4530 
4531 #if !TCG_TARGET_MAYBE_vec
4532 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4533 {
4534     g_assert_not_reached();
4535 }
4536 #endif
4537