xref: /openbmc/qemu/tcg/tcg.c (revision 200dbf37)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/cpu-common.h"
46 #include "exec/exec-all.h"
47 
48 #include "tcg-op.h"
49 
50 #if UINTPTR_MAX == UINT32_MAX
51 # define ELF_CLASS  ELFCLASS32
52 #else
53 # define ELF_CLASS  ELFCLASS64
54 #endif
55 #ifdef HOST_WORDS_BIGENDIAN
56 # define ELF_DATA   ELFDATA2MSB
57 #else
58 # define ELF_DATA   ELFDATA2LSB
59 #endif
60 
61 #include "elf.h"
62 #include "exec/log.h"
63 #include "sysemu/sysemu.h"
64 
65 /* Forward declarations for functions declared in tcg-target.inc.c and
66    used here. */
67 static void tcg_target_init(TCGContext *s);
68 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
69 static void tcg_target_qemu_prologue(TCGContext *s);
70 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
71                         intptr_t value, intptr_t addend);
72 
73 /* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* First member is forced to pointer-size alignment. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
83 
typedef struct QEMU_PACKED {
    /* First member is forced to pointer-size alignment. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t cie_offset;
    /* Both function fields are pointer-sized integers. */
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
90 
91 typedef struct QEMU_PACKED {
92     DebugFrameCIE cie;
93     DebugFrameFDEHeader fde;
94 } DebugFrameHeader;
95 
/*
 * Declared ahead of its definition and tagged as possibly unused, so the
 * compiler stays quiet when nothing references it.
 */
static void __attribute__((unused))
tcg_register_jit_int(void *buf, size_t size,
                     const void *debug_frame, size_t debug_frame_size);
100 
101 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
102 static const char *target_parse_constraint(TCGArgConstraint *ct,
103                                            const char *ct_str, TCGType type);
104 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
105                        intptr_t arg2);
106 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
107 static void tcg_out_movi(TCGContext *s, TCGType type,
108                          TCGReg ret, tcg_target_long arg);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
110                        const int *const_args);
111 #if TCG_TARGET_MAYBE_vec
112 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
113                             TCGReg dst, TCGReg src);
114 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
115                              TCGReg dst, TCGReg base, intptr_t offset);
116 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
117                              TCGReg dst, tcg_target_long arg);
118 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
119                            unsigned vece, const TCGArg *args,
120                            const int *const_args);
121 #else
122 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
123                                    TCGReg dst, TCGReg src)
124 {
125     g_assert_not_reached();
126 }
127 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
128                                     TCGReg dst, TCGReg base, intptr_t offset)
129 {
130     g_assert_not_reached();
131 }
132 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
133                                     TCGReg dst, tcg_target_long arg)
134 {
135     g_assert_not_reached();
136 }
137 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
138                                   unsigned vece, const TCGArg *args,
139                                   const int *const_args)
140 {
141     g_assert_not_reached();
142 }
143 #endif
144 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
145                        intptr_t arg2);
146 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
147                         TCGReg base, intptr_t ofs);
148 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
149 static int tcg_target_const_match(tcg_target_long val, TCGType type,
150                                   const TCGArgConstraint *arg_ct);
151 #ifdef TCG_TARGET_NEED_LDST_LABELS
152 static int tcg_out_ldst_finalize(TCGContext *s);
153 #endif
154 
155 #define TCG_HIGHWATER 1024
156 
157 static TCGContext **tcg_ctxs;
158 static unsigned int n_tcg_ctxs;
159 TCGv_env cpu_env = 0;
160 
161 struct tcg_region_tree {
162     QemuMutex lock;
163     GTree *tree;
164     /* padding to avoid false sharing is computed at run-time */
165 };
166 
167 /*
168  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
169  * dynamically allocate from as demand dictates. Given appropriate region
170  * sizing, this minimizes flushes even when some TCG threads generate a lot
171  * more code than others.
172  */
173 struct tcg_region_state {
174     QemuMutex lock;
175 
176     /* fields set at init time */
177     void *start;
178     void *start_aligned;
179     void *end;
180     size_t n;
181     size_t size; /* size of one region */
182     size_t stride; /* .size + guard size */
183 
184     /* fields protected by the lock */
185     size_t current; /* current region index */
186     size_t agg_size_full; /* aggregate size of full regions */
187 };
188 
189 static struct tcg_region_state region;
190 /*
191  * This is an array of struct tcg_region_tree's, with padding.
192  * We use void * to simplify the computation of region_trees[i]; each
193  * struct is found every tree_size bytes.
194  */
195 static void *region_trees;
196 static size_t tree_size;
197 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
198 static TCGRegSet tcg_target_call_clobber_regs;
199 
200 #if TCG_TARGET_INSN_UNIT_SIZE == 1
201 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
202 {
203     *s->code_ptr++ = v;
204 }
205 
206 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
207                                                       uint8_t v)
208 {
209     *p = v;
210 }
211 #endif
212 
213 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
214 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
215 {
216     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
217         *s->code_ptr++ = v;
218     } else {
219         tcg_insn_unit *p = s->code_ptr;
220         memcpy(p, &v, sizeof(v));
221         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
222     }
223 }
224 
225 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
226                                                        uint16_t v)
227 {
228     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
229         *p = v;
230     } else {
231         memcpy(p, &v, sizeof(v));
232     }
233 }
234 #endif
235 
236 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
237 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
238 {
239     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
240         *s->code_ptr++ = v;
241     } else {
242         tcg_insn_unit *p = s->code_ptr;
243         memcpy(p, &v, sizeof(v));
244         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
245     }
246 }
247 
248 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
249                                                        uint32_t v)
250 {
251     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
252         *p = v;
253     } else {
254         memcpy(p, &v, sizeof(v));
255     }
256 }
257 #endif
258 
259 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
260 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
261 {
262     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
263         *s->code_ptr++ = v;
264     } else {
265         tcg_insn_unit *p = s->code_ptr;
266         memcpy(p, &v, sizeof(v));
267         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
268     }
269 }
270 
271 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
272                                                        uint64_t v)
273 {
274     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
275         *p = v;
276     } else {
277         memcpy(p, &v, sizeof(v));
278     }
279 }
280 #endif
281 
282 /* label relocation processing */
283 
284 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
285                           TCGLabel *l, intptr_t addend)
286 {
287     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
288 
289     r->type = type;
290     r->ptr = code_ptr;
291     r->addend = addend;
292     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
293 }
294 
295 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
296 {
297     tcg_debug_assert(!l->has_value);
298     l->has_value = 1;
299     l->u.value_ptr = ptr;
300 }
301 
302 TCGLabel *gen_new_label(void)
303 {
304     TCGContext *s = tcg_ctx;
305     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
306 
307     memset(l, 0, sizeof(TCGLabel));
308     l->id = s->nb_labels++;
309     QSIMPLEQ_INIT(&l->relocs);
310 
311     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
312 
313     return l;
314 }
315 
316 static bool tcg_resolve_relocs(TCGContext *s)
317 {
318     TCGLabel *l;
319 
320     QSIMPLEQ_FOREACH(l, &s->labels, next) {
321         TCGRelocation *r;
322         uintptr_t value = l->u.value;
323 
324         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
325             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
326                 return false;
327             }
328         }
329     }
330     return true;
331 }
332 
333 static void set_jmp_reset_offset(TCGContext *s, int which)
334 {
335     size_t off = tcg_current_code_size(s);
336     s->tb_jmp_reset_offset[which] = off;
337     /* Make sure that we didn't overflow the stored offset.  */
338     assert(s->tb_jmp_reset_offset[which] == off);
339 }
340 
341 #include "tcg-target.inc.c"
342 
343 /* compare a pointer @ptr and a tb_tc @s */
344 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
345 {
346     if (ptr >= s->ptr + s->size) {
347         return 1;
348     } else if (ptr < s->ptr) {
349         return -1;
350     }
351     return 0;
352 }
353 
354 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
355 {
356     const struct tb_tc *a = ap;
357     const struct tb_tc *b = bp;
358 
359     /*
360      * When both sizes are set, we know this isn't a lookup.
361      * This is the most likely case: every TB must be inserted; lookups
362      * are a lot less frequent.
363      */
364     if (likely(a->size && b->size)) {
365         if (a->ptr > b->ptr) {
366             return 1;
367         } else if (a->ptr < b->ptr) {
368             return -1;
369         }
370         /* a->ptr == b->ptr should happen only on deletions */
371         g_assert(a->size == b->size);
372         return 0;
373     }
374     /*
375      * All lookups have either .size field set to 0.
376      * From the glib sources we see that @ap is always the lookup key. However
377      * the docs provide no guarantee, so we just mark this case as likely.
378      */
379     if (likely(a->size == 0)) {
380         return ptr_cmp_tb_tc(a->ptr, b);
381     }
382     return ptr_cmp_tb_tc(b->ptr, a);
383 }
384 
385 static void tcg_region_trees_init(void)
386 {
387     size_t i;
388 
389     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
390     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
391     for (i = 0; i < region.n; i++) {
392         struct tcg_region_tree *rt = region_trees + i * tree_size;
393 
394         qemu_mutex_init(&rt->lock);
395         rt->tree = g_tree_new(tb_tc_cmp);
396     }
397 }
398 
399 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
400 {
401     size_t region_idx;
402 
403     if (p < region.start_aligned) {
404         region_idx = 0;
405     } else {
406         ptrdiff_t offset = p - region.start_aligned;
407 
408         if (offset > region.stride * (region.n - 1)) {
409             region_idx = region.n - 1;
410         } else {
411             region_idx = offset / region.stride;
412         }
413     }
414     return region_trees + region_idx * tree_size;
415 }
416 
417 void tcg_tb_insert(TranslationBlock *tb)
418 {
419     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
420 
421     qemu_mutex_lock(&rt->lock);
422     g_tree_insert(rt->tree, &tb->tc, tb);
423     qemu_mutex_unlock(&rt->lock);
424 }
425 
426 void tcg_tb_remove(TranslationBlock *tb)
427 {
428     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
429 
430     qemu_mutex_lock(&rt->lock);
431     g_tree_remove(rt->tree, &tb->tc);
432     qemu_mutex_unlock(&rt->lock);
433 }
434 
435 /*
436  * Find the TB 'tb' such that
437  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
438  * Return NULL if not found.
439  */
440 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
441 {
442     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
443     TranslationBlock *tb;
444     struct tb_tc s = { .ptr = (void *)tc_ptr };
445 
446     qemu_mutex_lock(&rt->lock);
447     tb = g_tree_lookup(rt->tree, &s);
448     qemu_mutex_unlock(&rt->lock);
449     return tb;
450 }
451 
452 static void tcg_region_tree_lock_all(void)
453 {
454     size_t i;
455 
456     for (i = 0; i < region.n; i++) {
457         struct tcg_region_tree *rt = region_trees + i * tree_size;
458 
459         qemu_mutex_lock(&rt->lock);
460     }
461 }
462 
463 static void tcg_region_tree_unlock_all(void)
464 {
465     size_t i;
466 
467     for (i = 0; i < region.n; i++) {
468         struct tcg_region_tree *rt = region_trees + i * tree_size;
469 
470         qemu_mutex_unlock(&rt->lock);
471     }
472 }
473 
474 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
475 {
476     size_t i;
477 
478     tcg_region_tree_lock_all();
479     for (i = 0; i < region.n; i++) {
480         struct tcg_region_tree *rt = region_trees + i * tree_size;
481 
482         g_tree_foreach(rt->tree, func, user_data);
483     }
484     tcg_region_tree_unlock_all();
485 }
486 
487 size_t tcg_nb_tbs(void)
488 {
489     size_t nb_tbs = 0;
490     size_t i;
491 
492     tcg_region_tree_lock_all();
493     for (i = 0; i < region.n; i++) {
494         struct tcg_region_tree *rt = region_trees + i * tree_size;
495 
496         nb_tbs += g_tree_nnodes(rt->tree);
497     }
498     tcg_region_tree_unlock_all();
499     return nb_tbs;
500 }
501 
502 static void tcg_region_tree_reset_all(void)
503 {
504     size_t i;
505 
506     tcg_region_tree_lock_all();
507     for (i = 0; i < region.n; i++) {
508         struct tcg_region_tree *rt = region_trees + i * tree_size;
509 
510         /* Increment the refcount first so that destroy acts as a reset */
511         g_tree_ref(rt->tree);
512         g_tree_destroy(rt->tree);
513     }
514     tcg_region_tree_unlock_all();
515 }
516 
517 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
518 {
519     void *start, *end;
520 
521     start = region.start_aligned + curr_region * region.stride;
522     end = start + region.size;
523 
524     if (curr_region == 0) {
525         start = region.start;
526     }
527     if (curr_region == region.n - 1) {
528         end = region.end;
529     }
530 
531     *pstart = start;
532     *pend = end;
533 }
534 
535 static void tcg_region_assign(TCGContext *s, size_t curr_region)
536 {
537     void *start, *end;
538 
539     tcg_region_bounds(curr_region, &start, &end);
540 
541     s->code_gen_buffer = start;
542     s->code_gen_ptr = start;
543     s->code_gen_buffer_size = end - start;
544     s->code_gen_highwater = end - TCG_HIGHWATER;
545 }
546 
547 static bool tcg_region_alloc__locked(TCGContext *s)
548 {
549     if (region.current == region.n) {
550         return true;
551     }
552     tcg_region_assign(s, region.current);
553     region.current++;
554     return false;
555 }
556 
557 /*
558  * Request a new region once the one in use has filled up.
559  * Returns true on error.
560  */
561 static bool tcg_region_alloc(TCGContext *s)
562 {
563     bool err;
564     /* read the region size now; alloc__locked will overwrite it on success */
565     size_t size_full = s->code_gen_buffer_size;
566 
567     qemu_mutex_lock(&region.lock);
568     err = tcg_region_alloc__locked(s);
569     if (!err) {
570         region.agg_size_full += size_full - TCG_HIGHWATER;
571     }
572     qemu_mutex_unlock(&region.lock);
573     return err;
574 }
575 
576 /*
577  * Perform a context's first region allocation.
578  * This function does _not_ increment region.agg_size_full.
579  */
580 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
581 {
582     return tcg_region_alloc__locked(s);
583 }
584 
585 /* Call from a safe-work context */
586 void tcg_region_reset_all(void)
587 {
588     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
589     unsigned int i;
590 
591     qemu_mutex_lock(&region.lock);
592     region.current = 0;
593     region.agg_size_full = 0;
594 
595     for (i = 0; i < n_ctxs; i++) {
596         TCGContext *s = atomic_read(&tcg_ctxs[i]);
597         bool err = tcg_region_initial_alloc__locked(s);
598 
599         g_assert(!err);
600     }
601     qemu_mutex_unlock(&region.lock);
602 
603     tcg_region_tree_reset_all();
604 }
605 
606 #ifdef CONFIG_USER_ONLY
/* User-mode builds always use exactly one region. */
static size_t tcg_n_regions(void)
{
    const size_t single_region = 1;

    return single_region;
}
611 #else
612 /*
613  * It is likely that some vCPUs will translate more code than others, so we
614  * first try to set more regions than max_cpus, with those regions being of
615  * reasonable size. If that's not possible we make do by evenly dividing
616  * the code_gen_buffer among the vCPUs.
617  */
618 static size_t tcg_n_regions(void)
619 {
620     size_t i;
621 
622     /* Use a single region if all we have is one vCPU thread */
623     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
624         return 1;
625     }
626 
627     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
628     for (i = 8; i > 0; i--) {
629         size_t regions_per_thread = i;
630         size_t region_size;
631 
632         region_size = tcg_init_ctx.code_gen_buffer_size;
633         region_size /= max_cpus * regions_per_thread;
634 
635         if (region_size >= 2 * 1024u * 1024) {
636             return max_cpus * regions_per_thread;
637         }
638     }
639     /* If we can't, then just allocate one region per vCPU thread */
640     return max_cpus;
641 }
642 #endif
643 
644 /*
645  * Initializes region partitioning.
646  *
647  * Called at init time from the parent thread (i.e. the one calling
648  * tcg_context_init), after the target's TCG globals have been set.
649  *
650  * Region partitioning works by splitting code_gen_buffer into separate regions,
651  * and then assigning regions to TCG threads so that the threads can translate
652  * code in parallel without synchronization.
653  *
654  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
655  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
656  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
657  * must have been parsed before calling this function, since it calls
658  * qemu_tcg_mttcg_enabled().
659  *
660  * In user-mode we use a single region.  Having multiple regions in user-mode
661  * is not supported, because the number of vCPU threads (recall that each thread
662  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
663  * OS, and usually this number is huge (tens of thousands is not uncommon).
664  * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
667  *
668  * However, this user-mode limitation is unlikely to be a significant problem
669  * in practice. Multi-threaded guests share most if not all of their translated
670  * code, which makes parallel code generation less appealing than in softmmu.
671  */
672 void tcg_region_init(void)
673 {
674     void *buf = tcg_init_ctx.code_gen_buffer;
675     void *aligned;
676     size_t size = tcg_init_ctx.code_gen_buffer_size;
677     size_t page_size = qemu_real_host_page_size;
678     size_t region_size;
679     size_t n_regions;
680     size_t i;
681 
682     n_regions = tcg_n_regions();
683 
684     /* The first region will be 'aligned - buf' bytes larger than the others */
685     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
686     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
687     /*
688      * Make region_size a multiple of page_size, using aligned as the start.
689      * As a result of this we might end up with a few extra pages at the end of
690      * the buffer; we will assign those to the last region.
691      */
692     region_size = (size - (aligned - buf)) / n_regions;
693     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
694 
695     /* A region must have at least 2 pages; one code, one guard */
696     g_assert(region_size >= 2 * page_size);
697 
698     /* init the region struct */
699     qemu_mutex_init(&region.lock);
700     region.n = n_regions;
701     region.size = region_size - page_size;
702     region.stride = region_size;
703     region.start = buf;
704     region.start_aligned = aligned;
705     /* page-align the end, since its last page will be a guard page */
706     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
707     /* account for that last guard page */
708     region.end -= page_size;
709 
710     /* set guard pages */
711     for (i = 0; i < region.n; i++) {
712         void *start, *end;
713         int rc;
714 
715         tcg_region_bounds(i, &start, &end);
716         rc = qemu_mprotect_none(end, page_size);
717         g_assert(!rc);
718     }
719 
720     tcg_region_trees_init();
721 
722     /* In user-mode we support only one ctx, so do the initial allocation now */
723 #ifdef CONFIG_USER_ONLY
724     {
725         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
726 
727         g_assert(!err);
728     }
729 #endif
730 }
731 
732 /*
733  * All TCG threads except the parent (i.e. the one that called tcg_context_init
734  * and registered the target's TCG globals) must register with this function
735  * before initiating translation.
736  *
737  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
738  * of tcg_region_init() for the reasoning behind this.
739  *
740  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
742  * is not used anymore for translation once this function is called.
743  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
746  */
747 #ifdef CONFIG_USER_ONLY
748 void tcg_register_thread(void)
749 {
750     tcg_ctx = &tcg_init_ctx;
751 }
752 #else
753 void tcg_register_thread(void)
754 {
755     TCGContext *s = g_malloc(sizeof(*s));
756     unsigned int i, n;
757     bool err;
758 
759     *s = tcg_init_ctx;
760 
761     /* Relink mem_base.  */
762     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
763         if (tcg_init_ctx.temps[i].mem_base) {
764             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
765             tcg_debug_assert(b >= 0 && b < n);
766             s->temps[i].mem_base = &s->temps[b];
767         }
768     }
769 
770     /* Claim an entry in tcg_ctxs */
771     n = atomic_fetch_inc(&n_tcg_ctxs);
772     g_assert(n < max_cpus);
773     atomic_set(&tcg_ctxs[n], s);
774 
775     tcg_ctx = s;
776     qemu_mutex_lock(&region.lock);
777     err = tcg_region_initial_alloc__locked(tcg_ctx);
778     g_assert(!err);
779     qemu_mutex_unlock(&region.lock);
780 }
781 #endif /* !CONFIG_USER_ONLY */
782 
783 /*
784  * Returns the size (in bytes) of all translated code (i.e. from all regions)
785  * currently in the cache.
786  * See also: tcg_code_capacity()
787  * Do not confuse with tcg_current_code_size(); that one applies to a single
788  * TCG context.
789  */
790 size_t tcg_code_size(void)
791 {
792     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
793     unsigned int i;
794     size_t total;
795 
796     qemu_mutex_lock(&region.lock);
797     total = region.agg_size_full;
798     for (i = 0; i < n_ctxs; i++) {
799         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
800         size_t size;
801 
802         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
803         g_assert(size <= s->code_gen_buffer_size);
804         total += size;
805     }
806     qemu_mutex_unlock(&region.lock);
807     return total;
808 }
809 
810 /*
811  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
812  * regions.
813  * See also: tcg_code_size()
814  */
815 size_t tcg_code_capacity(void)
816 {
817     size_t guard_size, capacity;
818 
819     /* no need for synchronization; these variables are set at init time */
820     guard_size = region.stride - region.size;
821     capacity = region.end + guard_size - region.start;
822     capacity -= region.n * (guard_size + TCG_HIGHWATER);
823     return capacity;
824 }
825 
826 size_t tcg_tb_phys_invalidate_count(void)
827 {
828     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
829     unsigned int i;
830     size_t total = 0;
831 
832     for (i = 0; i < n_ctxs; i++) {
833         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
834 
835         total += atomic_read(&s->tb_phys_invalidate_count);
836     }
837     return total;
838 }
839 
840 /* pool based memory allocation */
841 void *tcg_malloc_internal(TCGContext *s, int size)
842 {
843     TCGPool *p;
844     int pool_size;
845 
846     if (size > TCG_POOL_CHUNK_SIZE) {
847         /* big malloc: insert a new pool (XXX: could optimize) */
848         p = g_malloc(sizeof(TCGPool) + size);
849         p->size = size;
850         p->next = s->pool_first_large;
851         s->pool_first_large = p;
852         return p->data;
853     } else {
854         p = s->pool_current;
855         if (!p) {
856             p = s->pool_first;
857             if (!p)
858                 goto new_pool;
859         } else {
860             if (!p->next) {
861             new_pool:
862                 pool_size = TCG_POOL_CHUNK_SIZE;
863                 p = g_malloc(sizeof(TCGPool) + pool_size);
864                 p->size = pool_size;
865                 p->next = NULL;
866                 if (s->pool_current)
867                     s->pool_current->next = p;
868                 else
869                     s->pool_first = p;
870             } else {
871                 p = p->next;
872             }
873         }
874     }
875     s->pool_current = p;
876     s->pool_cur = p->data + size;
877     s->pool_end = p->data + p->size;
878     return p->data;
879 }
880 
881 void tcg_pool_reset(TCGContext *s)
882 {
883     TCGPool *p, *t;
884     for (p = s->pool_first_large; p; p = t) {
885         t = p->next;
886         g_free(p);
887     }
888     s->pool_first_large = NULL;
889     s->pool_cur = s->pool_end = NULL;
890     s->pool_current = NULL;
891 }
892 
/* One helper table entry; tcg_context_init() indexes them by .func. */
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned int flags;
    unsigned int sizemask;
} TCGHelperInfo;
899 
900 #include "exec/helper-proto.h"
901 
902 static const TCGHelperInfo all_helpers[] = {
903 #include "exec/helper-tcg.h"
904 };
905 static GHashTable *helper_table;
906 
907 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
908 static void process_op_defs(TCGContext *s);
909 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
910                                             TCGReg reg, const char *name);
911 
912 void tcg_context_init(TCGContext *s)
913 {
914     int op, total_args, n, i;
915     TCGOpDef *def;
916     TCGArgConstraint *args_ct;
917     int *sorted_args;
918     TCGTemp *ts;
919 
920     memset(s, 0, sizeof(*s));
921     s->nb_globals = 0;
922 
923     /* Count total number of arguments and allocate the corresponding
924        space */
925     total_args = 0;
926     for(op = 0; op < NB_OPS; op++) {
927         def = &tcg_op_defs[op];
928         n = def->nb_iargs + def->nb_oargs;
929         total_args += n;
930     }
931 
932     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
933     sorted_args = g_malloc(sizeof(int) * total_args);
934 
935     for(op = 0; op < NB_OPS; op++) {
936         def = &tcg_op_defs[op];
937         def->args_ct = args_ct;
938         def->sorted_args = sorted_args;
939         n = def->nb_iargs + def->nb_oargs;
940         sorted_args += n;
941         args_ct += n;
942     }
943 
944     /* Register helpers.  */
945     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
946     helper_table = g_hash_table_new(NULL, NULL);
947 
948     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
949         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
950                             (gpointer)&all_helpers[i]);
951     }
952 
953     tcg_target_init(s);
954     process_op_defs(s);
955 
956     /* Reverse the order of the saved registers, assuming they're all at
957        the start of tcg_target_reg_alloc_order.  */
958     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
959         int r = tcg_target_reg_alloc_order[n];
960         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
961             break;
962         }
963     }
964     for (i = 0; i < n; ++i) {
965         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
966     }
967     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
968         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
969     }
970 
971     tcg_ctx = s;
972     /*
973      * In user-mode we simply share the init context among threads, since we
974      * use a single region. See the documentation tcg_region_init() for the
975      * reasoning behind this.
976      * In softmmu we will have at most max_cpus TCG threads.
977      */
978 #ifdef CONFIG_USER_ONLY
979     tcg_ctxs = &tcg_ctx;
980     n_tcg_ctxs = 1;
981 #else
982     tcg_ctxs = g_new(TCGContext *, max_cpus);
983 #endif
984 
985     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
986     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
987     cpu_env = temp_tcgv_ptr(ts);
988 }
989 
990 /*
991  * Allocate TBs right before their corresponding translated code, making
992  * sure that TBs and code are on different cache lines.
993  */
994 TranslationBlock *tcg_tb_alloc(TCGContext *s)
995 {
996     uintptr_t align = qemu_icache_linesize;
997     TranslationBlock *tb;
998     void *next;
999 
1000  retry:
1001     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1002     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1003 
1004     if (unlikely(next > s->code_gen_highwater)) {
1005         if (tcg_region_alloc(s)) {
1006             return NULL;
1007         }
1008         goto retry;
1009     }
1010     atomic_set(&s->code_gen_ptr, next);
1011     s->data_gen_ptr = NULL;
1012     return tb;
1013 }
1014 
1015 void tcg_prologue_init(TCGContext *s)
1016 {
1017     size_t prologue_size, total_size;
1018     void *buf0, *buf1;
1019 
1020     /* Put the prologue at the beginning of code_gen_buffer.  */
1021     buf0 = s->code_gen_buffer;
1022     total_size = s->code_gen_buffer_size;
1023     s->code_ptr = buf0;
1024     s->code_buf = buf0;
1025     s->data_gen_ptr = NULL;
1026     s->code_gen_prologue = buf0;
1027 
1028     /* Compute a high-water mark, at which we voluntarily flush the buffer
1029        and start over.  The size here is arbitrary, significantly larger
1030        than we expect the code generation for any one opcode to require.  */
1031     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1032 
1033 #ifdef TCG_TARGET_NEED_POOL_LABELS
1034     s->pool_labels = NULL;
1035 #endif
1036 
1037     /* Generate the prologue.  */
1038     tcg_target_qemu_prologue(s);
1039 
1040 #ifdef TCG_TARGET_NEED_POOL_LABELS
1041     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1042     {
1043         int result = tcg_out_pool_finalize(s);
1044         tcg_debug_assert(result == 0);
1045     }
1046 #endif
1047 
1048     buf1 = s->code_ptr;
1049     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1050 
1051     /* Deduct the prologue from the buffer.  */
1052     prologue_size = tcg_current_code_size(s);
1053     s->code_gen_ptr = buf1;
1054     s->code_gen_buffer = buf1;
1055     s->code_buf = buf1;
1056     total_size -= prologue_size;
1057     s->code_gen_buffer_size = total_size;
1058 
1059     tcg_register_jit(s->code_gen_buffer, total_size);
1060 
1061 #ifdef DEBUG_DISAS
1062     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1063         qemu_log_lock();
1064         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1065         if (s->data_gen_ptr) {
1066             size_t code_size = s->data_gen_ptr - buf0;
1067             size_t data_size = prologue_size - code_size;
1068             size_t i;
1069 
1070             log_disas(buf0, code_size);
1071 
1072             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1073                 if (sizeof(tcg_target_ulong) == 8) {
1074                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1075                              (uintptr_t)s->data_gen_ptr + i,
1076                              *(uint64_t *)(s->data_gen_ptr + i));
1077                 } else {
1078                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1079                              (uintptr_t)s->data_gen_ptr + i,
1080                              *(uint32_t *)(s->data_gen_ptr + i));
1081                 }
1082             }
1083         } else {
1084             log_disas(buf0, prologue_size);
1085         }
1086         qemu_log("\n");
1087         qemu_log_flush();
1088         qemu_log_unlock();
1089     }
1090 #endif
1091 
1092     /* Assert that goto_ptr is implemented completely.  */
1093     if (TCG_TARGET_HAS_goto_ptr) {
1094         tcg_debug_assert(s->code_gen_epilogue != NULL);
1095     }
1096 }
1097 
1098 void tcg_func_start(TCGContext *s)
1099 {
1100     tcg_pool_reset(s);
1101     s->nb_temps = s->nb_globals;
1102 
1103     /* No temps have been previously allocated for size or locality.  */
1104     memset(s->free_temps, 0, sizeof(s->free_temps));
1105 
1106     s->nb_ops = 0;
1107     s->nb_labels = 0;
1108     s->current_frame_offset = s->frame_start;
1109 
1110 #ifdef CONFIG_DEBUG_TCG
1111     s->goto_tb_issue_mask = 0;
1112 #endif
1113 
1114     QTAILQ_INIT(&s->ops);
1115     QTAILQ_INIT(&s->free_ops);
1116     QSIMPLEQ_INIT(&s->labels);
1117 }
1118 
1119 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1120 {
1121     int n = s->nb_temps++;
1122     tcg_debug_assert(n < TCG_MAX_TEMPS);
1123     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1124 }
1125 
1126 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1127 {
1128     TCGTemp *ts;
1129 
1130     tcg_debug_assert(s->nb_globals == s->nb_temps);
1131     s->nb_globals++;
1132     ts = tcg_temp_alloc(s);
1133     ts->temp_global = 1;
1134 
1135     return ts;
1136 }
1137 
1138 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1139                                             TCGReg reg, const char *name)
1140 {
1141     TCGTemp *ts;
1142 
1143     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1144         tcg_abort();
1145     }
1146 
1147     ts = tcg_global_alloc(s);
1148     ts->base_type = type;
1149     ts->type = type;
1150     ts->fixed_reg = 1;
1151     ts->reg = reg;
1152     ts->name = name;
1153     tcg_regset_set_reg(s->reserved_regs, reg);
1154 
1155     return ts;
1156 }
1157 
1158 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1159 {
1160     s->frame_start = start;
1161     s->frame_end = start + size;
1162     s->frame_temp
1163         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1164 }
1165 
1166 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1167                                      intptr_t offset, const char *name)
1168 {
1169     TCGContext *s = tcg_ctx;
1170     TCGTemp *base_ts = tcgv_ptr_temp(base);
1171     TCGTemp *ts = tcg_global_alloc(s);
1172     int indirect_reg = 0, bigendian = 0;
1173 #ifdef HOST_WORDS_BIGENDIAN
1174     bigendian = 1;
1175 #endif
1176 
1177     if (!base_ts->fixed_reg) {
1178         /* We do not support double-indirect registers.  */
1179         tcg_debug_assert(!base_ts->indirect_reg);
1180         base_ts->indirect_base = 1;
1181         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1182                             ? 2 : 1);
1183         indirect_reg = 1;
1184     }
1185 
1186     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1187         TCGTemp *ts2 = tcg_global_alloc(s);
1188         char buf[64];
1189 
1190         ts->base_type = TCG_TYPE_I64;
1191         ts->type = TCG_TYPE_I32;
1192         ts->indirect_reg = indirect_reg;
1193         ts->mem_allocated = 1;
1194         ts->mem_base = base_ts;
1195         ts->mem_offset = offset + bigendian * 4;
1196         pstrcpy(buf, sizeof(buf), name);
1197         pstrcat(buf, sizeof(buf), "_0");
1198         ts->name = strdup(buf);
1199 
1200         tcg_debug_assert(ts2 == ts + 1);
1201         ts2->base_type = TCG_TYPE_I64;
1202         ts2->type = TCG_TYPE_I32;
1203         ts2->indirect_reg = indirect_reg;
1204         ts2->mem_allocated = 1;
1205         ts2->mem_base = base_ts;
1206         ts2->mem_offset = offset + (1 - bigendian) * 4;
1207         pstrcpy(buf, sizeof(buf), name);
1208         pstrcat(buf, sizeof(buf), "_1");
1209         ts2->name = strdup(buf);
1210     } else {
1211         ts->base_type = type;
1212         ts->type = type;
1213         ts->indirect_reg = indirect_reg;
1214         ts->mem_allocated = 1;
1215         ts->mem_base = base_ts;
1216         ts->mem_offset = offset;
1217         ts->name = name;
1218     }
1219     return ts;
1220 }
1221 
1222 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1223 {
1224     TCGContext *s = tcg_ctx;
1225     TCGTemp *ts;
1226     int idx, k;
1227 
1228     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1229     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1230     if (idx < TCG_MAX_TEMPS) {
1231         /* There is already an available temp with the right type.  */
1232         clear_bit(idx, s->free_temps[k].l);
1233 
1234         ts = &s->temps[idx];
1235         ts->temp_allocated = 1;
1236         tcg_debug_assert(ts->base_type == type);
1237         tcg_debug_assert(ts->temp_local == temp_local);
1238     } else {
1239         ts = tcg_temp_alloc(s);
1240         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1241             TCGTemp *ts2 = tcg_temp_alloc(s);
1242 
1243             ts->base_type = type;
1244             ts->type = TCG_TYPE_I32;
1245             ts->temp_allocated = 1;
1246             ts->temp_local = temp_local;
1247 
1248             tcg_debug_assert(ts2 == ts + 1);
1249             ts2->base_type = TCG_TYPE_I64;
1250             ts2->type = TCG_TYPE_I32;
1251             ts2->temp_allocated = 1;
1252             ts2->temp_local = temp_local;
1253         } else {
1254             ts->base_type = type;
1255             ts->type = type;
1256             ts->temp_allocated = 1;
1257             ts->temp_local = temp_local;
1258         }
1259     }
1260 
1261 #if defined(CONFIG_DEBUG_TCG)
1262     s->temps_in_use++;
1263 #endif
1264     return ts;
1265 }
1266 
1267 TCGv_vec tcg_temp_new_vec(TCGType type)
1268 {
1269     TCGTemp *t;
1270 
1271 #ifdef CONFIG_DEBUG_TCG
1272     switch (type) {
1273     case TCG_TYPE_V64:
1274         assert(TCG_TARGET_HAS_v64);
1275         break;
1276     case TCG_TYPE_V128:
1277         assert(TCG_TARGET_HAS_v128);
1278         break;
1279     case TCG_TYPE_V256:
1280         assert(TCG_TARGET_HAS_v256);
1281         break;
1282     default:
1283         g_assert_not_reached();
1284     }
1285 #endif
1286 
1287     t = tcg_temp_new_internal(type, 0);
1288     return temp_tcgv_vec(t);
1289 }
1290 
1291 /* Create a new temp of the same type as an existing temp.  */
1292 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1293 {
1294     TCGTemp *t = tcgv_vec_temp(match);
1295 
1296     tcg_debug_assert(t->temp_allocated != 0);
1297 
1298     t = tcg_temp_new_internal(t->base_type, 0);
1299     return temp_tcgv_vec(t);
1300 }
1301 
1302 void tcg_temp_free_internal(TCGTemp *ts)
1303 {
1304     TCGContext *s = tcg_ctx;
1305     int k, idx;
1306 
1307 #if defined(CONFIG_DEBUG_TCG)
1308     s->temps_in_use--;
1309     if (s->temps_in_use < 0) {
1310         fprintf(stderr, "More temporaries freed than allocated!\n");
1311     }
1312 #endif
1313 
1314     tcg_debug_assert(ts->temp_global == 0);
1315     tcg_debug_assert(ts->temp_allocated != 0);
1316     ts->temp_allocated = 0;
1317 
1318     idx = temp_idx(ts);
1319     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1320     set_bit(idx, s->free_temps[k].l);
1321 }
1322 
1323 TCGv_i32 tcg_const_i32(int32_t val)
1324 {
1325     TCGv_i32 t0;
1326     t0 = tcg_temp_new_i32();
1327     tcg_gen_movi_i32(t0, val);
1328     return t0;
1329 }
1330 
1331 TCGv_i64 tcg_const_i64(int64_t val)
1332 {
1333     TCGv_i64 t0;
1334     t0 = tcg_temp_new_i64();
1335     tcg_gen_movi_i64(t0, val);
1336     return t0;
1337 }
1338 
1339 TCGv_i32 tcg_const_local_i32(int32_t val)
1340 {
1341     TCGv_i32 t0;
1342     t0 = tcg_temp_local_new_i32();
1343     tcg_gen_movi_i32(t0, val);
1344     return t0;
1345 }
1346 
1347 TCGv_i64 tcg_const_local_i64(int64_t val)
1348 {
1349     TCGv_i64 t0;
1350     t0 = tcg_temp_local_new_i64();
1351     tcg_gen_movi_i64(t0, val);
1352     return t0;
1353 }
1354 
1355 #if defined(CONFIG_DEBUG_TCG)
1356 void tcg_clear_temp_count(void)
1357 {
1358     TCGContext *s = tcg_ctx;
1359     s->temps_in_use = 0;
1360 }
1361 
1362 int tcg_check_temp_count(void)
1363 {
1364     TCGContext *s = tcg_ctx;
1365     if (s->temps_in_use) {
1366         /* Clear the count so that we don't give another
1367          * warning immediately next time around.
1368          */
1369         s->temps_in_use = 0;
1370         return 1;
1371     }
1372     return 0;
1373 }
1374 #endif
1375 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.

   The switch is organised in groups: control-flow and qemu_ld/st
   opcodes, which are always available; the 32-bit integer opcodes; the
   64-bit integer opcodes; and the vector opcodes.  Within each group an
   opcode is either unconditionally reported (for the 64-bit and vector
   groups: subject to the host register width or to vector support) or
   gated by its TCG_TARGET_HAS_* flag.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* True when the target provides at least one vector width.  */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Always available, whatever the target.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Basic 32-bit integer opcodes: always available.  */
    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer opcodes: one TCG_TARGET_HAS_* flag each
       (some flags cover a pair of opcodes).  */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons exist only when host registers are 32-bit.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit integer opcodes: available only when host registers
       are 64-bit.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer opcodes: one TCG_TARGET_HAS_* flag each
       (some flags cover a pair of opcodes).  */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Basic vector opcodes: available as soon as any vector width is.  */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    /* dup2 additionally requires 32-bit host registers.  */
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Optional vector opcodes: vector support plus the specific flag.  */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    /* Anything not listed above must lie after INDEX_op_last_generic
       (presumably a target-specific opcode) and is reported as
       supported.  */
    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1659 
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op invoking helper @func.
 *
 * @func:  helper entry point; it is looked up in helper_table and the
 *         result is dereferenced without a NULL check, so the helper
 *         must have been registered.
 * @ret:   temp receiving the return value, or NULL for none.
 * @nargs: number of entries in @args.
 * @args:  argument temps.  On TCG_TARGET_EXTEND_ARGS 64-bit hosts the
 *         entries for 32-bit arguments are overwritten in place with
 *         extended copies, which are freed again before returning.
 *
 * As used here, the helper's sizemask has bit 0 set for a 64-bit return
 * value, bit (i+1)*2 set when argument i is 64-bit, and bit (i+1)*2+1
 * set when argument i is signed.
 *
 * op->args[] is filled with the return temp(s), then the argument
 * temps (possibly with TCG_CALL_DUMMY_ARG padding), then @func and the
 * helper flags.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on work on the split list; every argument is now
           32-bit, hence the cleared sizemask.  */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace each 32-bit argument by a 64-bit temp holding its sign-
       or zero-extension, as selected by the signed bit of sizemask.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free index in op->args[].  Outputs come first.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        /* A 64-bit result on a host with narrower registers occupies
           the two consecutive temps ret and ret + 1, ordered by host
           endianness.  */
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* Inputs.  real_args counts argument slots, including dummy padding
       and both halves of a split 64-bit argument.  */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian.  For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order.  If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function pointer and the helper flags close the list.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extended copies created before the call was emitted.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1831 
1832 static void tcg_reg_alloc_start(TCGContext *s)
1833 {
1834     int i, n;
1835     TCGTemp *ts;
1836 
1837     for (i = 0, n = s->nb_globals; i < n; i++) {
1838         ts = &s->temps[i];
1839         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1840     }
1841     for (n = s->nb_temps; i < n; i++) {
1842         ts = &s->temps[i];
1843         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1844         ts->mem_allocated = 0;
1845         ts->fixed_reg = 0;
1846     }
1847 
1848     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1849 }
1850 
1851 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1852                                  TCGTemp *ts)
1853 {
1854     int idx = temp_idx(ts);
1855 
1856     if (ts->temp_global) {
1857         pstrcpy(buf, buf_size, ts->name);
1858     } else if (ts->temp_local) {
1859         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1860     } else {
1861         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1862     }
1863     return buf;
1864 }
1865 
1866 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1867                              int buf_size, TCGArg arg)
1868 {
1869     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1870 }
1871 
1872 /* Find helper name.  */
1873 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1874 {
1875     const char *ret = NULL;
1876     if (helper_table) {
1877         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1878         if (info) {
1879             ret = info->name;
1880         }
1881     }
1882     return ret;
1883 }
1884 
/* Printable mnemonic for each comparison condition, indexed by the
   TCG_COND_* value.  */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1900 
1901 static const char * const ldst_name[] =
1902 {
1903     [MO_UB]   = "ub",
1904     [MO_SB]   = "sb",
1905     [MO_LEUW] = "leuw",
1906     [MO_LESW] = "lesw",
1907     [MO_LEUL] = "leul",
1908     [MO_LESL] = "lesl",
1909     [MO_LEQ]  = "leq",
1910     [MO_BEUW] = "beuw",
1911     [MO_BESW] = "besw",
1912     [MO_BEUL] = "beul",
1913     [MO_BESL] = "besl",
1914     [MO_BEQ]  = "beq",
1915 };
1916 
1917 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1918 #ifdef ALIGNED_ONLY
1919     [MO_UNALN >> MO_ASHIFT]    = "un+",
1920     [MO_ALIGN >> MO_ASHIFT]    = "",
1921 #else
1922     [MO_UNALN >> MO_ASHIFT]    = "",
1923     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1924 #endif
1925     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1926     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1927     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1928     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1929     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1930     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1931 };
1932 
1933 static inline bool tcg_regset_single(TCGRegSet d)
1934 {
1935     return (d & (d - 1)) == 0;
1936 }
1937 
1938 static inline TCGReg tcg_regset_first(TCGRegSet d)
1939 {
1940     if (TCG_TARGET_NB_REGS <= 32) {
1941         return ctz32(d);
1942     } else {
1943         return ctz64(d);
1944     }
1945 }
1946 
1947 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1948 {
1949     char buf[128];
1950     TCGOp *op;
1951 
1952     QTAILQ_FOREACH(op, &s->ops, link) {
1953         int i, k, nb_oargs, nb_iargs, nb_cargs;
1954         const TCGOpDef *def;
1955         TCGOpcode c;
1956         int col = 0;
1957 
1958         c = op->opc;
1959         def = &tcg_op_defs[c];
1960 
1961         if (c == INDEX_op_insn_start) {
1962             nb_oargs = 0;
1963             col += qemu_log("\n ----");
1964 
1965             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1966                 target_ulong a;
1967 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1968                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1969 #else
1970                 a = op->args[i];
1971 #endif
1972                 col += qemu_log(" " TARGET_FMT_lx, a);
1973             }
1974         } else if (c == INDEX_op_call) {
1975             /* variable number of arguments */
1976             nb_oargs = TCGOP_CALLO(op);
1977             nb_iargs = TCGOP_CALLI(op);
1978             nb_cargs = def->nb_cargs;
1979 
1980             /* function name, flags, out args */
1981             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1982                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1983                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1984             for (i = 0; i < nb_oargs; i++) {
1985                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1986                                                        op->args[i]));
1987             }
1988             for (i = 0; i < nb_iargs; i++) {
1989                 TCGArg arg = op->args[nb_oargs + i];
1990                 const char *t = "<dummy>";
1991                 if (arg != TCG_CALL_DUMMY_ARG) {
1992                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1993                 }
1994                 col += qemu_log(",%s", t);
1995             }
1996         } else {
1997             col += qemu_log(" %s ", def->name);
1998 
1999             nb_oargs = def->nb_oargs;
2000             nb_iargs = def->nb_iargs;
2001             nb_cargs = def->nb_cargs;
2002 
2003             if (def->flags & TCG_OPF_VECTOR) {
2004                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2005                                 8 << TCGOP_VECE(op));
2006             }
2007 
2008             k = 0;
2009             for (i = 0; i < nb_oargs; i++) {
2010                 if (k != 0) {
2011                     col += qemu_log(",");
2012                 }
2013                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2014                                                       op->args[k++]));
2015             }
2016             for (i = 0; i < nb_iargs; i++) {
2017                 if (k != 0) {
2018                     col += qemu_log(",");
2019                 }
2020                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2021                                                       op->args[k++]));
2022             }
2023             switch (c) {
2024             case INDEX_op_brcond_i32:
2025             case INDEX_op_setcond_i32:
2026             case INDEX_op_movcond_i32:
2027             case INDEX_op_brcond2_i32:
2028             case INDEX_op_setcond2_i32:
2029             case INDEX_op_brcond_i64:
2030             case INDEX_op_setcond_i64:
2031             case INDEX_op_movcond_i64:
2032             case INDEX_op_cmp_vec:
2033             case INDEX_op_cmpsel_vec:
2034                 if (op->args[k] < ARRAY_SIZE(cond_name)
2035                     && cond_name[op->args[k]]) {
2036                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2037                 } else {
2038                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2039                 }
2040                 i = 1;
2041                 break;
2042             case INDEX_op_qemu_ld_i32:
2043             case INDEX_op_qemu_st_i32:
2044             case INDEX_op_qemu_ld_i64:
2045             case INDEX_op_qemu_st_i64:
2046                 {
2047                     TCGMemOpIdx oi = op->args[k++];
2048                     TCGMemOp op = get_memop(oi);
2049                     unsigned ix = get_mmuidx(oi);
2050 
2051                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2052                         col += qemu_log(",$0x%x,%u", op, ix);
2053                     } else {
2054                         const char *s_al, *s_op;
2055                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2056                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2057                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2058                     }
2059                     i = 1;
2060                 }
2061                 break;
2062             default:
2063                 i = 0;
2064                 break;
2065             }
2066             switch (c) {
2067             case INDEX_op_set_label:
2068             case INDEX_op_br:
2069             case INDEX_op_brcond_i32:
2070             case INDEX_op_brcond_i64:
2071             case INDEX_op_brcond2_i32:
2072                 col += qemu_log("%s$L%d", k ? "," : "",
2073                                 arg_label(op->args[k])->id);
2074                 i++, k++;
2075                 break;
2076             default:
2077                 break;
2078             }
2079             for (; i < nb_cargs; i++, k++) {
2080                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2081             }
2082         }
2083 
2084         if (have_prefs || op->life) {
2085             for (; col < 40; ++col) {
2086                 putc(' ', qemu_logfile);
2087             }
2088         }
2089 
2090         if (op->life) {
2091             unsigned life = op->life;
2092 
2093             if (life & (SYNC_ARG * 3)) {
2094                 qemu_log("  sync:");
2095                 for (i = 0; i < 2; ++i) {
2096                     if (life & (SYNC_ARG << i)) {
2097                         qemu_log(" %d", i);
2098                     }
2099                 }
2100             }
2101             life /= DEAD_ARG;
2102             if (life) {
2103                 qemu_log("  dead:");
2104                 for (i = 0; life; ++i, life >>= 1) {
2105                     if (life & 1) {
2106                         qemu_log(" %d", i);
2107                     }
2108                 }
2109             }
2110         }
2111 
2112         if (have_prefs) {
2113             for (i = 0; i < nb_oargs; ++i) {
2114                 TCGRegSet set = op->output_pref[i];
2115 
2116                 if (i == 0) {
2117                     qemu_log("  pref=");
2118                 } else {
2119                     qemu_log(",");
2120                 }
2121                 if (set == 0) {
2122                     qemu_log("none");
2123                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2124                     qemu_log("all");
2125 #ifdef CONFIG_DEBUG_TCG
2126                 } else if (tcg_regset_single(set)) {
2127                     TCGReg reg = tcg_regset_first(set);
2128                     qemu_log("%s", tcg_target_reg_names[reg]);
2129 #endif
2130                 } else if (TCG_TARGET_NB_REGS <= 32) {
2131                     qemu_log("%#x", (uint32_t)set);
2132                 } else {
2133                     qemu_log("%#" PRIx64, (uint64_t)set);
2134                 }
2135             }
2136         }
2137 
2138         qemu_log("\n");
2139     }
2140 }
2141 
2142 /* we give more priority to constraints with less registers */
2143 static int get_constraint_priority(const TCGOpDef *def, int k)
2144 {
2145     const TCGArgConstraint *arg_ct;
2146 
2147     int i, n;
2148     arg_ct = &def->args_ct[k];
2149     if (arg_ct->ct & TCG_CT_ALIAS) {
2150         /* an alias is equivalent to a single register */
2151         n = 1;
2152     } else {
2153         if (!(arg_ct->ct & TCG_CT_REG))
2154             return 0;
2155         n = 0;
2156         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2157             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2158                 n++;
2159         }
2160     }
2161     return TCG_TARGET_NB_REGS - n + 1;
2162 }
2163 
2164 /* sort from highest priority to lowest */
2165 static void sort_constraints(TCGOpDef *def, int start, int n)
2166 {
2167     int i, j, p1, p2, tmp;
2168 
2169     for(i = 0; i < n; i++)
2170         def->sorted_args[start + i] = start + i;
2171     if (n <= 1)
2172         return;
2173     for(i = 0; i < n - 1; i++) {
2174         for(j = i + 1; j < n; j++) {
2175             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2176             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2177             if (p1 < p2) {
2178                 tmp = def->sorted_args[start + i];
2179                 def->sorted_args[start + i] = def->sorted_args[start + j];
2180                 def->sorted_args[start + j] = tmp;
2181             }
2182         }
2183     }
2184 }
2185 
2186 static void process_op_defs(TCGContext *s)
2187 {
2188     TCGOpcode op;
2189 
2190     for (op = 0; op < NB_OPS; op++) {
2191         TCGOpDef *def = &tcg_op_defs[op];
2192         const TCGTargetOpDef *tdefs;
2193         TCGType type;
2194         int i, nb_args;
2195 
2196         if (def->flags & TCG_OPF_NOT_PRESENT) {
2197             continue;
2198         }
2199 
2200         nb_args = def->nb_iargs + def->nb_oargs;
2201         if (nb_args == 0) {
2202             continue;
2203         }
2204 
2205         tdefs = tcg_target_op_def(op);
2206         /* Missing TCGTargetOpDef entry. */
2207         tcg_debug_assert(tdefs != NULL);
2208 
2209         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2210         for (i = 0; i < nb_args; i++) {
2211             const char *ct_str = tdefs->args_ct_str[i];
2212             /* Incomplete TCGTargetOpDef entry. */
2213             tcg_debug_assert(ct_str != NULL);
2214 
2215             def->args_ct[i].u.regs = 0;
2216             def->args_ct[i].ct = 0;
2217             while (*ct_str != '\0') {
2218                 switch(*ct_str) {
2219                 case '0' ... '9':
2220                     {
2221                         int oarg = *ct_str - '0';
2222                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2223                         tcg_debug_assert(oarg < def->nb_oargs);
2224                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2225                         /* TCG_CT_ALIAS is for the output arguments.
2226                            The input is tagged with TCG_CT_IALIAS. */
2227                         def->args_ct[i] = def->args_ct[oarg];
2228                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2229                         def->args_ct[oarg].alias_index = i;
2230                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2231                         def->args_ct[i].alias_index = oarg;
2232                     }
2233                     ct_str++;
2234                     break;
2235                 case '&':
2236                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2237                     ct_str++;
2238                     break;
2239                 case 'i':
2240                     def->args_ct[i].ct |= TCG_CT_CONST;
2241                     ct_str++;
2242                     break;
2243                 default:
2244                     ct_str = target_parse_constraint(&def->args_ct[i],
2245                                                      ct_str, type);
2246                     /* Typo in TCGTargetOpDef constraint. */
2247                     tcg_debug_assert(ct_str != NULL);
2248                 }
2249             }
2250         }
2251 
2252         /* TCGTargetOpDef entry with too much information? */
2253         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2254 
2255         /* sort the constraints (XXX: this is just an heuristic) */
2256         sort_constraints(def, 0, def->nb_oargs);
2257         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2258     }
2259 }
2260 
2261 void tcg_op_remove(TCGContext *s, TCGOp *op)
2262 {
2263     TCGLabel *label;
2264 
2265     switch (op->opc) {
2266     case INDEX_op_br:
2267         label = arg_label(op->args[0]);
2268         label->refs--;
2269         break;
2270     case INDEX_op_brcond_i32:
2271     case INDEX_op_brcond_i64:
2272         label = arg_label(op->args[3]);
2273         label->refs--;
2274         break;
2275     case INDEX_op_brcond2_i32:
2276         label = arg_label(op->args[5]);
2277         label->refs--;
2278         break;
2279     default:
2280         break;
2281     }
2282 
2283     QTAILQ_REMOVE(&s->ops, op, link);
2284     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2285     s->nb_ops--;
2286 
2287 #ifdef CONFIG_PROFILER
2288     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2289 #endif
2290 }
2291 
2292 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2293 {
2294     TCGContext *s = tcg_ctx;
2295     TCGOp *op;
2296 
2297     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2298         op = tcg_malloc(sizeof(TCGOp));
2299     } else {
2300         op = QTAILQ_FIRST(&s->free_ops);
2301         QTAILQ_REMOVE(&s->free_ops, op, link);
2302     }
2303     memset(op, 0, offsetof(TCGOp, link));
2304     op->opc = opc;
2305     s->nb_ops++;
2306 
2307     return op;
2308 }
2309 
2310 TCGOp *tcg_emit_op(TCGOpcode opc)
2311 {
2312     TCGOp *op = tcg_op_alloc(opc);
2313     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2314     return op;
2315 }
2316 
2317 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2318 {
2319     TCGOp *new_op = tcg_op_alloc(opc);
2320     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2321     return new_op;
2322 }
2323 
2324 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2325 {
2326     TCGOp *new_op = tcg_op_alloc(opc);
2327     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2328     return new_op;
2329 }
2330 
2331 /* Reachable analysis : remove unreachable code.  */
2332 static void reachable_code_pass(TCGContext *s)
2333 {
2334     TCGOp *op, *op_next;
2335     bool dead = false;
2336 
2337     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2338         bool remove = dead;
2339         TCGLabel *label;
2340         int call_flags;
2341 
2342         switch (op->opc) {
2343         case INDEX_op_set_label:
2344             label = arg_label(op->args[0]);
2345             if (label->refs == 0) {
2346                 /*
2347                  * While there is an occasional backward branch, virtually
2348                  * all branches generated by the translators are forward.
2349                  * Which means that generally we will have already removed
2350                  * all references to the label that will be, and there is
2351                  * little to be gained by iterating.
2352                  */
2353                 remove = true;
2354             } else {
2355                 /* Once we see a label, insns become live again.  */
2356                 dead = false;
2357                 remove = false;
2358 
2359                 /*
2360                  * Optimization can fold conditional branches to unconditional.
2361                  * If we find a label with one reference which is preceded by
2362                  * an unconditional branch to it, remove both.  This needed to
2363                  * wait until the dead code in between them was removed.
2364                  */
2365                 if (label->refs == 1) {
2366                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2367                     if (op_prev->opc == INDEX_op_br &&
2368                         label == arg_label(op_prev->args[0])) {
2369                         tcg_op_remove(s, op_prev);
2370                         remove = true;
2371                     }
2372                 }
2373             }
2374             break;
2375 
2376         case INDEX_op_br:
2377         case INDEX_op_exit_tb:
2378         case INDEX_op_goto_ptr:
2379             /* Unconditional branches; everything following is dead.  */
2380             dead = true;
2381             break;
2382 
2383         case INDEX_op_call:
2384             /* Notice noreturn helper calls, raising exceptions.  */
2385             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2386             if (call_flags & TCG_CALL_NO_RETURN) {
2387                 dead = true;
2388             }
2389             break;
2390 
2391         case INDEX_op_insn_start:
2392             /* Never remove -- we need to keep these for unwind.  */
2393             remove = false;
2394             break;
2395 
2396         default:
2397             break;
2398         }
2399 
2400         if (remove) {
2401             tcg_op_remove(s, op);
2402         }
2403     }
2404 }
2405 
/* Bits of a temp's liveness state: TS_DEAD and TS_MEM may be combined. */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / needs-sync bit of argument number n.  Both macros
 * read a variable named arg_life from the scope they are used in.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2411 
2412 /* For liveness_pass_1, the register preferences for a given temp.  */
2413 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2414 {
2415     return ts->state_ptr;
2416 }
2417 
2418 /* For liveness_pass_1, reset the preferences for a given temp to the
2419  * maximal regset for its type.
2420  */
2421 static inline void la_reset_pref(TCGTemp *ts)
2422 {
2423     *la_temp_pref(ts)
2424         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2425 }
2426 
2427 /* liveness analysis: end of function: all temps are dead, and globals
2428    should be in memory. */
2429 static void la_func_end(TCGContext *s, int ng, int nt)
2430 {
2431     int i;
2432 
2433     for (i = 0; i < ng; ++i) {
2434         s->temps[i].state = TS_DEAD | TS_MEM;
2435         la_reset_pref(&s->temps[i]);
2436     }
2437     for (i = ng; i < nt; ++i) {
2438         s->temps[i].state = TS_DEAD;
2439         la_reset_pref(&s->temps[i]);
2440     }
2441 }
2442 
2443 /* liveness analysis: end of basic block: all temps are dead, globals
2444    and local temps should be in memory. */
2445 static void la_bb_end(TCGContext *s, int ng, int nt)
2446 {
2447     int i;
2448 
2449     for (i = 0; i < ng; ++i) {
2450         s->temps[i].state = TS_DEAD | TS_MEM;
2451         la_reset_pref(&s->temps[i]);
2452     }
2453     for (i = ng; i < nt; ++i) {
2454         s->temps[i].state = (s->temps[i].temp_local
2455                              ? TS_DEAD | TS_MEM
2456                              : TS_DEAD);
2457         la_reset_pref(&s->temps[i]);
2458     }
2459 }
2460 
2461 /* liveness analysis: sync globals back to memory.  */
2462 static void la_global_sync(TCGContext *s, int ng)
2463 {
2464     int i;
2465 
2466     for (i = 0; i < ng; ++i) {
2467         int state = s->temps[i].state;
2468         s->temps[i].state = state | TS_MEM;
2469         if (state == TS_DEAD) {
2470             /* If the global was previously dead, reset prefs.  */
2471             la_reset_pref(&s->temps[i]);
2472         }
2473     }
2474 }
2475 
2476 /* liveness analysis: sync globals back to memory and kill.  */
2477 static void la_global_kill(TCGContext *s, int ng)
2478 {
2479     int i;
2480 
2481     for (i = 0; i < ng; i++) {
2482         s->temps[i].state = TS_DEAD | TS_MEM;
2483         la_reset_pref(&s->temps[i]);
2484     }
2485 }
2486 
2487 /* liveness analysis: note live globals crossing calls.  */
2488 static void la_cross_call(TCGContext *s, int nt)
2489 {
2490     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2491     int i;
2492 
2493     for (i = 0; i < nt; i++) {
2494         TCGTemp *ts = &s->temps[i];
2495         if (!(ts->state & TS_DEAD)) {
2496             TCGRegSet *pset = la_temp_pref(ts);
2497             TCGRegSet set = *pset;
2498 
2499             set &= mask;
2500             /* If the combination is not possible, restart.  */
2501             if (set == 0) {
2502                 set = tcg_target_available_regs[ts->type] & mask;
2503             }
2504             *pset = set;
2505         }
2506     }
2507 }
2508 
2509 /* Liveness analysis : update the opc_arg_life array to tell if a
2510    given input arguments is dead. Instructions updating dead
2511    temporaries are removed. */
2512 static void liveness_pass_1(TCGContext *s)
2513 {
2514     int nb_globals = s->nb_globals;
2515     int nb_temps = s->nb_temps;
2516     TCGOp *op, *op_prev;
2517     TCGRegSet *prefs;
2518     int i;
2519 
2520     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2521     for (i = 0; i < nb_temps; ++i) {
2522         s->temps[i].state_ptr = prefs + i;
2523     }
2524 
2525     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2526     la_func_end(s, nb_globals, nb_temps);
2527 
2528     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2529         int nb_iargs, nb_oargs;
2530         TCGOpcode opc_new, opc_new2;
2531         bool have_opc_new2;
2532         TCGLifeData arg_life = 0;
2533         TCGTemp *ts;
2534         TCGOpcode opc = op->opc;
2535         const TCGOpDef *def = &tcg_op_defs[opc];
2536 
2537         switch (opc) {
2538         case INDEX_op_call:
2539             {
2540                 int call_flags;
2541                 int nb_call_regs;
2542 
2543                 nb_oargs = TCGOP_CALLO(op);
2544                 nb_iargs = TCGOP_CALLI(op);
2545                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2546 
2547                 /* pure functions can be removed if their result is unused */
2548                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2549                     for (i = 0; i < nb_oargs; i++) {
2550                         ts = arg_temp(op->args[i]);
2551                         if (ts->state != TS_DEAD) {
2552                             goto do_not_remove_call;
2553                         }
2554                     }
2555                     goto do_remove;
2556                 }
2557             do_not_remove_call:
2558 
2559                 /* Output args are dead.  */
2560                 for (i = 0; i < nb_oargs; i++) {
2561                     ts = arg_temp(op->args[i]);
2562                     if (ts->state & TS_DEAD) {
2563                         arg_life |= DEAD_ARG << i;
2564                     }
2565                     if (ts->state & TS_MEM) {
2566                         arg_life |= SYNC_ARG << i;
2567                     }
2568                     ts->state = TS_DEAD;
2569                     la_reset_pref(ts);
2570 
2571                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2572                     op->output_pref[i] = 0;
2573                 }
2574 
2575                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2576                                     TCG_CALL_NO_READ_GLOBALS))) {
2577                     la_global_kill(s, nb_globals);
2578                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2579                     la_global_sync(s, nb_globals);
2580                 }
2581 
2582                 /* Record arguments that die in this helper.  */
2583                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2584                     ts = arg_temp(op->args[i]);
2585                     if (ts && ts->state & TS_DEAD) {
2586                         arg_life |= DEAD_ARG << i;
2587                     }
2588                 }
2589 
2590                 /* For all live registers, remove call-clobbered prefs.  */
2591                 la_cross_call(s, nb_temps);
2592 
2593                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2594 
2595                 /* Input arguments are live for preceding opcodes.  */
2596                 for (i = 0; i < nb_iargs; i++) {
2597                     ts = arg_temp(op->args[i + nb_oargs]);
2598                     if (ts && ts->state & TS_DEAD) {
2599                         /* For those arguments that die, and will be allocated
2600                          * in registers, clear the register set for that arg,
2601                          * to be filled in below.  For args that will be on
2602                          * the stack, reset to any available reg.
2603                          */
2604                         *la_temp_pref(ts)
2605                             = (i < nb_call_regs ? 0 :
2606                                tcg_target_available_regs[ts->type]);
2607                         ts->state &= ~TS_DEAD;
2608                     }
2609                 }
2610 
2611                 /* For each input argument, add its input register to prefs.
2612                    If a temp is used once, this produces a single set bit.  */
2613                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2614                     ts = arg_temp(op->args[i + nb_oargs]);
2615                     if (ts) {
2616                         tcg_regset_set_reg(*la_temp_pref(ts),
2617                                            tcg_target_call_iarg_regs[i]);
2618                     }
2619                 }
2620             }
2621             break;
2622         case INDEX_op_insn_start:
2623             break;
2624         case INDEX_op_discard:
2625             /* mark the temporary as dead */
2626             ts = arg_temp(op->args[0]);
2627             ts->state = TS_DEAD;
2628             la_reset_pref(ts);
2629             break;
2630 
2631         case INDEX_op_add2_i32:
2632             opc_new = INDEX_op_add_i32;
2633             goto do_addsub2;
2634         case INDEX_op_sub2_i32:
2635             opc_new = INDEX_op_sub_i32;
2636             goto do_addsub2;
2637         case INDEX_op_add2_i64:
2638             opc_new = INDEX_op_add_i64;
2639             goto do_addsub2;
2640         case INDEX_op_sub2_i64:
2641             opc_new = INDEX_op_sub_i64;
2642         do_addsub2:
2643             nb_iargs = 4;
2644             nb_oargs = 2;
2645             /* Test if the high part of the operation is dead, but not
2646                the low part.  The result can be optimized to a simple
2647                add or sub.  This happens often for x86_64 guest when the
2648                cpu mode is set to 32 bit.  */
2649             if (arg_temp(op->args[1])->state == TS_DEAD) {
2650                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2651                     goto do_remove;
2652                 }
2653                 /* Replace the opcode and adjust the args in place,
2654                    leaving 3 unused args at the end.  */
2655                 op->opc = opc = opc_new;
2656                 op->args[1] = op->args[2];
2657                 op->args[2] = op->args[4];
2658                 /* Fall through and mark the single-word operation live.  */
2659                 nb_iargs = 2;
2660                 nb_oargs = 1;
2661             }
2662             goto do_not_remove;
2663 
2664         case INDEX_op_mulu2_i32:
2665             opc_new = INDEX_op_mul_i32;
2666             opc_new2 = INDEX_op_muluh_i32;
2667             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2668             goto do_mul2;
2669         case INDEX_op_muls2_i32:
2670             opc_new = INDEX_op_mul_i32;
2671             opc_new2 = INDEX_op_mulsh_i32;
2672             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2673             goto do_mul2;
2674         case INDEX_op_mulu2_i64:
2675             opc_new = INDEX_op_mul_i64;
2676             opc_new2 = INDEX_op_muluh_i64;
2677             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2678             goto do_mul2;
2679         case INDEX_op_muls2_i64:
2680             opc_new = INDEX_op_mul_i64;
2681             opc_new2 = INDEX_op_mulsh_i64;
2682             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2683             goto do_mul2;
2684         do_mul2:
2685             nb_iargs = 2;
2686             nb_oargs = 2;
2687             if (arg_temp(op->args[1])->state == TS_DEAD) {
2688                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2689                     /* Both parts of the operation are dead.  */
2690                     goto do_remove;
2691                 }
2692                 /* The high part of the operation is dead; generate the low. */
2693                 op->opc = opc = opc_new;
2694                 op->args[1] = op->args[2];
2695                 op->args[2] = op->args[3];
2696             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2697                 /* The low part of the operation is dead; generate the high. */
2698                 op->opc = opc = opc_new2;
2699                 op->args[0] = op->args[1];
2700                 op->args[1] = op->args[2];
2701                 op->args[2] = op->args[3];
2702             } else {
2703                 goto do_not_remove;
2704             }
2705             /* Mark the single-word operation live.  */
2706             nb_oargs = 1;
2707             goto do_not_remove;
2708 
2709         default:
2710             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2711             nb_iargs = def->nb_iargs;
2712             nb_oargs = def->nb_oargs;
2713 
2714             /* Test if the operation can be removed because all
2715                its outputs are dead. We assume that nb_oargs == 0
2716                implies side effects */
2717             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2718                 for (i = 0; i < nb_oargs; i++) {
2719                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2720                         goto do_not_remove;
2721                     }
2722                 }
2723                 goto do_remove;
2724             }
2725             goto do_not_remove;
2726 
2727         do_remove:
2728             tcg_op_remove(s, op);
2729             break;
2730 
2731         do_not_remove:
2732             for (i = 0; i < nb_oargs; i++) {
2733                 ts = arg_temp(op->args[i]);
2734 
2735                 /* Remember the preference of the uses that followed.  */
2736                 op->output_pref[i] = *la_temp_pref(ts);
2737 
2738                 /* Output args are dead.  */
2739                 if (ts->state & TS_DEAD) {
2740                     arg_life |= DEAD_ARG << i;
2741                 }
2742                 if (ts->state & TS_MEM) {
2743                     arg_life |= SYNC_ARG << i;
2744                 }
2745                 ts->state = TS_DEAD;
2746                 la_reset_pref(ts);
2747             }
2748 
2749             /* If end of basic block, update.  */
2750             if (def->flags & TCG_OPF_BB_EXIT) {
2751                 la_func_end(s, nb_globals, nb_temps);
2752             } else if (def->flags & TCG_OPF_BB_END) {
2753                 la_bb_end(s, nb_globals, nb_temps);
2754             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2755                 la_global_sync(s, nb_globals);
2756                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2757                     la_cross_call(s, nb_temps);
2758                 }
2759             }
2760 
2761             /* Record arguments that die in this opcode.  */
2762             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2763                 ts = arg_temp(op->args[i]);
2764                 if (ts->state & TS_DEAD) {
2765                     arg_life |= DEAD_ARG << i;
2766                 }
2767             }
2768 
2769             /* Input arguments are live for preceding opcodes.  */
2770             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2771                 ts = arg_temp(op->args[i]);
2772                 if (ts->state & TS_DEAD) {
2773                     /* For operands that were dead, initially allow
2774                        all regs for the type.  */
2775                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2776                     ts->state &= ~TS_DEAD;
2777                 }
2778             }
2779 
2780             /* Incorporate constraints for this operand.  */
2781             switch (opc) {
2782             case INDEX_op_mov_i32:
2783             case INDEX_op_mov_i64:
2784                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2785                    have proper constraints.  That said, special case
2786                    moves to propagate preferences backward.  */
2787                 if (IS_DEAD_ARG(1)) {
2788                     *la_temp_pref(arg_temp(op->args[0]))
2789                         = *la_temp_pref(arg_temp(op->args[1]));
2790                 }
2791                 break;
2792 
2793             default:
2794                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2795                     const TCGArgConstraint *ct = &def->args_ct[i];
2796                     TCGRegSet set, *pset;
2797 
2798                     ts = arg_temp(op->args[i]);
2799                     pset = la_temp_pref(ts);
2800                     set = *pset;
2801 
2802                     set &= ct->u.regs;
2803                     if (ct->ct & TCG_CT_IALIAS) {
2804                         set &= op->output_pref[ct->alias_index];
2805                     }
2806                     /* If the combination is not possible, restart.  */
2807                     if (set == 0) {
2808                         set = ct->u.regs;
2809                     }
2810                     *pset = set;
2811                 }
2812                 break;
2813             }
2814             break;
2815         }
2816         op->life = arg_life;
2817     }
2818 }
2819 
2820 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2821 static bool liveness_pass_2(TCGContext *s)
2822 {
2823     int nb_globals = s->nb_globals;
2824     int nb_temps, i;
2825     bool changes = false;
2826     TCGOp *op, *op_next;
2827 
2828     /* Create a temporary for each indirect global.  */
2829     for (i = 0; i < nb_globals; ++i) {
2830         TCGTemp *its = &s->temps[i];
2831         if (its->indirect_reg) {
2832             TCGTemp *dts = tcg_temp_alloc(s);
2833             dts->type = its->type;
2834             dts->base_type = its->base_type;
2835             its->state_ptr = dts;
2836         } else {
2837             its->state_ptr = NULL;
2838         }
2839         /* All globals begin dead.  */
2840         its->state = TS_DEAD;
2841     }
2842     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2843         TCGTemp *its = &s->temps[i];
2844         its->state_ptr = NULL;
2845         its->state = TS_DEAD;
2846     }
2847 
2848     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2849         TCGOpcode opc = op->opc;
2850         const TCGOpDef *def = &tcg_op_defs[opc];
2851         TCGLifeData arg_life = op->life;
2852         int nb_iargs, nb_oargs, call_flags;
2853         TCGTemp *arg_ts, *dir_ts;
2854 
2855         if (opc == INDEX_op_call) {
2856             nb_oargs = TCGOP_CALLO(op);
2857             nb_iargs = TCGOP_CALLI(op);
2858             call_flags = op->args[nb_oargs + nb_iargs + 1];
2859         } else {
2860             nb_iargs = def->nb_iargs;
2861             nb_oargs = def->nb_oargs;
2862 
2863             /* Set flags similar to how calls require.  */
2864             if (def->flags & TCG_OPF_BB_END) {
2865                 /* Like writing globals: save_globals */
2866                 call_flags = 0;
2867             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2868                 /* Like reading globals: sync_globals */
2869                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2870             } else {
2871                 /* No effect on globals.  */
2872                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2873                               TCG_CALL_NO_WRITE_GLOBALS);
2874             }
2875         }
2876 
2877         /* Make sure that input arguments are available.  */
2878         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2879             arg_ts = arg_temp(op->args[i]);
2880             if (arg_ts) {
2881                 dir_ts = arg_ts->state_ptr;
2882                 if (dir_ts && arg_ts->state == TS_DEAD) {
2883                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2884                                       ? INDEX_op_ld_i32
2885                                       : INDEX_op_ld_i64);
2886                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2887 
2888                     lop->args[0] = temp_arg(dir_ts);
2889                     lop->args[1] = temp_arg(arg_ts->mem_base);
2890                     lop->args[2] = arg_ts->mem_offset;
2891 
2892                     /* Loaded, but synced with memory.  */
2893                     arg_ts->state = TS_MEM;
2894                 }
2895             }
2896         }
2897 
2898         /* Perform input replacement, and mark inputs that became dead.
2899            No action is required except keeping temp_state up to date
2900            so that we reload when needed.  */
2901         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2902             arg_ts = arg_temp(op->args[i]);
2903             if (arg_ts) {
2904                 dir_ts = arg_ts->state_ptr;
2905                 if (dir_ts) {
2906                     op->args[i] = temp_arg(dir_ts);
2907                     changes = true;
2908                     if (IS_DEAD_ARG(i)) {
2909                         arg_ts->state = TS_DEAD;
2910                     }
2911                 }
2912             }
2913         }
2914 
2915         /* Liveness analysis should ensure that the following are
2916            all correct, for call sites and basic block end points.  */
2917         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2918             /* Nothing to do */
2919         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2920             for (i = 0; i < nb_globals; ++i) {
2921                 /* Liveness should see that globals are synced back,
2922                    that is, either TS_DEAD or TS_MEM.  */
2923                 arg_ts = &s->temps[i];
2924                 tcg_debug_assert(arg_ts->state_ptr == 0
2925                                  || arg_ts->state != 0);
2926             }
2927         } else {
2928             for (i = 0; i < nb_globals; ++i) {
2929                 /* Liveness should see that globals are saved back,
2930                    that is, TS_DEAD, waiting to be reloaded.  */
2931                 arg_ts = &s->temps[i];
2932                 tcg_debug_assert(arg_ts->state_ptr == 0
2933                                  || arg_ts->state == TS_DEAD);
2934             }
2935         }
2936 
2937         /* Outputs become available.  */
2938         for (i = 0; i < nb_oargs; i++) {
2939             arg_ts = arg_temp(op->args[i]);
2940             dir_ts = arg_ts->state_ptr;
2941             if (!dir_ts) {
2942                 continue;
2943             }
2944             op->args[i] = temp_arg(dir_ts);
2945             changes = true;
2946 
2947             /* The output is now live and modified.  */
2948             arg_ts->state = 0;
2949 
2950             /* Sync outputs upon their last write.  */
2951             if (NEED_SYNC_ARG(i)) {
2952                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2953                                   ? INDEX_op_st_i32
2954                                   : INDEX_op_st_i64);
2955                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2956 
2957                 sop->args[0] = temp_arg(dir_ts);
2958                 sop->args[1] = temp_arg(arg_ts->mem_base);
2959                 sop->args[2] = arg_ts->mem_offset;
2960 
2961                 arg_ts->state = TS_MEM;
2962             }
2963             /* Drop outputs that are dead.  */
2964             if (IS_DEAD_ARG(i)) {
2965                 arg_ts->state = TS_DEAD;
2966             }
2967         }
2968     }
2969 
2970     return changes;
2971 }
2972 
2973 #ifdef CONFIG_DEBUG_TCG
2974 static void dump_regs(TCGContext *s)
2975 {
2976     TCGTemp *ts;
2977     int i;
2978     char buf[64];
2979 
2980     for(i = 0; i < s->nb_temps; i++) {
2981         ts = &s->temps[i];
2982         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2983         switch(ts->val_type) {
2984         case TEMP_VAL_REG:
2985             printf("%s", tcg_target_reg_names[ts->reg]);
2986             break;
2987         case TEMP_VAL_MEM:
2988             printf("%d(%s)", (int)ts->mem_offset,
2989                    tcg_target_reg_names[ts->mem_base->reg]);
2990             break;
2991         case TEMP_VAL_CONST:
2992             printf("$0x%" TCG_PRIlx, ts->val);
2993             break;
2994         case TEMP_VAL_DEAD:
2995             printf("D");
2996             break;
2997         default:
2998             printf("???");
2999             break;
3000         }
3001         printf("\n");
3002     }
3003 
3004     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3005         if (s->reg_to_temp[i] != NULL) {
3006             printf("%s: %s\n",
3007                    tcg_target_reg_names[i],
3008                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3009         }
3010     }
3011 }
3012 
3013 static void check_regs(TCGContext *s)
3014 {
3015     int reg;
3016     int k;
3017     TCGTemp *ts;
3018     char buf[64];
3019 
3020     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3021         ts = s->reg_to_temp[reg];
3022         if (ts != NULL) {
3023             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3024                 printf("Inconsistency for register %s:\n",
3025                        tcg_target_reg_names[reg]);
3026                 goto fail;
3027             }
3028         }
3029     }
3030     for (k = 0; k < s->nb_temps; k++) {
3031         ts = &s->temps[k];
3032         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3033             && s->reg_to_temp[ts->reg] != ts) {
3034             printf("Inconsistency for temp %s:\n",
3035                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3036         fail:
3037             printf("reg state:\n");
3038             dump_regs(s);
3039             tcg_abort();
3040         }
3041     }
3042 }
3043 #endif
3044 
3045 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3046 {
3047 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3048     /* Sparc64 stack is accessed with offset of 2047 */
3049     s->current_frame_offset = (s->current_frame_offset +
3050                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3051         ~(sizeof(tcg_target_long) - 1);
3052 #endif
3053     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3054         s->frame_end) {
3055         tcg_abort();
3056     }
3057     ts->mem_offset = s->current_frame_offset;
3058     ts->mem_base = s->frame_temp;
3059     ts->mem_allocated = 1;
3060     s->current_frame_offset += sizeof(tcg_target_long);
3061 }
3062 
3063 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3064 
3065 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3066    mark it free; otherwise mark it dead.  */
3067 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3068 {
3069     if (ts->fixed_reg) {
3070         return;
3071     }
3072     if (ts->val_type == TEMP_VAL_REG) {
3073         s->reg_to_temp[ts->reg] = NULL;
3074     }
3075     ts->val_type = (free_or_dead < 0
3076                     || ts->temp_local
3077                     || ts->temp_global
3078                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3079 }
3080 
3081 /* Mark a temporary as dead.  */
3082 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3083 {
3084     temp_free_or_dead(s, ts, 1);
3085 }
3086 
3087 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3088    registers needs to be allocated to store a constant.  If 'free_or_dead'
3089    is non-zero, subsequently release the temporary; if it is positive, the
3090    temp is dead; if it is negative, the temp is free.  */
3091 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3092                       TCGRegSet preferred_regs, int free_or_dead)
3093 {
3094     if (ts->fixed_reg) {
3095         return;
3096     }
3097     if (!ts->mem_coherent) {
3098         if (!ts->mem_allocated) {
3099             temp_allocate_frame(s, ts);
3100         }
3101         switch (ts->val_type) {
3102         case TEMP_VAL_CONST:
3103             /* If we're going to free the temp immediately, then we won't
3104                require it later in a register, so attempt to store the
3105                constant to memory directly.  */
3106             if (free_or_dead
3107                 && tcg_out_sti(s, ts->type, ts->val,
3108                                ts->mem_base->reg, ts->mem_offset)) {
3109                 break;
3110             }
3111             temp_load(s, ts, tcg_target_available_regs[ts->type],
3112                       allocated_regs, preferred_regs);
3113             /* fallthrough */
3114 
3115         case TEMP_VAL_REG:
3116             tcg_out_st(s, ts->type, ts->reg,
3117                        ts->mem_base->reg, ts->mem_offset);
3118             break;
3119 
3120         case TEMP_VAL_MEM:
3121             break;
3122 
3123         case TEMP_VAL_DEAD:
3124         default:
3125             tcg_abort();
3126         }
3127         ts->mem_coherent = 1;
3128     }
3129     if (free_or_dead) {
3130         temp_free_or_dead(s, ts, free_or_dead);
3131     }
3132 }
3133 
3134 /* free register 'reg' by spilling the corresponding temporary if necessary */
3135 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3136 {
3137     TCGTemp *ts = s->reg_to_temp[reg];
3138     if (ts != NULL) {
3139         temp_sync(s, ts, allocated_regs, 0, -1);
3140     }
3141 }
3142 
3143 /**
3144  * tcg_reg_alloc:
3145  * @required_regs: Set of registers in which we must allocate.
3146  * @allocated_regs: Set of registers which must be avoided.
3147  * @preferred_regs: Set of registers we should prefer.
3148  * @rev: True if we search the registers in "indirect" order.
3149  *
3150  * The allocated register must be in @required_regs & ~@allocated_regs,
3151  * but if we can put it in @preferred_regs we may save a move later.
3152  */
3153 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3154                             TCGRegSet allocated_regs,
3155                             TCGRegSet preferred_regs, bool rev)
3156 {
3157     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3158     TCGRegSet reg_ct[2];
3159     const int *order;
3160 
3161     reg_ct[1] = required_regs & ~allocated_regs;
3162     tcg_debug_assert(reg_ct[1] != 0);
3163     reg_ct[0] = reg_ct[1] & preferred_regs;
3164 
3165     /* Skip the preferred_regs option if it cannot be satisfied,
3166        or if the preference made no difference.  */
3167     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3168 
3169     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3170 
3171     /* Try free registers, preferences first.  */
3172     for (j = f; j < 2; j++) {
3173         TCGRegSet set = reg_ct[j];
3174 
3175         if (tcg_regset_single(set)) {
3176             /* One register in the set.  */
3177             TCGReg reg = tcg_regset_first(set);
3178             if (s->reg_to_temp[reg] == NULL) {
3179                 return reg;
3180             }
3181         } else {
3182             for (i = 0; i < n; i++) {
3183                 TCGReg reg = order[i];
3184                 if (s->reg_to_temp[reg] == NULL &&
3185                     tcg_regset_test_reg(set, reg)) {
3186                     return reg;
3187                 }
3188             }
3189         }
3190     }
3191 
3192     /* We must spill something.  */
3193     for (j = f; j < 2; j++) {
3194         TCGRegSet set = reg_ct[j];
3195 
3196         if (tcg_regset_single(set)) {
3197             /* One register in the set.  */
3198             TCGReg reg = tcg_regset_first(set);
3199             tcg_reg_free(s, reg, allocated_regs);
3200             return reg;
3201         } else {
3202             for (i = 0; i < n; i++) {
3203                 TCGReg reg = order[i];
3204                 if (tcg_regset_test_reg(set, reg)) {
3205                     tcg_reg_free(s, reg, allocated_regs);
3206                     return reg;
3207                 }
3208             }
3209         }
3210     }
3211 
3212     tcg_abort();
3213 }
3214 
3215 /* Make sure the temporary is in a register.  If needed, allocate the register
3216    from DESIRED while avoiding ALLOCATED.  */
3217 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3218                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3219 {
3220     TCGReg reg;
3221 
3222     switch (ts->val_type) {
3223     case TEMP_VAL_REG:
3224         return;
3225     case TEMP_VAL_CONST:
3226         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3227                             preferred_regs, ts->indirect_base);
3228         tcg_out_movi(s, ts->type, reg, ts->val);
3229         ts->mem_coherent = 0;
3230         break;
3231     case TEMP_VAL_MEM:
3232         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3233                             preferred_regs, ts->indirect_base);
3234         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3235         ts->mem_coherent = 1;
3236         break;
3237     case TEMP_VAL_DEAD:
3238     default:
3239         tcg_abort();
3240     }
3241     ts->reg = reg;
3242     ts->val_type = TEMP_VAL_REG;
3243     s->reg_to_temp[reg] = ts;
3244 }
3245 
3246 /* Save a temporary to memory. 'allocated_regs' is used in case a
3247    temporary registers needs to be allocated to store a constant.  */
3248 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3249 {
3250     /* The liveness analysis already ensures that globals are back
3251        in memory. Keep an tcg_debug_assert for safety. */
3252     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3253 }
3254 
3255 /* save globals to their canonical location and assume they can be
3256    modified be the following code. 'allocated_regs' is used in case a
3257    temporary registers needs to be allocated to store a constant. */
3258 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3259 {
3260     int i, n;
3261 
3262     for (i = 0, n = s->nb_globals; i < n; i++) {
3263         temp_save(s, &s->temps[i], allocated_regs);
3264     }
3265 }
3266 
3267 /* sync globals to their canonical location and assume they can be
3268    read by the following code. 'allocated_regs' is used in case a
3269    temporary registers needs to be allocated to store a constant. */
3270 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3271 {
3272     int i, n;
3273 
3274     for (i = 0, n = s->nb_globals; i < n; i++) {
3275         TCGTemp *ts = &s->temps[i];
3276         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3277                          || ts->fixed_reg
3278                          || ts->mem_coherent);
3279     }
3280 }
3281 
3282 /* at the end of a basic block, we assume all temporaries are dead and
3283    all globals are stored at their canonical location. */
3284 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3285 {
3286     int i;
3287 
3288     for (i = s->nb_globals; i < s->nb_temps; i++) {
3289         TCGTemp *ts = &s->temps[i];
3290         if (ts->temp_local) {
3291             temp_save(s, ts, allocated_regs);
3292         } else {
3293             /* The liveness analysis already ensures that temps are dead.
3294                Keep an tcg_debug_assert for safety. */
3295             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3296         }
3297     }
3298 
3299     save_globals(s, allocated_regs);
3300 }
3301 
3302 /*
3303  * Specialized code generation for INDEX_op_movi_*.
3304  */
3305 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3306                                   tcg_target_ulong val, TCGLifeData arg_life,
3307                                   TCGRegSet preferred_regs)
3308 {
3309     /* ENV should not be modified.  */
3310     tcg_debug_assert(!ots->fixed_reg);
3311 
3312     /* The movi is not explicitly generated here.  */
3313     if (ots->val_type == TEMP_VAL_REG) {
3314         s->reg_to_temp[ots->reg] = NULL;
3315     }
3316     ots->val_type = TEMP_VAL_CONST;
3317     ots->val = val;
3318     ots->mem_coherent = 0;
3319     if (NEED_SYNC_ARG(0)) {
3320         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3321     } else if (IS_DEAD_ARG(0)) {
3322         temp_dead(s, ots);
3323     }
3324 }
3325 
3326 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3327 {
3328     TCGTemp *ots = arg_temp(op->args[0]);
3329     tcg_target_ulong val = op->args[1];
3330 
3331     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3332 }
3333 
3334 /*
3335  * Specialized code generation for INDEX_op_mov_*.
3336  */
3337 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3338 {
3339     const TCGLifeData arg_life = op->life;
3340     TCGRegSet allocated_regs, preferred_regs;
3341     TCGTemp *ts, *ots;
3342     TCGType otype, itype;
3343 
3344     allocated_regs = s->reserved_regs;
3345     preferred_regs = op->output_pref[0];
3346     ots = arg_temp(op->args[0]);
3347     ts = arg_temp(op->args[1]);
3348 
3349     /* ENV should not be modified.  */
3350     tcg_debug_assert(!ots->fixed_reg);
3351 
3352     /* Note that otype != itype for no-op truncation.  */
3353     otype = ots->type;
3354     itype = ts->type;
3355 
3356     if (ts->val_type == TEMP_VAL_CONST) {
3357         /* propagate constant or generate sti */
3358         tcg_target_ulong val = ts->val;
3359         if (IS_DEAD_ARG(1)) {
3360             temp_dead(s, ts);
3361         }
3362         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3363         return;
3364     }
3365 
3366     /* If the source value is in memory we're going to be forced
3367        to have it in a register in order to perform the copy.  Copy
3368        the SOURCE value into its own register first, that way we
3369        don't have to reload SOURCE the next time it is used. */
3370     if (ts->val_type == TEMP_VAL_MEM) {
3371         temp_load(s, ts, tcg_target_available_regs[itype],
3372                   allocated_regs, preferred_regs);
3373     }
3374 
3375     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3376     if (IS_DEAD_ARG(0)) {
3377         /* mov to a non-saved dead register makes no sense (even with
3378            liveness analysis disabled). */
3379         tcg_debug_assert(NEED_SYNC_ARG(0));
3380         if (!ots->mem_allocated) {
3381             temp_allocate_frame(s, ots);
3382         }
3383         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3384         if (IS_DEAD_ARG(1)) {
3385             temp_dead(s, ts);
3386         }
3387         temp_dead(s, ots);
3388     } else {
3389         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3390             /* the mov can be suppressed */
3391             if (ots->val_type == TEMP_VAL_REG) {
3392                 s->reg_to_temp[ots->reg] = NULL;
3393             }
3394             ots->reg = ts->reg;
3395             temp_dead(s, ts);
3396         } else {
3397             if (ots->val_type != TEMP_VAL_REG) {
3398                 /* When allocating a new register, make sure to not spill the
3399                    input one. */
3400                 tcg_regset_set_reg(allocated_regs, ts->reg);
3401                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3402                                          allocated_regs, preferred_regs,
3403                                          ots->indirect_base);
3404             }
3405             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3406                 /*
3407                  * Cross register class move not supported.
3408                  * Store the source register into the destination slot
3409                  * and leave the destination temp as TEMP_VAL_MEM.
3410                  */
3411                 assert(!ots->fixed_reg);
3412                 if (!ts->mem_allocated) {
3413                     temp_allocate_frame(s, ots);
3414                 }
3415                 tcg_out_st(s, ts->type, ts->reg,
3416                            ots->mem_base->reg, ots->mem_offset);
3417                 ots->mem_coherent = 1;
3418                 temp_free_or_dead(s, ots, -1);
3419                 return;
3420             }
3421         }
3422         ots->val_type = TEMP_VAL_REG;
3423         ots->mem_coherent = 0;
3424         s->reg_to_temp[ots->reg] = ots;
3425         if (NEED_SYNC_ARG(0)) {
3426             temp_sync(s, ots, allocated_regs, 0, 0);
3427         }
3428     }
3429 }
3430 
3431 /*
3432  * Specialized code generation for INDEX_op_dup_vec.
3433  */
3434 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3435 {
3436     const TCGLifeData arg_life = op->life;
3437     TCGRegSet dup_out_regs, dup_in_regs;
3438     TCGTemp *its, *ots;
3439     TCGType itype, vtype;
3440     intptr_t endian_fixup;
3441     unsigned vece;
3442     bool ok;
3443 
3444     ots = arg_temp(op->args[0]);
3445     its = arg_temp(op->args[1]);
3446 
3447     /* ENV should not be modified.  */
3448     tcg_debug_assert(!ots->fixed_reg);
3449 
3450     itype = its->type;
3451     vece = TCGOP_VECE(op);
3452     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3453 
3454     if (its->val_type == TEMP_VAL_CONST) {
3455         /* Propagate constant via movi -> dupi.  */
3456         tcg_target_ulong val = its->val;
3457         if (IS_DEAD_ARG(1)) {
3458             temp_dead(s, its);
3459         }
3460         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3461         return;
3462     }
3463 
3464     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3465     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3466 
3467     /* Allocate the output register now.  */
3468     if (ots->val_type != TEMP_VAL_REG) {
3469         TCGRegSet allocated_regs = s->reserved_regs;
3470 
3471         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3472             /* Make sure to not spill the input register. */
3473             tcg_regset_set_reg(allocated_regs, its->reg);
3474         }
3475         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3476                                  op->output_pref[0], ots->indirect_base);
3477         ots->val_type = TEMP_VAL_REG;
3478         ots->mem_coherent = 0;
3479         s->reg_to_temp[ots->reg] = ots;
3480     }
3481 
3482     switch (its->val_type) {
3483     case TEMP_VAL_REG:
3484         /*
3485          * The dup constriaints must be broad, covering all possible VECE.
3486          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3487          * to fail, indicating that extra moves are required for that case.
3488          */
3489         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3490             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3491                 goto done;
3492             }
3493             /* Try again from memory or a vector input register.  */
3494         }
3495         if (!its->mem_coherent) {
3496             /*
3497              * The input register is not synced, and so an extra store
3498              * would be required to use memory.  Attempt an integer-vector
3499              * register move first.  We do not have a TCGRegSet for this.
3500              */
3501             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3502                 break;
3503             }
3504             /* Sync the temp back to its slot and load from there.  */
3505             temp_sync(s, its, s->reserved_regs, 0, 0);
3506         }
3507         /* fall through */
3508 
3509     case TEMP_VAL_MEM:
3510 #ifdef HOST_WORDS_BIGENDIAN
3511         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3512         endian_fixup -= 1 << vece;
3513 #else
3514         endian_fixup = 0;
3515 #endif
3516         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3517                              its->mem_offset + endian_fixup)) {
3518             goto done;
3519         }
3520         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3521         break;
3522 
3523     default:
3524         g_assert_not_reached();
3525     }
3526 
3527     /* We now have a vector input register, so dup must succeed. */
3528     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3529     tcg_debug_assert(ok);
3530 
3531  done:
3532     if (IS_DEAD_ARG(1)) {
3533         temp_dead(s, its);
3534     }
3535     if (NEED_SYNC_ARG(0)) {
3536         temp_sync(s, ots, s->reserved_regs, 0, 0);
3537     }
3538     if (IS_DEAD_ARG(0)) {
3539         temp_dead(s, ots);
3540     }
3541 }
3542 
/*
 * Allocate host registers for one generic TCG op and emit it.
 *
 * The work is done in this order:
 *   1. copy the op's constant arguments into new_args;
 *   2. for every input (in def->sorted_args order) either pass it as an
 *      immediate or place it in a register that satisfies its constraint;
 *   3. release the inputs that are dead after this op;
 *   4. if the op ends a basic block call tcg_reg_alloc_bb_end(); otherwise
 *      free call-clobbered registers / sync globals as the op flags
 *      require and choose a register for every output;
 *   5. emit the op through tcg_out_op() or tcg_out_vec_op();
 *   6. sync and/or release the outputs as the liveness data requests.
 *
 * @s:  translation context holding the temp and register state
 * @op: the op being allocated; it is only read here
 */
3543 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3544 {
         /* arg_life is not named again in this function; presumably the
            IS_DEAD_ARG()/NEED_SYNC_ARG() macros read it -- TODO confirm. */
3545     const TCGLifeData arg_life = op->life;
3546     const TCGOpDef * const def = &tcg_op_defs[op->opc];
         /* Registers already claimed for inputs resp. outputs of this op. */
3547     TCGRegSet i_allocated_regs;
3548     TCGRegSet o_allocated_regs;
3549     int i, k, nb_iargs, nb_oargs;
3550     TCGReg reg;
3551     TCGArg arg;
3552     const TCGArgConstraint *arg_ct;
3553     TCGTemp *ts;
         /* Final operands handed to the backend: a register number, or the
            constant value when the matching const_args[] entry is set. */
3554     TCGArg new_args[TCG_MAX_OP_ARGS];
3555     int const_args[TCG_MAX_OP_ARGS];
3556 
3557     nb_oargs = def->nb_oargs;
3558     nb_iargs = def->nb_iargs;
3559 
3560     /* copy constants */
3561     memcpy(new_args + nb_oargs + nb_iargs,
3562            op->args + nb_oargs + nb_iargs,
3563            sizeof(TCGArg) * def->nb_cargs);
3564 
         /* Both sets start from the reserved registers, so those are never
            handed out below. */
3565     i_allocated_regs = s->reserved_regs;
3566     o_allocated_regs = s->reserved_regs;
3567 
3568     /* satisfy input constraints */
3569     for (k = 0; k < nb_iargs; k++) {
3570         TCGRegSet i_preferred_regs, o_preferred_regs;
3571 
             /* Inputs are visited in sorted_args order, not argument order;
                i is the real argument index. */
3572         i = def->sorted_args[nb_oargs + k];
3573         arg = op->args[i];
3574         arg_ct = &def->args_ct[i];
3575         ts = arg_temp(arg);
3576 
3577         if (ts->val_type == TEMP_VAL_CONST
3578             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3579             /* constant is OK for instruction */
3580             const_args[i] = 1;
3581             new_args[i] = ts->val;
3582             continue;
3583         }
3584 
3585         i_preferred_regs = o_preferred_regs = 0;
3586         if (arg_ct->ct & TCG_CT_IALIAS) {
                 /* This input must share a register with an output; start
                    from the preference recorded for that output. */
3587             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3588             if (ts->fixed_reg) {
3589                 /* if fixed register, we must allocate a new register
3590                    if the alias is not the same register */
3591                 if (arg != op->args[arg_ct->alias_index]) {
3592                     goto allocate_in_reg;
3593                 }
3594             } else {
3595                 /* if the input is aliased to an output and if it is
3596                    not dead after the instruction, we must allocate
3597                    a new register and move it */
3598                 if (!IS_DEAD_ARG(i)) {
3599                     goto allocate_in_reg;
3600                 }
3601 
3602                 /* check if the current register has already been allocated
3603                    for another input aliased to an output */
3604                 if (ts->val_type == TEMP_VAL_REG) {
3605                     int k2, i2;
3606                     reg = ts->reg;
3607                     for (k2 = 0 ; k2 < k ; k2++) {
3608                         i2 = def->sorted_args[nb_oargs + k2];
3609                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3610                             reg == new_args[i2]) {
3611                             goto allocate_in_reg;
3612                         }
3613                     }
3614                 }
                     /* The input's own register can be reused for the
                        output, so prefer the output's registers for it. */
3615                 i_preferred_regs = o_preferred_regs;
3616             }
3617         }
3618 
3619         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3620         reg = ts->reg;
3621 
             /* The register holding the temp after temp_load() is re-checked
                against the constraint (presumably because a temp that was
                already in a register is left where it is -- TODO confirm). */
3622         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3623             /* nothing to do : the constraint is satisfied */
3624         } else {
3625         allocate_in_reg:
3626             /* allocate a new register matching the constraint
3627                and move the temporary register into it */
3628             temp_load(s, ts, tcg_target_available_regs[ts->type],
3629                       i_allocated_regs, 0);
3630             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3631                                 o_preferred_regs, ts->indirect_base);
3632             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3633                 /*
3634                  * Cross register class move not supported.  Sync the
3635                  * temp back to its slot and load from there.
3636                  */
3637                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3638                 tcg_out_ld(s, ts->type, reg,
3639                            ts->mem_base->reg, ts->mem_offset);
3640             }
3641         }
3642         new_args[i] = reg;
3643         const_args[i] = 0;
3644         tcg_regset_set_reg(i_allocated_regs, reg);
3645     }
3646 
3647     /* mark dead temporaries and free the associated registers */
3648     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3649         if (IS_DEAD_ARG(i)) {
3650             temp_dead(s, arg_temp(op->args[i]));
3651         }
3652     }
3653 
         /* For a basic-block-ending op no output registers are allocated;
            only the else branch below runs the output loop. */
3654     if (def->flags & TCG_OPF_BB_END) {
3655         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3656     } else {
3657         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3658             /* XXX: permit generic clobber register list ? */
3659             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3660                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3661                     tcg_reg_free(s, i, i_allocated_regs);
3662                 }
3663             }
3664         }
3665         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3666             /* sync globals if the op has side effects and might trigger
3667                an exception. */
3668             sync_globals(s, i_allocated_regs);
3669         }
3670 
3671         /* satisfy the output constraints */
3672         for(k = 0; k < nb_oargs; k++) {
3673             i = def->sorted_args[k];
3674             arg = op->args[i];
3675             arg_ct = &def->args_ct[i];
3676             ts = arg_temp(arg);
3677 
3678             /* ENV should not be modified.  */
3679             tcg_debug_assert(!ts->fixed_reg);
3680 
                 /*
                  * Three cases: reuse the register of the aliased input
                  * (unless that input was passed as a constant); for
                  * TCG_CT_NEWREG pick a register that is in neither the
                  * input nor the output set; otherwise only avoid other
                  * outputs.
                  * NOTE(review): output_pref is indexed with k here, while
                  * the input loop indexes it with an argument index
                  * (alias_index) -- confirm that k rather than i is
                  * intended.
                  */
3681             if ((arg_ct->ct & TCG_CT_ALIAS)
3682                 && !const_args[arg_ct->alias_index]) {
3683                 reg = new_args[arg_ct->alias_index];
3684             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3685                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3686                                     i_allocated_regs | o_allocated_regs,
3687                                     op->output_pref[k], ts->indirect_base);
3688             } else {
3689                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3690                                     op->output_pref[k], ts->indirect_base);
3691             }
3692             tcg_regset_set_reg(o_allocated_regs, reg);
                 /* Detach the temp from the register it held before and
                    bind it to the newly chosen one. */
3693             if (ts->val_type == TEMP_VAL_REG) {
3694                 s->reg_to_temp[ts->reg] = NULL;
3695             }
3696             ts->val_type = TEMP_VAL_REG;
3697             ts->reg = reg;
3698             /*
3699              * Temp value is modified, so the value kept in memory is
3700              * potentially not the same.
3701              */
3702             ts->mem_coherent = 0;
3703             s->reg_to_temp[reg] = ts;
3704             new_args[i] = reg;
3705         }
3706     }
3707 
3708     /* emit instruction */
3709     if (def->flags & TCG_OPF_VECTOR) {
3710         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3711                        new_args, const_args);
3712     } else {
3713         tcg_out_op(s, op->opc, new_args, const_args);
3714     }
3715 
3716     /* move the outputs in the correct register if needed */
3717     for(i = 0; i < nb_oargs; i++) {
3718         ts = arg_temp(op->args[i]);
3719 
3720         /* ENV should not be modified.  */
3721         tcg_debug_assert(!ts->fixed_reg);
3722 
             /* temp_sync() receives the dead flag as its last argument, so
                a separate temp_dead() is only needed when no sync is
                requested. */
3723         if (NEED_SYNC_ARG(i)) {
3724             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3725         } else if (IS_DEAD_ARG(i)) {
3726             temp_dead(s, ts);
3727         }
3728     }
3729 }
3730 
/*
 * STACK_DIR(x): orient a stack displacement for the host.  The value is
 * passed through unchanged unless TCG_TARGET_STACK_GROWSUP is defined,
 * in which case it is negated.
 */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3736 
/*
 * Allocate registers for a helper call op and emit the call.
 *
 * The op's argument array is laid out as: nb_oargs outputs, nb_iargs
 * inputs, then the function address and the call flags.  Inputs that do
 * not fit in tcg_target_call_iarg_regs[] are stored to the call stack
 * first; the remaining inputs are then moved or loaded into their argument
 * registers.  After releasing dead inputs, freeing call-clobbered
 * registers and saving/syncing globals as the flags require, the call is
 * emitted and every output temp is bound to tcg_target_call_oarg_regs[].
 *
 * @s:  translation context holding the temp and register state
 * @op: the call op
 */
3737 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3738 {
3739     const int nb_oargs = TCGOP_CALLO(op);
3740     const int nb_iargs = TCGOP_CALLI(op);
         /* arg_life is not named again in this function; presumably the
            IS_DEAD_ARG()/NEED_SYNC_ARG() macros read it -- TODO confirm. */
3741     const TCGLifeData arg_life = op->life;
3742     int flags, nb_regs, i;
3743     TCGReg reg;
3744     TCGArg arg;
3745     TCGTemp *ts;
3746     intptr_t stack_offset;
3747     size_t call_stack_size;
3748     tcg_insn_unit *func_addr;
3749     int allocate_args;
3750     TCGRegSet allocated_regs;
3751 
         /* The two arguments following the inputs are the callee address
            and the call flags. */
3752     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3753     flags = op->args[nb_oargs + nb_iargs + 1];
3754 
         /* nb_regs = number of inputs passed in registers: the smaller of
            the argument-register count and nb_iargs. */
3755     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3756     if (nb_regs > nb_iargs) {
3757         nb_regs = nb_iargs;
3758     }
3759 
3760     /* assign stack slots first */
         /* One tcg_target_long-sized slot per remaining input, rounded up
            to a multiple of TCG_TARGET_STACK_ALIGN (the mask arithmetic
            assumes that value is a power of two). */
3761     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3762     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3763         ~(TCG_TARGET_STACK_ALIGN - 1);
3764     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3765     if (allocate_args) {
3766         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3767            preallocate call stack */
3768         tcg_abort();
3769     }
3770 
3771     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3772     for (i = nb_regs; i < nb_iargs; i++) {
3773         arg = op->args[nb_oargs + i];
             /* The offset is decremented before the store on grows-up
                stacks and incremented after it otherwise. */
3774 #ifdef TCG_TARGET_STACK_GROWSUP
3775         stack_offset -= sizeof(tcg_target_long);
3776 #endif
             /* A dummy argument stores nothing but still consumes its
                slot, because the offset is adjusted unconditionally. */
3777         if (arg != TCG_CALL_DUMMY_ARG) {
3778             ts = arg_temp(arg);
3779             temp_load(s, ts, tcg_target_available_regs[ts->type],
3780                       s->reserved_regs, 0);
3781             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3782         }
3783 #ifndef TCG_TARGET_STACK_GROWSUP
3784         stack_offset += sizeof(tcg_target_long);
3785 #endif
3786     }
3787 
3788     /* assign input registers */
3789     allocated_regs = s->reserved_regs;
3790     for (i = 0; i < nb_regs; i++) {
3791         arg = op->args[nb_oargs + i];
3792         if (arg != TCG_CALL_DUMMY_ARG) {
3793             ts = arg_temp(arg);
3794             reg = tcg_target_call_iarg_regs[i];
3795 
3796             if (ts->val_type == TEMP_VAL_REG) {
                     /* Already in a register: nothing to do when it is the
                        argument register, otherwise vacate the argument
                        register and copy the value into it. */
3797                 if (ts->reg != reg) {
3798                     tcg_reg_free(s, reg, allocated_regs);
3799                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3800                         /*
3801                          * Cross register class move not supported.  Sync the
3802                          * temp back to its slot and load from there.
3803                          */
3804                         temp_sync(s, ts, allocated_regs, 0, 0);
3805                         tcg_out_ld(s, ts->type, reg,
3806                                    ts->mem_base->reg, ts->mem_offset);
3807                     }
3808                 }
3809             } else {
                     /* Not in a register: vacate the argument register and
                        load the temp with a one-register constraint set so
                        it lands exactly there. */
3810                 TCGRegSet arg_set = 0;
3811 
3812                 tcg_reg_free(s, reg, allocated_regs);
3813                 tcg_regset_set_reg(arg_set, reg);
3814                 temp_load(s, ts, arg_set, allocated_regs, 0);
3815             }
3816 
3817             tcg_regset_set_reg(allocated_regs, reg);
3818         }
3819     }
3820 
3821     /* mark dead temporaries and free the associated registers */
3822     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3823         if (IS_DEAD_ARG(i)) {
3824             temp_dead(s, arg_temp(op->args[i]));
3825         }
3826     }
3827 
3828     /* clobber call registers */
3829     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3830         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3831             tcg_reg_free(s, i, allocated_regs);
3832         }
3833     }
3834 
3835     /* Save globals if they might be written by the helper, sync them if
3836        they might be read. */
3837     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3838         /* Nothing to do */
3839     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3840         sync_globals(s, allocated_regs);
3841     } else {
3842         save_globals(s, allocated_regs);
3843     }
3844 
3845     tcg_out_call(s, func_addr);
3846 
3847     /* assign output registers and emit moves if needed */
3848     for(i = 0; i < nb_oargs; i++) {
3849         arg = op->args[i];
3850         ts = arg_temp(arg);
3851 
3852         /* ENV should not be modified.  */
3853         tcg_debug_assert(!ts->fixed_reg);
3854 
             /* Output i lives in the i-th call output register, which is
                asserted to be unoccupied at this point. */
3855         reg = tcg_target_call_oarg_regs[i];
3856         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3857         if (ts->val_type == TEMP_VAL_REG) {
3858             s->reg_to_temp[ts->reg] = NULL;
3859         }
3860         ts->val_type = TEMP_VAL_REG;
3861         ts->reg = reg;
             /* The register now holds a new value, so the memory copy of
                the temp is marked as not coherent. */
3862         ts->mem_coherent = 0;
3863         s->reg_to_temp[reg] = ts;
3864         if (NEED_SYNC_ARG(i)) {
3865             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3866         } else if (IS_DEAD_ARG(i)) {
3867             temp_dead(s, ts);
3868         }
3869     }
3870 }
3871 
3872 #ifdef CONFIG_PROFILER
3873 
3874 /* avoid copy/paste errors */
3875 #define PROF_ADD(to, from, field)                       \
3876     do {                                                \
3877         (to)->field += atomic_read(&((from)->field));   \
3878     } while (0)
3879 
3880 #define PROF_MAX(to, from, field)                                       \
3881     do {                                                                \
3882         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3883         if (val__ > (to)->field) {                                      \
3884             (to)->field = val__;                                        \
3885         }                                                               \
3886     } while (0)
3887 
3888 /* Pass in a zero'ed @prof */
3889 static inline
3890 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3891 {
3892     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3893     unsigned int i;
3894 
3895     for (i = 0; i < n_ctxs; i++) {
3896         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3897         const TCGProfile *orig = &s->prof;
3898 
3899         if (counters) {
3900             PROF_ADD(prof, orig, cpu_exec_time);
3901             PROF_ADD(prof, orig, tb_count1);
3902             PROF_ADD(prof, orig, tb_count);
3903             PROF_ADD(prof, orig, op_count);
3904             PROF_MAX(prof, orig, op_count_max);
3905             PROF_ADD(prof, orig, temp_count);
3906             PROF_MAX(prof, orig, temp_count_max);
3907             PROF_ADD(prof, orig, del_op_count);
3908             PROF_ADD(prof, orig, code_in_len);
3909             PROF_ADD(prof, orig, code_out_len);
3910             PROF_ADD(prof, orig, search_out_len);
3911             PROF_ADD(prof, orig, interm_time);
3912             PROF_ADD(prof, orig, code_time);
3913             PROF_ADD(prof, orig, la_time);
3914             PROF_ADD(prof, orig, opt_time);
3915             PROF_ADD(prof, orig, restore_count);
3916             PROF_ADD(prof, orig, restore_time);
3917         }
3918         if (table) {
3919             int i;
3920 
3921             for (i = 0; i < NB_OPS; i++) {
3922                 PROF_ADD(prof, orig, table_op_count[i]);
3923             }
3924         }
3925     }
3926 }
3927 
3928 #undef PROF_ADD
3929 #undef PROF_MAX
3930 
3931 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3932 {
3933     tcg_profile_snapshot(prof, true, false);
3934 }
3935 
3936 static void tcg_profile_snapshot_table(TCGProfile *prof)
3937 {
3938     tcg_profile_snapshot(prof, false, true);
3939 }
3940 
3941 void tcg_dump_op_count(void)
3942 {
3943     TCGProfile prof = {};
3944     int i;
3945 
3946     tcg_profile_snapshot_table(&prof);
3947     for (i = 0; i < NB_OPS; i++) {
3948         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3949                     prof.table_op_count[i]);
3950     }
3951 }
3952 
3953 int64_t tcg_cpu_exec_time(void)
3954 {
3955     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3956     unsigned int i;
3957     int64_t ret = 0;
3958 
3959     for (i = 0; i < n_ctxs; i++) {
3960         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3961         const TCGProfile *prof = &s->prof;
3962 
3963         ret += atomic_read(&prof->cpu_exec_time);
3964     }
3965     return ret;
3966 }
3967 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no counts
 * to show, so only a notice is printed through qemu_printf().
 */
3968 void tcg_dump_op_count(void)
3969 {
3970     qemu_printf("[TCG profiler not compiled]\n");
3971 }
3972 
/*
 * Variant built when CONFIG_PROFILER is not defined.  It never returns a
 * value: it reports an error naming this function and terminates the
 * process with EXIT_FAILURE.
 */
3973 int64_t tcg_cpu_exec_time(void)
3974 {
3975     error_report("%s: TCG profiler not compiled", __func__);
3976     exit(EXIT_FAILURE);
3977 }
3978 #endif
3979 
3980 
/*
 * Turn the op list in @s into host code for @tb.
 *
 * Runs the optimizer (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachability and liveness passes, then walks the ops, doing register
 * allocation and code emission for each one, and finally finalizes
 * ldst/pool labels, resolves relocations and flushes the instruction
 * cache for the generated range.
 *
 * @s:  translation context whose op list is consumed
 * @tb: translation block; tb->tc.ptr is where code is written and tb->pc
 *      is used to filter debug logging
 *
 * Returns the size of the generated code on success.  Returns -1 when
 * code_ptr passed code_gen_highwater, -2 when the generated size exceeds
 * UINT16_MAX or relocations cannot be resolved, or the negative value
 * reported by tcg_out_ldst_finalize()/tcg_out_pool_finalize().
 */
3981 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3982 {
3983 #ifdef CONFIG_PROFILER
3984     TCGProfile *prof = &s->prof;
3985 #endif
3986     int i, num_insns;
3987     TCGOp *op;
3988 
3989 #ifdef CONFIG_PROFILER
3990     {
             /* Account the number of ops and temps of this TB, tracking
                both running totals and maxima. */
3991         int n = 0;
3992 
3993         QTAILQ_FOREACH(op, &s->ops, link) {
3994             n++;
3995         }
3996         atomic_set(&prof->op_count, prof->op_count + n);
3997         if (n > prof->op_count_max) {
3998             atomic_set(&prof->op_count_max, n);
3999         }
4000 
4001         n = s->nb_temps;
4002         atomic_set(&prof->temp_count, prof->temp_count + n);
4003         if (n > prof->temp_count_max) {
4004             atomic_set(&prof->temp_count_max, n);
4005         }
4006     }
4007 #endif
4008 
4009 #ifdef DEBUG_DISAS
4010     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4011                  && qemu_log_in_addr_range(tb->pc))) {
4012         qemu_log_lock();
4013         qemu_log("OP:\n");
4014         tcg_dump_ops(s, false);
4015         qemu_log("\n");
4016         qemu_log_unlock();
4017     }
4018 #endif
4019 
4020 #ifdef CONFIG_DEBUG_TCG
4021     /* Ensure all labels referenced have been emitted.  */
4022     {
4023         TCGLabel *l;
4024         bool error = false;
4025 
4026         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4027             if (unlikely(!l->present) && l->refs) {
4028                 qemu_log_mask(CPU_LOG_TB_OP,
4029                               "$L%d referenced but not present.\n", l->id);
4030                 error = true;
4031             }
4032         }
4033         assert(!error);
4034     }
4035 #endif
4036 
     /* Timers are accumulated by subtracting the clock before a phase and
        adding it back afterwards. */
4037 #ifdef CONFIG_PROFILER
4038     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4039 #endif
4040 
4041 #ifdef USE_TCG_OPTIMIZATIONS
4042     tcg_optimize(s);
4043 #endif
4044 
4045 #ifdef CONFIG_PROFILER
4046     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4047     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4048 #endif
4049 
4050     reachable_code_pass(s);
4051     liveness_pass_1(s);
4052 
4053     if (s->nb_indirects > 0) {
4054 #ifdef DEBUG_DISAS
4055         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4056                      && qemu_log_in_addr_range(tb->pc))) {
4057             qemu_log_lock();
4058             qemu_log("OP before indirect lowering:\n");
4059             tcg_dump_ops(s, false);
4060             qemu_log("\n");
4061             qemu_log_unlock();
4062         }
4063 #endif
4064         /* Replace indirect temps with direct temps.  */
4065         if (liveness_pass_2(s)) {
4066             /* If changes were made, re-run liveness.  */
4067             liveness_pass_1(s);
4068         }
4069     }
4070 
4071 #ifdef CONFIG_PROFILER
4072     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4073 #endif
4074 
4075 #ifdef DEBUG_DISAS
4076     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4077                  && qemu_log_in_addr_range(tb->pc))) {
4078         qemu_log_lock();
4079         qemu_log("OP after optimization and liveness analysis:\n");
4080         tcg_dump_ops(s, true);
4081         qemu_log("\n");
4082         qemu_log_unlock();
4083     }
4084 #endif
4085 
4086     tcg_reg_alloc_start(s);
4087 
         /* Code is emitted starting at the TB's code pointer. */
4088     s->code_buf = tb->tc.ptr;
4089     s->code_ptr = tb->tc.ptr;
4090 
4091 #ifdef TCG_TARGET_NEED_LDST_LABELS
4092     QSIMPLEQ_INIT(&s->ldst_labels);
4093 #endif
4094 #ifdef TCG_TARGET_NEED_POOL_LABELS
4095     s->pool_labels = NULL;
4096 #endif
4097 
         /* num_insns is the index of the guest instruction currently being
            emitted; -1 until the first insn_start op is seen. */
4098     num_insns = -1;
4099     QTAILQ_FOREACH(op, &s->ops, link) {
4100         TCGOpcode opc = op->opc;
4101 
4102 #ifdef CONFIG_PROFILER
4103         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4104 #endif
4105 
4106         switch (opc) {
4107         case INDEX_op_mov_i32:
4108         case INDEX_op_mov_i64:
4109         case INDEX_op_mov_vec:
4110             tcg_reg_alloc_mov(s, op);
4111             break;
4112         case INDEX_op_movi_i32:
4113         case INDEX_op_movi_i64:
4114         case INDEX_op_dupi_vec:
4115             tcg_reg_alloc_movi(s, op);
4116             break;
4117         case INDEX_op_dup_vec:
4118             tcg_reg_alloc_dup(s, op);
4119             break;
4120         case INDEX_op_insn_start:
                 /* Record where the previous guest instruction's code ended
                    before moving on to the next one. */
4121             if (num_insns >= 0) {
4122                 size_t off = tcg_current_code_size(s);
4123                 s->gen_insn_end_off[num_insns] = off;
4124                 /* Assert that we do not overflow our stored offset.  */
4125                 assert(s->gen_insn_end_off[num_insns] == off);
4126             }
4127             num_insns++;
4128             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4129                 target_ulong a;
                     /* When a target word is wider than a host register,
                        each word is rebuilt from two consecutive op
                        arguments (low half first). */
4130 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4131                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4132 #else
4133                 a = op->args[i];
4134 #endif
4135                 s->gen_insn_data[num_insns][i] = a;
4136             }
4137             break;
4138         case INDEX_op_discard:
4139             temp_dead(s, arg_temp(op->args[0]));
4140             break;
4141         case INDEX_op_set_label:
4142             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4143             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4144             break;
4145         case INDEX_op_call:
4146             tcg_reg_alloc_call(s, op);
4147             break;
4148         default:
4149             /* Sanity check that we've not introduced any unhandled opcodes. */
4150             tcg_debug_assert(tcg_op_supported(opc));
4151             /* Note: in order to speed up the code, it would be much
4152                faster to have specialized register allocator functions for
4153                some common argument patterns */
4154             tcg_reg_alloc_op(s, op);
4155             break;
4156         }
4157 #ifdef CONFIG_DEBUG_TCG
4158         check_regs(s);
4159 #endif
4160         /* Test for (pending) buffer overflow.  The assumption is that any
4161            one operation beginning below the high water mark cannot overrun
4162            the buffer completely.  Thus we can test for overflow after
4163            generating code without having to check during generation.  */
4164         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4165             return -1;
4166         }
4167         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4168         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4169             return -2;
4170         }
4171     }
         /* At least one insn_start op must have been seen; close the last
            guest instruction's code range. */
4172     tcg_debug_assert(num_insns >= 0);
4173     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4174 
4175     /* Generate TB finalization at the end of block */
4176 #ifdef TCG_TARGET_NEED_LDST_LABELS
4177     i = tcg_out_ldst_finalize(s);
4178     if (i < 0) {
4179         return i;
4180     }
4181 #endif
4182 #ifdef TCG_TARGET_NEED_POOL_LABELS
4183     i = tcg_out_pool_finalize(s);
4184     if (i < 0) {
4185         return i;
4186     }
4187 #endif
4188     if (!tcg_resolve_relocs(s)) {
4189         return -2;
4190     }
4191 
4192     /* flush instruction cache */
4193     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4194 
4195     return tcg_current_code_size(s);
4196 }
4197 
4198 #ifdef CONFIG_PROFILER
4199 void tcg_dump_info(void)
4200 {
4201     TCGProfile prof = {};
4202     const TCGProfile *s;
4203     int64_t tb_count;
4204     int64_t tb_div_count;
4205     int64_t tot;
4206 
4207     tcg_profile_snapshot_counters(&prof);
4208     s = &prof;
4209     tb_count = s->tb_count;
4210     tb_div_count = tb_count ? tb_count : 1;
4211     tot = s->interm_time + s->code_time;
4212 
4213     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4214                 tot, tot / 2.4e9);
4215     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4216                 " %0.1f%%)\n",
4217                 tb_count, s->tb_count1 - tb_count,
4218                 (double)(s->tb_count1 - s->tb_count)
4219                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4220     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4221                 (double)s->op_count / tb_div_count, s->op_count_max);
4222     qemu_printf("deleted ops/TB      %0.2f\n",
4223                 (double)s->del_op_count / tb_div_count);
4224     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4225                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4226     qemu_printf("avg host code/TB    %0.1f\n",
4227                 (double)s->code_out_len / tb_div_count);
4228     qemu_printf("avg search data/TB  %0.1f\n",
4229                 (double)s->search_out_len / tb_div_count);
4230 
4231     qemu_printf("cycles/op           %0.1f\n",
4232                 s->op_count ? (double)tot / s->op_count : 0);
4233     qemu_printf("cycles/in byte      %0.1f\n",
4234                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4235     qemu_printf("cycles/out byte     %0.1f\n",
4236                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4237     qemu_printf("cycles/search byte     %0.1f\n",
4238                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4239     if (tot == 0) {
4240         tot = 1;
4241     }
4242     qemu_printf("  gen_interm time   %0.1f%%\n",
4243                 (double)s->interm_time / tot * 100.0);
4244     qemu_printf("  gen_code time     %0.1f%%\n",
4245                 (double)s->code_time / tot * 100.0);
4246     qemu_printf("optim./code time    %0.1f%%\n",
4247                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4248                 * 100.0);
4249     qemu_printf("liveness/code time  %0.1f%%\n",
4250                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4251     qemu_printf("cpu_restore count   %" PRId64 "\n",
4252                 s->restore_count);
4253     qemu_printf("  avg cycles        %0.1f\n",
4254                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4255 }
4256 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no
 * statistics to show, so only a notice is printed through qemu_printf().
 */
4257 void tcg_dump_info(void)
4258 {
4259     qemu_printf("[TCG profiler not compiled]\n");
4260 }
4261 #endif
4262 
4263 #ifdef ELF_HOST_MACHINE
4264 /* In order to use this feature, the backend needs to do three things:
4265 
4266    (1) Define ELF_HOST_MACHINE to indicate both what value to
4267        put into the ELF image and to indicate support for the feature.
4268 
4269    (2) Define tcg_register_jit.  This should create a buffer containing
4270        the contents of a .debug_frame section that describes the post-
4271        prologue unwind info for the tcg machine.
4272 
4273    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4274 */
4275 
4276 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes of the GDB JIT interface.  The numeric values (0, 1, 2)
 * are part of that interface and must not be reordered.
 * NOTE(review): presumably these are the values stored in
 * jit_descriptor.action_flag -- confirm against the GDB documentation.
 */
4277 typedef enum {
4278     JIT_NOACTION = 0,
4279     JIT_REGISTER_FN,
4280     JIT_UNREGISTER_FN
4281 } jit_actions_t;
4282 
/*
 * One node of a doubly linked list of JIT symbol files.  Field order and
 * types belong to the GDB interface and must stay as they are.
 * symfile_addr/symfile_size describe an in-memory symbol file
 * (NOTE(review): presumably the ELF image built later in this file --
 * confirm at the point of use).
 */
4283 struct jit_code_entry {
4284     struct jit_code_entry *next_entry;
4285     struct jit_code_entry *prev_entry;
4286     const void *symfile_addr;
4287     uint64_t symfile_size;
4288 };
4289 
/*
 * Root descriptor of the GDB JIT interface.  Field order and types belong
 * to that interface and must stay as they are.
 * NOTE(review): action_flag presumably carries a jit_actions_t value and
 * relevant_entry the entry that action applies to -- confirm against the
 * GDB documentation.
 */
4290 struct jit_descriptor {
4291     uint32_t version;
4292     uint32_t action_flag;
4293     struct jit_code_entry *relevant_entry;
4294     struct jit_code_entry *first_entry;
4295 };
4296 
/*
 * Hook function of the GDB JIT interface.  It intentionally does nothing:
 * the body is a single empty asm statement and the function is declared
 * noinline so that a distinct, callable symbol exists.
 * NOTE(review): per the GDB JIT interface the debugger is expected to
 * place a breakpoint here -- confirm against the GDB documentation.
 */
4297 void __jit_debug_register_code(void) __attribute__((noinline));
4298 void __jit_debug_register_code(void)
4299 {
4300     asm("");
4301 }
4302 
4303 /* Must statically initialize the version, because GDB may check
4304    the version before we can set it.  */
     /* Initializer order follows struct jit_descriptor: version = 1,
        action_flag = 0, relevant_entry and first_entry null. */
4305 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4306 
4307 /* End GDB interface.  */
4308 
/*
 * Look up @str in the NUL-separated string table @strtab.
 *
 * The table is expected to start with an empty string at offset 0 and to
 * be terminated by an empty entry (e.g. the zero padding of a fixed-size
 * array).  The search starts at offset 1 and compares each entry in turn.
 *
 * Returns the byte offset of the first matching entry.  If the
 * terminating empty entry is reached without a match, 0 is returned, i.e.
 * the offset of the table's leading empty string, instead of scanning
 * past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Hit the terminating empty entry: @str is not present. */
            return 0;
        }
        p += strlen(p) + 1;
    }
}
4320 
4321 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4322                                  const void *debug_frame,
4323                                  size_t debug_frame_size)
4324 {
4325     struct __attribute__((packed)) DebugInfo {
4326         uint32_t  len;
4327         uint16_t  version;
4328         uint32_t  abbrev;
4329         uint8_t   ptr_size;
4330         uint8_t   cu_die;
4331         uint16_t  cu_lang;
4332         uintptr_t cu_low_pc;
4333         uintptr_t cu_high_pc;
4334         uint8_t   fn_die;
4335         char      fn_name[16];
4336         uintptr_t fn_low_pc;
4337         uintptr_t fn_high_pc;
4338         uint8_t   cu_eoc;
4339     };
4340 
4341     struct ElfImage {
4342         ElfW(Ehdr) ehdr;
4343         ElfW(Phdr) phdr;
4344         ElfW(Shdr) shdr[7];
4345         ElfW(Sym)  sym[2];
4346         struct DebugInfo di;
4347         uint8_t    da[24];
4348         char       str[80];
4349     };
4350 
4351     struct ElfImage *img;
4352 
4353     static const struct ElfImage img_template = {
4354         .ehdr = {
4355             .e_ident[EI_MAG0] = ELFMAG0,
4356             .e_ident[EI_MAG1] = ELFMAG1,
4357             .e_ident[EI_MAG2] = ELFMAG2,
4358             .e_ident[EI_MAG3] = ELFMAG3,
4359             .e_ident[EI_CLASS] = ELF_CLASS,
4360             .e_ident[EI_DATA] = ELF_DATA,
4361             .e_ident[EI_VERSION] = EV_CURRENT,
4362             .e_type = ET_EXEC,
4363             .e_machine = ELF_HOST_MACHINE,
4364             .e_version = EV_CURRENT,
4365             .e_phoff = offsetof(struct ElfImage, phdr),
4366             .e_shoff = offsetof(struct ElfImage, shdr),
4367             .e_ehsize = sizeof(ElfW(Shdr)),
4368             .e_phentsize = sizeof(ElfW(Phdr)),
4369             .e_phnum = 1,
4370             .e_shentsize = sizeof(ElfW(Shdr)),
4371             .e_shnum = ARRAY_SIZE(img->shdr),
4372             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4373 #ifdef ELF_HOST_FLAGS
4374             .e_flags = ELF_HOST_FLAGS,
4375 #endif
4376 #ifdef ELF_OSABI
4377             .e_ident[EI_OSABI] = ELF_OSABI,
4378 #endif
4379         },
4380         .phdr = {
4381             .p_type = PT_LOAD,
4382             .p_flags = PF_X,
4383         },
4384         .shdr = {
4385             [0] = { .sh_type = SHT_NULL },
4386             /* Trick: The contents of code_gen_buffer are not present in
4387                this fake ELF file; that got allocated elsewhere.  Therefore
4388                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4389                will not look for contents.  We can record any address.  */
4390             [1] = { /* .text */
4391                 .sh_type = SHT_NOBITS,
4392                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4393             },
4394             [2] = { /* .debug_info */
4395                 .sh_type = SHT_PROGBITS,
4396                 .sh_offset = offsetof(struct ElfImage, di),
4397                 .sh_size = sizeof(struct DebugInfo),
4398             },
4399             [3] = { /* .debug_abbrev */
4400                 .sh_type = SHT_PROGBITS,
4401                 .sh_offset = offsetof(struct ElfImage, da),
4402                 .sh_size = sizeof(img->da),
4403             },
4404             [4] = { /* .debug_frame */
4405                 .sh_type = SHT_PROGBITS,
4406                 .sh_offset = sizeof(struct ElfImage),
4407             },
4408             [5] = { /* .symtab */
4409                 .sh_type = SHT_SYMTAB,
4410                 .sh_offset = offsetof(struct ElfImage, sym),
4411                 .sh_size = sizeof(img->sym),
4412                 .sh_info = 1,
4413                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4414                 .sh_entsize = sizeof(ElfW(Sym)),
4415             },
4416             [6] = { /* .strtab */
4417                 .sh_type = SHT_STRTAB,
4418                 .sh_offset = offsetof(struct ElfImage, str),
4419                 .sh_size = sizeof(img->str),
4420             }
4421         },
4422         .sym = {
4423             [1] = { /* code_gen_buffer */
4424                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4425                 .st_shndx = 1,
4426             }
4427         },
4428         .di = {
4429             .len = sizeof(struct DebugInfo) - 4,
4430             .version = 2,
4431             .ptr_size = sizeof(void *),
4432             .cu_die = 1,
4433             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4434             .fn_die = 2,
4435             .fn_name = "code_gen_buffer"
4436         },
4437         .da = {
4438             1,          /* abbrev number (the cu) */
4439             0x11, 1,    /* DW_TAG_compile_unit, has children */
4440             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4441             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4442             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4443             0, 0,       /* end of abbrev */
4444             2,          /* abbrev number (the fn) */
4445             0x2e, 0,    /* DW_TAG_subprogram, no children */
4446             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4447             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4448             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4449             0, 0,       /* end of abbrev */
4450             0           /* no more abbrev */
4451         },
4452         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4453                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4454     };
4455 
4456     /* We only need a single jit entry; statically allocate it.  */
4457     static struct jit_code_entry one_entry;
4458 
4459     uintptr_t buf = (uintptr_t)buf_ptr;
4460     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4461     DebugFrameHeader *dfh;
4462 
4463     img = g_malloc(img_size);
4464     *img = img_template;
4465 
4466     img->phdr.p_vaddr = buf;
4467     img->phdr.p_paddr = buf;
4468     img->phdr.p_memsz = buf_size;
4469 
4470     img->shdr[1].sh_name = find_string(img->str, ".text");
4471     img->shdr[1].sh_addr = buf;
4472     img->shdr[1].sh_size = buf_size;
4473 
4474     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4475     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4476 
4477     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4478     img->shdr[4].sh_size = debug_frame_size;
4479 
4480     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4481     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4482 
4483     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4484     img->sym[1].st_value = buf;
4485     img->sym[1].st_size = buf_size;
4486 
4487     img->di.cu_low_pc = buf;
4488     img->di.cu_high_pc = buf + buf_size;
4489     img->di.fn_low_pc = buf;
4490     img->di.fn_high_pc = buf + buf_size;
4491 
4492     dfh = (DebugFrameHeader *)(img + 1);
4493     memcpy(dfh, debug_frame, debug_frame_size);
4494     dfh->fde.func_start = buf;
4495     dfh->fde.func_len = buf_size;
4496 
4497 #ifdef DEBUG_JIT
4498     /* Enable this block to be able to debug the ELF image file creation.
4499        One can use readelf, objdump, or other inspection utilities.  */
4500     {
4501         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4502         if (f) {
4503             if (fwrite(img, img_size, 1, f) != img_size) {
4504                 /* Avoid stupid unused return value warning for fwrite.  */
4505             }
4506             fclose(f);
4507         }
4508     }
4509 #endif
4510 
4511     one_entry.symfile_addr = img;
4512     one_entry.symfile_size = img_size;
4513 
4514     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4515     __jit_debug_descriptor.relevant_entry = &one_entry;
4516     __jit_debug_descriptor.first_entry = &one_entry;
4517     __jit_debug_register_code();
4518 }
4519 #else
4520 /* No support for the feature.  Provide the entry point expected by exec.c,
4521    and implement the internal function we declared earlier.  */
4522 
/*
 * Stub for builds that take the #else branch of the ELF_HOST_MACHINE
 * conditional: no debug image is built and nothing is registered.
 *
 * @buf:              start of the generated-code buffer (ignored)
 * @size:             size of that buffer in bytes (ignored)
 * @debug_frame:      template for the .debug_frame contents (ignored)
 * @debug_frame_size: size of @debug_frame in bytes (ignored)
 *
 * The function exists only so that the internal declaration made earlier
 * in the file has a definition; it has no side effects.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty: mark every argument as deliberately unused.  */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4528 
/*
 * Public entry point for JIT registration on hosts without support for
 * the feature.  Callers may invoke it unconditionally; it does nothing.
 *
 * @buf:      start of the generated-code buffer (ignored)
 * @buf_size: size of that buffer in bytes (ignored)
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Intentionally empty: mark both arguments as deliberately unused.  */
    (void)buf;
    (void)buf_size;
}
4532 #endif /* ELF_HOST_MACHINE */
4533 
4534 #if !TCG_TARGET_MAYBE_vec
4535 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4536 {
4537     g_assert_not_reached();
4538 }
4539 #endif
4540