xref: /openbmc/qemu/tcg/tcg.c (revision 709395f8)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/timer.h"
37 
38 /* Note: the long term plan is to reduce the dependencies on the QEMU
39    CPU definitions. Currently they are used for qemu_ld/st
40    instructions */
41 #define NO_CPU_IO_DEFS
42 #include "cpu.h"
43 
44 #include "exec/cpu-common.h"
45 #include "exec/exec-all.h"
46 
47 #include "tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #ifdef HOST_WORDS_BIGENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "sysemu/sysemu.h"
63 
64 /* Forward declarations for functions declared in tcg-target.inc.c and
65    used here. */
/* Backend initialisation hook; called once from tcg_context_init(). */
static void tcg_target_init(TCGContext *s);
/* Backend lookup of the operand definition for one opcode. */
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
/* Backend hook that emits the prologue; called from tcg_prologue_init(). */
static void tcg_target_qemu_prologue(TCGContext *s);
/*
 * Backend hook that applies one relocation of kind @type at @code_ptr.
 * Callers in this file assert that the returned value is true.
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
71 
72 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE header.  NOTE(review): presumably the DWARF .debug_frame Common
 * Information Entry -- confirm against the users of this struct.
 */
typedef struct {
    /* 'len' is forced to pointer-size alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/*
 * FDE header (packed).  func_start/func_len are pointer-sized; presumably
 * they describe the address range the entry covers -- TODO confirm.
 */
typedef struct QEMU_PACKED {
    /* 'len' is forced to pointer-size alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE header, packed together. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
94 
/*
 * Forward declaration; the definition is not in this part of the file.
 * Marked unused so that a build which never calls it does not warn.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
99 
100 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
/* Backend constraint-string parser. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
/* Backend emitters for loads, register moves and immediate moves. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
/* Backend emitter for a generic (non-vector) opcode. */
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
/* Backend emitter for a vector opcode; only provided when vectors may exist. */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/*
 * Stub for backends without vector support: it must never be reached, so it
 * simply trips g_assert_not_reached().
 */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
/* Backend emitters for stores (register source, and value source). */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
/* Backend emitter for a call to @target. */
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
/* Backend test of whether constant @val satisfies constraint @arg_ct. */
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
/* Only declared for backends that define TCG_TARGET_NEED_LDST_LABELS. */
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif
132 
/*
 * Number of bytes reserved at the end of a code buffer: the high-water mark
 * is placed this far before the buffer end (see tcg_region_assign()).
 */
#define TCG_HIGHWATER 1024

/* Array of registered TCG contexts and the number of valid entries in it. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
/* Assigned in tcg_context_init() from the "env" global bound to TCG_AREG0. */
TCGv_env cpu_env = 0;
138 
/*
 * Per-region search tree of translation blocks together with the mutex that
 * guards it.  Instances live in the region_trees array, spaced tree_size
 * bytes apart.
 */
struct tcg_region_tree {
    QemuMutex lock;
    /* Created with tb_tc_cmp as comparator; keys are struct tb_tc pointers. */
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
144 
145 /*
146  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
147  * dynamically allocate from as demand dictates. Given appropriate region
148  * sizing, this minimizes flushes even when some TCG threads generate a lot
149  * more code than others.
150  */
/* Global bookkeeping for the partitioning of code_gen_buffer into regions. */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;          /* start of the buffer (unaligned) */
    void *start_aligned;  /* start rounded up to a host page boundary */
    void *end;            /* page-aligned end, excluding the last guard page */
    size_t n;             /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
166 
/* The single instance of the region state; filled in by tcg_region_init(). */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
/* sizeof(struct tcg_region_tree) rounded up to the data cache line size. */
static size_t tree_size;
/* Register sets owned by this file; presumably filled in by the backend. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
177 
178 #if TCG_TARGET_INSN_UNIT_SIZE == 1
179 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
180 {
181     *s->code_ptr++ = v;
182 }
183 
184 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
185                                                       uint8_t v)
186 {
187     *p = v;
188 }
189 #endif
190 
191 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
192 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
193 {
194     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
195         *s->code_ptr++ = v;
196     } else {
197         tcg_insn_unit *p = s->code_ptr;
198         memcpy(p, &v, sizeof(v));
199         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
200     }
201 }
202 
203 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
204                                                        uint16_t v)
205 {
206     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
207         *p = v;
208     } else {
209         memcpy(p, &v, sizeof(v));
210     }
211 }
212 #endif
213 
214 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
215 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
216 {
217     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
218         *s->code_ptr++ = v;
219     } else {
220         tcg_insn_unit *p = s->code_ptr;
221         memcpy(p, &v, sizeof(v));
222         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
223     }
224 }
225 
226 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
227                                                        uint32_t v)
228 {
229     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
230         *p = v;
231     } else {
232         memcpy(p, &v, sizeof(v));
233     }
234 }
235 #endif
236 
237 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
238 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
239 {
240     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
241         *s->code_ptr++ = v;
242     } else {
243         tcg_insn_unit *p = s->code_ptr;
244         memcpy(p, &v, sizeof(v));
245         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
246     }
247 }
248 
249 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
250                                                        uint64_t v)
251 {
252     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
253         *p = v;
254     } else {
255         memcpy(p, &v, sizeof(v));
256     }
257 }
258 #endif
259 
260 /* label relocation processing */
261 
262 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
263                           TCGLabel *l, intptr_t addend)
264 {
265     TCGRelocation *r;
266 
267     if (l->has_value) {
268         /* FIXME: This may break relocations on RISC targets that
269            modify instruction fields in place.  The caller may not have
270            written the initial value.  */
271         bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
272         tcg_debug_assert(ok);
273     } else {
274         /* add a new relocation entry */
275         r = tcg_malloc(sizeof(TCGRelocation));
276         r->type = type;
277         r->ptr = code_ptr;
278         r->addend = addend;
279         r->next = l->u.first_reloc;
280         l->u.first_reloc = r;
281     }
282 }
283 
284 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
285 {
286     intptr_t value = (intptr_t)ptr;
287     TCGRelocation *r;
288 
289     tcg_debug_assert(!l->has_value);
290 
291     for (r = l->u.first_reloc; r != NULL; r = r->next) {
292         bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
293         tcg_debug_assert(ok);
294     }
295 
296     l->has_value = 1;
297     l->u.value_ptr = ptr;
298 }
299 
300 TCGLabel *gen_new_label(void)
301 {
302     TCGContext *s = tcg_ctx;
303     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
304 
305     *l = (TCGLabel){
306         .id = s->nb_labels++
307     };
308 #ifdef CONFIG_DEBUG_TCG
309     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
310 #endif
311 
312     return l;
313 }
314 
315 static void set_jmp_reset_offset(TCGContext *s, int which)
316 {
317     size_t off = tcg_current_code_size(s);
318     s->tb_jmp_reset_offset[which] = off;
319     /* Make sure that we didn't overflow the stored offset.  */
320     assert(s->tb_jmp_reset_offset[which] == off);
321 }
322 
323 #include "tcg-target.inc.c"
324 
325 /* compare a pointer @ptr and a tb_tc @s */
326 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
327 {
328     if (ptr >= s->ptr + s->size) {
329         return 1;
330     } else if (ptr < s->ptr) {
331         return -1;
332     }
333     return 0;
334 }
335 
336 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
337 {
338     const struct tb_tc *a = ap;
339     const struct tb_tc *b = bp;
340 
341     /*
342      * When both sizes are set, we know this isn't a lookup.
343      * This is the most likely case: every TB must be inserted; lookups
344      * are a lot less frequent.
345      */
346     if (likely(a->size && b->size)) {
347         if (a->ptr > b->ptr) {
348             return 1;
349         } else if (a->ptr < b->ptr) {
350             return -1;
351         }
352         /* a->ptr == b->ptr should happen only on deletions */
353         g_assert(a->size == b->size);
354         return 0;
355     }
356     /*
357      * All lookups have either .size field set to 0.
358      * From the glib sources we see that @ap is always the lookup key. However
359      * the docs provide no guarantee, so we just mark this case as likely.
360      */
361     if (likely(a->size == 0)) {
362         return ptr_cmp_tb_tc(a->ptr, b);
363     }
364     return ptr_cmp_tb_tc(b->ptr, a);
365 }
366 
367 static void tcg_region_trees_init(void)
368 {
369     size_t i;
370 
371     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
372     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
373     for (i = 0; i < region.n; i++) {
374         struct tcg_region_tree *rt = region_trees + i * tree_size;
375 
376         qemu_mutex_init(&rt->lock);
377         rt->tree = g_tree_new(tb_tc_cmp);
378     }
379 }
380 
381 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
382 {
383     size_t region_idx;
384 
385     if (p < region.start_aligned) {
386         region_idx = 0;
387     } else {
388         ptrdiff_t offset = p - region.start_aligned;
389 
390         if (offset > region.stride * (region.n - 1)) {
391             region_idx = region.n - 1;
392         } else {
393             region_idx = offset / region.stride;
394         }
395     }
396     return region_trees + region_idx * tree_size;
397 }
398 
399 void tcg_tb_insert(TranslationBlock *tb)
400 {
401     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
402 
403     qemu_mutex_lock(&rt->lock);
404     g_tree_insert(rt->tree, &tb->tc, tb);
405     qemu_mutex_unlock(&rt->lock);
406 }
407 
408 void tcg_tb_remove(TranslationBlock *tb)
409 {
410     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
411 
412     qemu_mutex_lock(&rt->lock);
413     g_tree_remove(rt->tree, &tb->tc);
414     qemu_mutex_unlock(&rt->lock);
415 }
416 
417 /*
418  * Find the TB 'tb' such that
419  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
420  * Return NULL if not found.
421  */
422 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
423 {
424     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
425     TranslationBlock *tb;
426     struct tb_tc s = { .ptr = (void *)tc_ptr };
427 
428     qemu_mutex_lock(&rt->lock);
429     tb = g_tree_lookup(rt->tree, &s);
430     qemu_mutex_unlock(&rt->lock);
431     return tb;
432 }
433 
434 static void tcg_region_tree_lock_all(void)
435 {
436     size_t i;
437 
438     for (i = 0; i < region.n; i++) {
439         struct tcg_region_tree *rt = region_trees + i * tree_size;
440 
441         qemu_mutex_lock(&rt->lock);
442     }
443 }
444 
445 static void tcg_region_tree_unlock_all(void)
446 {
447     size_t i;
448 
449     for (i = 0; i < region.n; i++) {
450         struct tcg_region_tree *rt = region_trees + i * tree_size;
451 
452         qemu_mutex_unlock(&rt->lock);
453     }
454 }
455 
456 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
457 {
458     size_t i;
459 
460     tcg_region_tree_lock_all();
461     for (i = 0; i < region.n; i++) {
462         struct tcg_region_tree *rt = region_trees + i * tree_size;
463 
464         g_tree_foreach(rt->tree, func, user_data);
465     }
466     tcg_region_tree_unlock_all();
467 }
468 
469 size_t tcg_nb_tbs(void)
470 {
471     size_t nb_tbs = 0;
472     size_t i;
473 
474     tcg_region_tree_lock_all();
475     for (i = 0; i < region.n; i++) {
476         struct tcg_region_tree *rt = region_trees + i * tree_size;
477 
478         nb_tbs += g_tree_nnodes(rt->tree);
479     }
480     tcg_region_tree_unlock_all();
481     return nb_tbs;
482 }
483 
484 static void tcg_region_tree_reset_all(void)
485 {
486     size_t i;
487 
488     tcg_region_tree_lock_all();
489     for (i = 0; i < region.n; i++) {
490         struct tcg_region_tree *rt = region_trees + i * tree_size;
491 
492         /* Increment the refcount first so that destroy acts as a reset */
493         g_tree_ref(rt->tree);
494         g_tree_destroy(rt->tree);
495     }
496     tcg_region_tree_unlock_all();
497 }
498 
499 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
500 {
501     void *start, *end;
502 
503     start = region.start_aligned + curr_region * region.stride;
504     end = start + region.size;
505 
506     if (curr_region == 0) {
507         start = region.start;
508     }
509     if (curr_region == region.n - 1) {
510         end = region.end;
511     }
512 
513     *pstart = start;
514     *pend = end;
515 }
516 
517 static void tcg_region_assign(TCGContext *s, size_t curr_region)
518 {
519     void *start, *end;
520 
521     tcg_region_bounds(curr_region, &start, &end);
522 
523     s->code_gen_buffer = start;
524     s->code_gen_ptr = start;
525     s->code_gen_buffer_size = end - start;
526     s->code_gen_highwater = end - TCG_HIGHWATER;
527 }
528 
529 static bool tcg_region_alloc__locked(TCGContext *s)
530 {
531     if (region.current == region.n) {
532         return true;
533     }
534     tcg_region_assign(s, region.current);
535     region.current++;
536     return false;
537 }
538 
539 /*
540  * Request a new region once the one in use has filled up.
541  * Returns true on error.
542  */
543 static bool tcg_region_alloc(TCGContext *s)
544 {
545     bool err;
546     /* read the region size now; alloc__locked will overwrite it on success */
547     size_t size_full = s->code_gen_buffer_size;
548 
549     qemu_mutex_lock(&region.lock);
550     err = tcg_region_alloc__locked(s);
551     if (!err) {
552         region.agg_size_full += size_full - TCG_HIGHWATER;
553     }
554     qemu_mutex_unlock(&region.lock);
555     return err;
556 }
557 
558 /*
559  * Perform a context's first region allocation.
560  * This function does _not_ increment region.agg_size_full.
561  */
562 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
563 {
564     return tcg_region_alloc__locked(s);
565 }
566 
567 /* Call from a safe-work context */
568 void tcg_region_reset_all(void)
569 {
570     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
571     unsigned int i;
572 
573     qemu_mutex_lock(&region.lock);
574     region.current = 0;
575     region.agg_size_full = 0;
576 
577     for (i = 0; i < n_ctxs; i++) {
578         TCGContext *s = atomic_read(&tcg_ctxs[i]);
579         bool err = tcg_region_initial_alloc__locked(s);
580 
581         g_assert(!err);
582     }
583     qemu_mutex_unlock(&region.lock);
584 
585     tcg_region_tree_reset_all();
586 }
587 
588 #ifdef CONFIG_USER_ONLY
589 static size_t tcg_n_regions(void)
590 {
591     return 1;
592 }
593 #else
594 /*
595  * It is likely that some vCPUs will translate more code than others, so we
596  * first try to set more regions than max_cpus, with those regions being of
597  * reasonable size. If that's not possible we make do by evenly dividing
598  * the code_gen_buffer among the vCPUs.
599  */
600 static size_t tcg_n_regions(void)
601 {
602     size_t i;
603 
604     /* Use a single region if all we have is one vCPU thread */
605     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
606         return 1;
607     }
608 
609     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
610     for (i = 8; i > 0; i--) {
611         size_t regions_per_thread = i;
612         size_t region_size;
613 
614         region_size = tcg_init_ctx.code_gen_buffer_size;
615         region_size /= max_cpus * regions_per_thread;
616 
617         if (region_size >= 2 * 1024u * 1024) {
618             return max_cpus * regions_per_thread;
619         }
620     }
621     /* If we can't, then just allocate one region per vCPU thread */
622     return max_cpus;
623 }
624 #endif
625 
626 /*
627  * Initializes region partitioning.
628  *
629  * Called at init time from the parent thread (i.e. the one calling
630  * tcg_context_init), after the target's TCG globals have been set.
631  *
632  * Region partitioning works by splitting code_gen_buffer into separate regions,
633  * and then assigning regions to TCG threads so that the threads can translate
634  * code in parallel without synchronization.
635  *
636  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
637  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
638  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
639  * must have been parsed before calling this function, since it calls
640  * qemu_tcg_mttcg_enabled().
641  *
642  * In user-mode we use a single region.  Having multiple regions in user-mode
643  * is not supported, because the number of vCPU threads (recall that each thread
644  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
645  * OS, and usually this number is huge (tens of thousands is not uncommon).
646  * Thus, given this large bound on the number of vCPU threads and the fact
647  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
649  *
650  * However, this user-mode limitation is unlikely to be a significant problem
651  * in practice. Multi-threaded guests share most if not all of their translated
652  * code, which makes parallel code generation less appealing than in softmmu.
653  */
654 void tcg_region_init(void)
655 {
656     void *buf = tcg_init_ctx.code_gen_buffer;
657     void *aligned;
658     size_t size = tcg_init_ctx.code_gen_buffer_size;
659     size_t page_size = qemu_real_host_page_size;
660     size_t region_size;
661     size_t n_regions;
662     size_t i;
663 
664     n_regions = tcg_n_regions();
665 
666     /* The first region will be 'aligned - buf' bytes larger than the others */
667     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
668     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
669     /*
670      * Make region_size a multiple of page_size, using aligned as the start.
671      * As a result of this we might end up with a few extra pages at the end of
672      * the buffer; we will assign those to the last region.
673      */
674     region_size = (size - (aligned - buf)) / n_regions;
675     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
676 
677     /* A region must have at least 2 pages; one code, one guard */
678     g_assert(region_size >= 2 * page_size);
679 
680     /* init the region struct */
681     qemu_mutex_init(&region.lock);
682     region.n = n_regions;
683     region.size = region_size - page_size;
684     region.stride = region_size;
685     region.start = buf;
686     region.start_aligned = aligned;
687     /* page-align the end, since its last page will be a guard page */
688     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
689     /* account for that last guard page */
690     region.end -= page_size;
691 
692     /* set guard pages */
693     for (i = 0; i < region.n; i++) {
694         void *start, *end;
695         int rc;
696 
697         tcg_region_bounds(i, &start, &end);
698         rc = qemu_mprotect_none(end, page_size);
699         g_assert(!rc);
700     }
701 
702     tcg_region_trees_init();
703 
704     /* In user-mode we support only one ctx, so do the initial allocation now */
705 #ifdef CONFIG_USER_ONLY
706     {
707         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
708 
709         g_assert(!err);
710     }
711 #endif
712 }
713 
714 /*
715  * All TCG threads except the parent (i.e. the one that called tcg_context_init
716  * and registered the target's TCG globals) must register with this function
717  * before initiating translation.
718  *
719  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
720  * of tcg_region_init() for the reasoning behind this.
721  *
722  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
724  * is not used anymore for translation once this function is called.
725  *
726  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
728  */
729 #ifdef CONFIG_USER_ONLY
730 void tcg_register_thread(void)
731 {
732     tcg_ctx = &tcg_init_ctx;
733 }
734 #else
735 void tcg_register_thread(void)
736 {
737     TCGContext *s = g_malloc(sizeof(*s));
738     unsigned int i, n;
739     bool err;
740 
741     *s = tcg_init_ctx;
742 
743     /* Relink mem_base.  */
744     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
745         if (tcg_init_ctx.temps[i].mem_base) {
746             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
747             tcg_debug_assert(b >= 0 && b < n);
748             s->temps[i].mem_base = &s->temps[b];
749         }
750     }
751 
752     /* Claim an entry in tcg_ctxs */
753     n = atomic_fetch_inc(&n_tcg_ctxs);
754     g_assert(n < max_cpus);
755     atomic_set(&tcg_ctxs[n], s);
756 
757     tcg_ctx = s;
758     qemu_mutex_lock(&region.lock);
759     err = tcg_region_initial_alloc__locked(tcg_ctx);
760     g_assert(!err);
761     qemu_mutex_unlock(&region.lock);
762 }
763 #endif /* !CONFIG_USER_ONLY */
764 
765 /*
766  * Returns the size (in bytes) of all translated code (i.e. from all regions)
767  * currently in the cache.
768  * See also: tcg_code_capacity()
769  * Do not confuse with tcg_current_code_size(); that one applies to a single
770  * TCG context.
771  */
772 size_t tcg_code_size(void)
773 {
774     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
775     unsigned int i;
776     size_t total;
777 
778     qemu_mutex_lock(&region.lock);
779     total = region.agg_size_full;
780     for (i = 0; i < n_ctxs; i++) {
781         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
782         size_t size;
783 
784         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
785         g_assert(size <= s->code_gen_buffer_size);
786         total += size;
787     }
788     qemu_mutex_unlock(&region.lock);
789     return total;
790 }
791 
792 /*
793  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
794  * regions.
795  * See also: tcg_code_size()
796  */
797 size_t tcg_code_capacity(void)
798 {
799     size_t guard_size, capacity;
800 
801     /* no need for synchronization; these variables are set at init time */
802     guard_size = region.stride - region.size;
803     capacity = region.end + guard_size - region.start;
804     capacity -= region.n * (guard_size + TCG_HIGHWATER);
805     return capacity;
806 }
807 
808 size_t tcg_tb_phys_invalidate_count(void)
809 {
810     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
811     unsigned int i;
812     size_t total = 0;
813 
814     for (i = 0; i < n_ctxs; i++) {
815         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
816 
817         total += atomic_read(&s->tb_phys_invalidate_count);
818     }
819     return total;
820 }
821 
822 /* pool based memory allocation */
823 void *tcg_malloc_internal(TCGContext *s, int size)
824 {
825     TCGPool *p;
826     int pool_size;
827 
828     if (size > TCG_POOL_CHUNK_SIZE) {
829         /* big malloc: insert a new pool (XXX: could optimize) */
830         p = g_malloc(sizeof(TCGPool) + size);
831         p->size = size;
832         p->next = s->pool_first_large;
833         s->pool_first_large = p;
834         return p->data;
835     } else {
836         p = s->pool_current;
837         if (!p) {
838             p = s->pool_first;
839             if (!p)
840                 goto new_pool;
841         } else {
842             if (!p->next) {
843             new_pool:
844                 pool_size = TCG_POOL_CHUNK_SIZE;
845                 p = g_malloc(sizeof(TCGPool) + pool_size);
846                 p->size = pool_size;
847                 p->next = NULL;
848                 if (s->pool_current)
849                     s->pool_current->next = p;
850                 else
851                     s->pool_first = p;
852             } else {
853                 p = p->next;
854             }
855         }
856     }
857     s->pool_current = p;
858     s->pool_cur = p->data + size;
859     s->pool_end = p->data + p->size;
860     return p->data;
861 }
862 
863 void tcg_pool_reset(TCGContext *s)
864 {
865     TCGPool *p, *t;
866     for (p = s->pool_first_large; p; p = t) {
867         t = p->next;
868         g_free(p);
869     }
870     s->pool_first_large = NULL;
871     s->pool_cur = s->pool_end = NULL;
872     s->pool_current = NULL;
873 }
874 
/* Description of one helper; entries are stored in all_helpers[]. */
typedef struct TCGHelperInfo {
    void *func;         /* helper address; used as the helper_table key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
881 
882 #include "exec/helper-proto.h"
883 
/* Table of all helpers; its entries come from exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its entry in all_helpers[]. */
static GHashTable *helper_table;

/*
 * Copy of tcg_target_reg_alloc_order with its leading run of registers
 * reversed; filled in by tcg_context_init().
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations for functions used by tcg_context_init(). */
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
893 
894 void tcg_context_init(TCGContext *s)
895 {
896     int op, total_args, n, i;
897     TCGOpDef *def;
898     TCGArgConstraint *args_ct;
899     int *sorted_args;
900     TCGTemp *ts;
901 
902     memset(s, 0, sizeof(*s));
903     s->nb_globals = 0;
904 
905     /* Count total number of arguments and allocate the corresponding
906        space */
907     total_args = 0;
908     for(op = 0; op < NB_OPS; op++) {
909         def = &tcg_op_defs[op];
910         n = def->nb_iargs + def->nb_oargs;
911         total_args += n;
912     }
913 
914     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
915     sorted_args = g_malloc(sizeof(int) * total_args);
916 
917     for(op = 0; op < NB_OPS; op++) {
918         def = &tcg_op_defs[op];
919         def->args_ct = args_ct;
920         def->sorted_args = sorted_args;
921         n = def->nb_iargs + def->nb_oargs;
922         sorted_args += n;
923         args_ct += n;
924     }
925 
926     /* Register helpers.  */
927     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
928     helper_table = g_hash_table_new(NULL, NULL);
929 
930     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
931         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
932                             (gpointer)&all_helpers[i]);
933     }
934 
935     tcg_target_init(s);
936     process_op_defs(s);
937 
938     /* Reverse the order of the saved registers, assuming they're all at
939        the start of tcg_target_reg_alloc_order.  */
940     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
941         int r = tcg_target_reg_alloc_order[n];
942         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
943             break;
944         }
945     }
946     for (i = 0; i < n; ++i) {
947         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
948     }
949     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
950         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
951     }
952 
953     tcg_ctx = s;
954     /*
955      * In user-mode we simply share the init context among threads, since we
956      * use a single region. See the documentation tcg_region_init() for the
957      * reasoning behind this.
958      * In softmmu we will have at most max_cpus TCG threads.
959      */
960 #ifdef CONFIG_USER_ONLY
961     tcg_ctxs = &tcg_ctx;
962     n_tcg_ctxs = 1;
963 #else
964     tcg_ctxs = g_new(TCGContext *, max_cpus);
965 #endif
966 
967     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
968     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
969     cpu_env = temp_tcgv_ptr(ts);
970 }
971 
972 /*
973  * Allocate TBs right before their corresponding translated code, making
974  * sure that TBs and code are on different cache lines.
975  */
976 TranslationBlock *tcg_tb_alloc(TCGContext *s)
977 {
978     uintptr_t align = qemu_icache_linesize;
979     TranslationBlock *tb;
980     void *next;
981 
982  retry:
983     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
984     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
985 
986     if (unlikely(next > s->code_gen_highwater)) {
987         if (tcg_region_alloc(s)) {
988             return NULL;
989         }
990         goto retry;
991     }
992     atomic_set(&s->code_gen_ptr, next);
993     s->data_gen_ptr = NULL;
994     return tb;
995 }
996 
997 void tcg_prologue_init(TCGContext *s)
998 {
999     size_t prologue_size, total_size;
1000     void *buf0, *buf1;
1001 
1002     /* Put the prologue at the beginning of code_gen_buffer.  */
1003     buf0 = s->code_gen_buffer;
1004     total_size = s->code_gen_buffer_size;
1005     s->code_ptr = buf0;
1006     s->code_buf = buf0;
1007     s->data_gen_ptr = NULL;
1008     s->code_gen_prologue = buf0;
1009 
1010     /* Compute a high-water mark, at which we voluntarily flush the buffer
1011        and start over.  The size here is arbitrary, significantly larger
1012        than we expect the code generation for any one opcode to require.  */
1013     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1014 
1015 #ifdef TCG_TARGET_NEED_POOL_LABELS
1016     s->pool_labels = NULL;
1017 #endif
1018 
1019     /* Generate the prologue.  */
1020     tcg_target_qemu_prologue(s);
1021 
1022 #ifdef TCG_TARGET_NEED_POOL_LABELS
1023     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1024     {
1025         bool ok = tcg_out_pool_finalize(s);
1026         tcg_debug_assert(ok);
1027     }
1028 #endif
1029 
1030     buf1 = s->code_ptr;
1031     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1032 
1033     /* Deduct the prologue from the buffer.  */
1034     prologue_size = tcg_current_code_size(s);
1035     s->code_gen_ptr = buf1;
1036     s->code_gen_buffer = buf1;
1037     s->code_buf = buf1;
1038     total_size -= prologue_size;
1039     s->code_gen_buffer_size = total_size;
1040 
1041     tcg_register_jit(s->code_gen_buffer, total_size);
1042 
1043 #ifdef DEBUG_DISAS
1044     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1045         qemu_log_lock();
1046         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1047         if (s->data_gen_ptr) {
1048             size_t code_size = s->data_gen_ptr - buf0;
1049             size_t data_size = prologue_size - code_size;
1050             size_t i;
1051 
1052             log_disas(buf0, code_size);
1053 
1054             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1055                 if (sizeof(tcg_target_ulong) == 8) {
1056                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1057                              (uintptr_t)s->data_gen_ptr + i,
1058                              *(uint64_t *)(s->data_gen_ptr + i));
1059                 } else {
1060                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1061                              (uintptr_t)s->data_gen_ptr + i,
1062                              *(uint32_t *)(s->data_gen_ptr + i));
1063                 }
1064             }
1065         } else {
1066             log_disas(buf0, prologue_size);
1067         }
1068         qemu_log("\n");
1069         qemu_log_flush();
1070         qemu_log_unlock();
1071     }
1072 #endif
1073 
1074     /* Assert that goto_ptr is implemented completely.  */
1075     if (TCG_TARGET_HAS_goto_ptr) {
1076         tcg_debug_assert(s->code_gen_epilogue != NULL);
1077     }
1078 }
1079 
1080 void tcg_func_start(TCGContext *s)
1081 {
1082     tcg_pool_reset(s);
1083     s->nb_temps = s->nb_globals;
1084 
1085     /* No temps have been previously allocated for size or locality.  */
1086     memset(s->free_temps, 0, sizeof(s->free_temps));
1087 
1088     s->nb_ops = 0;
1089     s->nb_labels = 0;
1090     s->current_frame_offset = s->frame_start;
1091 
1092 #ifdef CONFIG_DEBUG_TCG
1093     s->goto_tb_issue_mask = 0;
1094 #endif
1095 
1096     QTAILQ_INIT(&s->ops);
1097     QTAILQ_INIT(&s->free_ops);
1098 #ifdef CONFIG_DEBUG_TCG
1099     QSIMPLEQ_INIT(&s->labels);
1100 #endif
1101 }
1102 
1103 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1104 {
1105     int n = s->nb_temps++;
1106     tcg_debug_assert(n < TCG_MAX_TEMPS);
1107     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1108 }
1109 
1110 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1111 {
1112     TCGTemp *ts;
1113 
1114     tcg_debug_assert(s->nb_globals == s->nb_temps);
1115     s->nb_globals++;
1116     ts = tcg_temp_alloc(s);
1117     ts->temp_global = 1;
1118 
1119     return ts;
1120 }
1121 
1122 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1123                                             TCGReg reg, const char *name)
1124 {
1125     TCGTemp *ts;
1126 
1127     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1128         tcg_abort();
1129     }
1130 
1131     ts = tcg_global_alloc(s);
1132     ts->base_type = type;
1133     ts->type = type;
1134     ts->fixed_reg = 1;
1135     ts->reg = reg;
1136     ts->name = name;
1137     tcg_regset_set_reg(s->reserved_regs, reg);
1138 
1139     return ts;
1140 }
1141 
1142 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1143 {
1144     s->frame_start = start;
1145     s->frame_end = start + size;
1146     s->frame_temp
1147         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1148 }
1149 
1150 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1151                                      intptr_t offset, const char *name)
1152 {
1153     TCGContext *s = tcg_ctx;
1154     TCGTemp *base_ts = tcgv_ptr_temp(base);
1155     TCGTemp *ts = tcg_global_alloc(s);
1156     int indirect_reg = 0, bigendian = 0;
1157 #ifdef HOST_WORDS_BIGENDIAN
1158     bigendian = 1;
1159 #endif
1160 
1161     if (!base_ts->fixed_reg) {
1162         /* We do not support double-indirect registers.  */
1163         tcg_debug_assert(!base_ts->indirect_reg);
1164         base_ts->indirect_base = 1;
1165         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1166                             ? 2 : 1);
1167         indirect_reg = 1;
1168     }
1169 
1170     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1171         TCGTemp *ts2 = tcg_global_alloc(s);
1172         char buf[64];
1173 
1174         ts->base_type = TCG_TYPE_I64;
1175         ts->type = TCG_TYPE_I32;
1176         ts->indirect_reg = indirect_reg;
1177         ts->mem_allocated = 1;
1178         ts->mem_base = base_ts;
1179         ts->mem_offset = offset + bigendian * 4;
1180         pstrcpy(buf, sizeof(buf), name);
1181         pstrcat(buf, sizeof(buf), "_0");
1182         ts->name = strdup(buf);
1183 
1184         tcg_debug_assert(ts2 == ts + 1);
1185         ts2->base_type = TCG_TYPE_I64;
1186         ts2->type = TCG_TYPE_I32;
1187         ts2->indirect_reg = indirect_reg;
1188         ts2->mem_allocated = 1;
1189         ts2->mem_base = base_ts;
1190         ts2->mem_offset = offset + (1 - bigendian) * 4;
1191         pstrcpy(buf, sizeof(buf), name);
1192         pstrcat(buf, sizeof(buf), "_1");
1193         ts2->name = strdup(buf);
1194     } else {
1195         ts->base_type = type;
1196         ts->type = type;
1197         ts->indirect_reg = indirect_reg;
1198         ts->mem_allocated = 1;
1199         ts->mem_base = base_ts;
1200         ts->mem_offset = offset;
1201         ts->name = name;
1202     }
1203     return ts;
1204 }
1205 
1206 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1207 {
1208     TCGContext *s = tcg_ctx;
1209     TCGTemp *ts;
1210     int idx, k;
1211 
1212     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1213     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1214     if (idx < TCG_MAX_TEMPS) {
1215         /* There is already an available temp with the right type.  */
1216         clear_bit(idx, s->free_temps[k].l);
1217 
1218         ts = &s->temps[idx];
1219         ts->temp_allocated = 1;
1220         tcg_debug_assert(ts->base_type == type);
1221         tcg_debug_assert(ts->temp_local == temp_local);
1222     } else {
1223         ts = tcg_temp_alloc(s);
1224         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1225             TCGTemp *ts2 = tcg_temp_alloc(s);
1226 
1227             ts->base_type = type;
1228             ts->type = TCG_TYPE_I32;
1229             ts->temp_allocated = 1;
1230             ts->temp_local = temp_local;
1231 
1232             tcg_debug_assert(ts2 == ts + 1);
1233             ts2->base_type = TCG_TYPE_I64;
1234             ts2->type = TCG_TYPE_I32;
1235             ts2->temp_allocated = 1;
1236             ts2->temp_local = temp_local;
1237         } else {
1238             ts->base_type = type;
1239             ts->type = type;
1240             ts->temp_allocated = 1;
1241             ts->temp_local = temp_local;
1242         }
1243     }
1244 
1245 #if defined(CONFIG_DEBUG_TCG)
1246     s->temps_in_use++;
1247 #endif
1248     return ts;
1249 }
1250 
1251 TCGv_vec tcg_temp_new_vec(TCGType type)
1252 {
1253     TCGTemp *t;
1254 
1255 #ifdef CONFIG_DEBUG_TCG
1256     switch (type) {
1257     case TCG_TYPE_V64:
1258         assert(TCG_TARGET_HAS_v64);
1259         break;
1260     case TCG_TYPE_V128:
1261         assert(TCG_TARGET_HAS_v128);
1262         break;
1263     case TCG_TYPE_V256:
1264         assert(TCG_TARGET_HAS_v256);
1265         break;
1266     default:
1267         g_assert_not_reached();
1268     }
1269 #endif
1270 
1271     t = tcg_temp_new_internal(type, 0);
1272     return temp_tcgv_vec(t);
1273 }
1274 
1275 /* Create a new temp of the same type as an existing temp.  */
1276 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1277 {
1278     TCGTemp *t = tcgv_vec_temp(match);
1279 
1280     tcg_debug_assert(t->temp_allocated != 0);
1281 
1282     t = tcg_temp_new_internal(t->base_type, 0);
1283     return temp_tcgv_vec(t);
1284 }
1285 
1286 void tcg_temp_free_internal(TCGTemp *ts)
1287 {
1288     TCGContext *s = tcg_ctx;
1289     int k, idx;
1290 
1291 #if defined(CONFIG_DEBUG_TCG)
1292     s->temps_in_use--;
1293     if (s->temps_in_use < 0) {
1294         fprintf(stderr, "More temporaries freed than allocated!\n");
1295     }
1296 #endif
1297 
1298     tcg_debug_assert(ts->temp_global == 0);
1299     tcg_debug_assert(ts->temp_allocated != 0);
1300     ts->temp_allocated = 0;
1301 
1302     idx = temp_idx(ts);
1303     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1304     set_bit(idx, s->free_temps[k].l);
1305 }
1306 
1307 TCGv_i32 tcg_const_i32(int32_t val)
1308 {
1309     TCGv_i32 t0;
1310     t0 = tcg_temp_new_i32();
1311     tcg_gen_movi_i32(t0, val);
1312     return t0;
1313 }
1314 
1315 TCGv_i64 tcg_const_i64(int64_t val)
1316 {
1317     TCGv_i64 t0;
1318     t0 = tcg_temp_new_i64();
1319     tcg_gen_movi_i64(t0, val);
1320     return t0;
1321 }
1322 
1323 TCGv_i32 tcg_const_local_i32(int32_t val)
1324 {
1325     TCGv_i32 t0;
1326     t0 = tcg_temp_local_new_i32();
1327     tcg_gen_movi_i32(t0, val);
1328     return t0;
1329 }
1330 
1331 TCGv_i64 tcg_const_local_i64(int64_t val)
1332 {
1333     TCGv_i64 t0;
1334     t0 = tcg_temp_local_new_i64();
1335     tcg_gen_movi_i64(t0, val);
1336     return t0;
1337 }
1338 
1339 #if defined(CONFIG_DEBUG_TCG)
1340 void tcg_clear_temp_count(void)
1341 {
1342     TCGContext *s = tcg_ctx;
1343     s->temps_in_use = 0;
1344 }
1345 
1346 int tcg_check_temp_count(void)
1347 {
1348     TCGContext *s = tcg_ctx;
1349     if (s->temps_in_use) {
1350         /* Clear the count so that we don't give another
1351          * warning immediately next time around.
1352          */
1353         s->temps_in_use = 0;
1354         return 1;
1355     }
1356     return 0;
1357 }
1358 #endif
1359 
1360 /* Return true if OP may appear in the opcode stream.
1361    Test the runtime variable that controls each opcode.  */
1362 bool tcg_op_supported(TCGOpcode op)
1363 {
1364     const bool have_vec
1365         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1366 
1367     switch (op) {
1368     case INDEX_op_discard:
1369     case INDEX_op_set_label:
1370     case INDEX_op_call:
1371     case INDEX_op_br:
1372     case INDEX_op_mb:
1373     case INDEX_op_insn_start:
1374     case INDEX_op_exit_tb:
1375     case INDEX_op_goto_tb:
1376     case INDEX_op_qemu_ld_i32:
1377     case INDEX_op_qemu_st_i32:
1378     case INDEX_op_qemu_ld_i64:
1379     case INDEX_op_qemu_st_i64:
1380         return true;
1381 
1382     case INDEX_op_goto_ptr:
1383         return TCG_TARGET_HAS_goto_ptr;
1384 
1385     case INDEX_op_mov_i32:
1386     case INDEX_op_movi_i32:
1387     case INDEX_op_setcond_i32:
1388     case INDEX_op_brcond_i32:
1389     case INDEX_op_ld8u_i32:
1390     case INDEX_op_ld8s_i32:
1391     case INDEX_op_ld16u_i32:
1392     case INDEX_op_ld16s_i32:
1393     case INDEX_op_ld_i32:
1394     case INDEX_op_st8_i32:
1395     case INDEX_op_st16_i32:
1396     case INDEX_op_st_i32:
1397     case INDEX_op_add_i32:
1398     case INDEX_op_sub_i32:
1399     case INDEX_op_mul_i32:
1400     case INDEX_op_and_i32:
1401     case INDEX_op_or_i32:
1402     case INDEX_op_xor_i32:
1403     case INDEX_op_shl_i32:
1404     case INDEX_op_shr_i32:
1405     case INDEX_op_sar_i32:
1406         return true;
1407 
1408     case INDEX_op_movcond_i32:
1409         return TCG_TARGET_HAS_movcond_i32;
1410     case INDEX_op_div_i32:
1411     case INDEX_op_divu_i32:
1412         return TCG_TARGET_HAS_div_i32;
1413     case INDEX_op_rem_i32:
1414     case INDEX_op_remu_i32:
1415         return TCG_TARGET_HAS_rem_i32;
1416     case INDEX_op_div2_i32:
1417     case INDEX_op_divu2_i32:
1418         return TCG_TARGET_HAS_div2_i32;
1419     case INDEX_op_rotl_i32:
1420     case INDEX_op_rotr_i32:
1421         return TCG_TARGET_HAS_rot_i32;
1422     case INDEX_op_deposit_i32:
1423         return TCG_TARGET_HAS_deposit_i32;
1424     case INDEX_op_extract_i32:
1425         return TCG_TARGET_HAS_extract_i32;
1426     case INDEX_op_sextract_i32:
1427         return TCG_TARGET_HAS_sextract_i32;
1428     case INDEX_op_add2_i32:
1429         return TCG_TARGET_HAS_add2_i32;
1430     case INDEX_op_sub2_i32:
1431         return TCG_TARGET_HAS_sub2_i32;
1432     case INDEX_op_mulu2_i32:
1433         return TCG_TARGET_HAS_mulu2_i32;
1434     case INDEX_op_muls2_i32:
1435         return TCG_TARGET_HAS_muls2_i32;
1436     case INDEX_op_muluh_i32:
1437         return TCG_TARGET_HAS_muluh_i32;
1438     case INDEX_op_mulsh_i32:
1439         return TCG_TARGET_HAS_mulsh_i32;
1440     case INDEX_op_ext8s_i32:
1441         return TCG_TARGET_HAS_ext8s_i32;
1442     case INDEX_op_ext16s_i32:
1443         return TCG_TARGET_HAS_ext16s_i32;
1444     case INDEX_op_ext8u_i32:
1445         return TCG_TARGET_HAS_ext8u_i32;
1446     case INDEX_op_ext16u_i32:
1447         return TCG_TARGET_HAS_ext16u_i32;
1448     case INDEX_op_bswap16_i32:
1449         return TCG_TARGET_HAS_bswap16_i32;
1450     case INDEX_op_bswap32_i32:
1451         return TCG_TARGET_HAS_bswap32_i32;
1452     case INDEX_op_not_i32:
1453         return TCG_TARGET_HAS_not_i32;
1454     case INDEX_op_neg_i32:
1455         return TCG_TARGET_HAS_neg_i32;
1456     case INDEX_op_andc_i32:
1457         return TCG_TARGET_HAS_andc_i32;
1458     case INDEX_op_orc_i32:
1459         return TCG_TARGET_HAS_orc_i32;
1460     case INDEX_op_eqv_i32:
1461         return TCG_TARGET_HAS_eqv_i32;
1462     case INDEX_op_nand_i32:
1463         return TCG_TARGET_HAS_nand_i32;
1464     case INDEX_op_nor_i32:
1465         return TCG_TARGET_HAS_nor_i32;
1466     case INDEX_op_clz_i32:
1467         return TCG_TARGET_HAS_clz_i32;
1468     case INDEX_op_ctz_i32:
1469         return TCG_TARGET_HAS_ctz_i32;
1470     case INDEX_op_ctpop_i32:
1471         return TCG_TARGET_HAS_ctpop_i32;
1472 
1473     case INDEX_op_brcond2_i32:
1474     case INDEX_op_setcond2_i32:
1475         return TCG_TARGET_REG_BITS == 32;
1476 
1477     case INDEX_op_mov_i64:
1478     case INDEX_op_movi_i64:
1479     case INDEX_op_setcond_i64:
1480     case INDEX_op_brcond_i64:
1481     case INDEX_op_ld8u_i64:
1482     case INDEX_op_ld8s_i64:
1483     case INDEX_op_ld16u_i64:
1484     case INDEX_op_ld16s_i64:
1485     case INDEX_op_ld32u_i64:
1486     case INDEX_op_ld32s_i64:
1487     case INDEX_op_ld_i64:
1488     case INDEX_op_st8_i64:
1489     case INDEX_op_st16_i64:
1490     case INDEX_op_st32_i64:
1491     case INDEX_op_st_i64:
1492     case INDEX_op_add_i64:
1493     case INDEX_op_sub_i64:
1494     case INDEX_op_mul_i64:
1495     case INDEX_op_and_i64:
1496     case INDEX_op_or_i64:
1497     case INDEX_op_xor_i64:
1498     case INDEX_op_shl_i64:
1499     case INDEX_op_shr_i64:
1500     case INDEX_op_sar_i64:
1501     case INDEX_op_ext_i32_i64:
1502     case INDEX_op_extu_i32_i64:
1503         return TCG_TARGET_REG_BITS == 64;
1504 
1505     case INDEX_op_movcond_i64:
1506         return TCG_TARGET_HAS_movcond_i64;
1507     case INDEX_op_div_i64:
1508     case INDEX_op_divu_i64:
1509         return TCG_TARGET_HAS_div_i64;
1510     case INDEX_op_rem_i64:
1511     case INDEX_op_remu_i64:
1512         return TCG_TARGET_HAS_rem_i64;
1513     case INDEX_op_div2_i64:
1514     case INDEX_op_divu2_i64:
1515         return TCG_TARGET_HAS_div2_i64;
1516     case INDEX_op_rotl_i64:
1517     case INDEX_op_rotr_i64:
1518         return TCG_TARGET_HAS_rot_i64;
1519     case INDEX_op_deposit_i64:
1520         return TCG_TARGET_HAS_deposit_i64;
1521     case INDEX_op_extract_i64:
1522         return TCG_TARGET_HAS_extract_i64;
1523     case INDEX_op_sextract_i64:
1524         return TCG_TARGET_HAS_sextract_i64;
1525     case INDEX_op_extrl_i64_i32:
1526         return TCG_TARGET_HAS_extrl_i64_i32;
1527     case INDEX_op_extrh_i64_i32:
1528         return TCG_TARGET_HAS_extrh_i64_i32;
1529     case INDEX_op_ext8s_i64:
1530         return TCG_TARGET_HAS_ext8s_i64;
1531     case INDEX_op_ext16s_i64:
1532         return TCG_TARGET_HAS_ext16s_i64;
1533     case INDEX_op_ext32s_i64:
1534         return TCG_TARGET_HAS_ext32s_i64;
1535     case INDEX_op_ext8u_i64:
1536         return TCG_TARGET_HAS_ext8u_i64;
1537     case INDEX_op_ext16u_i64:
1538         return TCG_TARGET_HAS_ext16u_i64;
1539     case INDEX_op_ext32u_i64:
1540         return TCG_TARGET_HAS_ext32u_i64;
1541     case INDEX_op_bswap16_i64:
1542         return TCG_TARGET_HAS_bswap16_i64;
1543     case INDEX_op_bswap32_i64:
1544         return TCG_TARGET_HAS_bswap32_i64;
1545     case INDEX_op_bswap64_i64:
1546         return TCG_TARGET_HAS_bswap64_i64;
1547     case INDEX_op_not_i64:
1548         return TCG_TARGET_HAS_not_i64;
1549     case INDEX_op_neg_i64:
1550         return TCG_TARGET_HAS_neg_i64;
1551     case INDEX_op_andc_i64:
1552         return TCG_TARGET_HAS_andc_i64;
1553     case INDEX_op_orc_i64:
1554         return TCG_TARGET_HAS_orc_i64;
1555     case INDEX_op_eqv_i64:
1556         return TCG_TARGET_HAS_eqv_i64;
1557     case INDEX_op_nand_i64:
1558         return TCG_TARGET_HAS_nand_i64;
1559     case INDEX_op_nor_i64:
1560         return TCG_TARGET_HAS_nor_i64;
1561     case INDEX_op_clz_i64:
1562         return TCG_TARGET_HAS_clz_i64;
1563     case INDEX_op_ctz_i64:
1564         return TCG_TARGET_HAS_ctz_i64;
1565     case INDEX_op_ctpop_i64:
1566         return TCG_TARGET_HAS_ctpop_i64;
1567     case INDEX_op_add2_i64:
1568         return TCG_TARGET_HAS_add2_i64;
1569     case INDEX_op_sub2_i64:
1570         return TCG_TARGET_HAS_sub2_i64;
1571     case INDEX_op_mulu2_i64:
1572         return TCG_TARGET_HAS_mulu2_i64;
1573     case INDEX_op_muls2_i64:
1574         return TCG_TARGET_HAS_muls2_i64;
1575     case INDEX_op_muluh_i64:
1576         return TCG_TARGET_HAS_muluh_i64;
1577     case INDEX_op_mulsh_i64:
1578         return TCG_TARGET_HAS_mulsh_i64;
1579 
1580     case INDEX_op_mov_vec:
1581     case INDEX_op_dup_vec:
1582     case INDEX_op_dupi_vec:
1583     case INDEX_op_ld_vec:
1584     case INDEX_op_st_vec:
1585     case INDEX_op_add_vec:
1586     case INDEX_op_sub_vec:
1587     case INDEX_op_and_vec:
1588     case INDEX_op_or_vec:
1589     case INDEX_op_xor_vec:
1590     case INDEX_op_cmp_vec:
1591         return have_vec;
1592     case INDEX_op_dup2_vec:
1593         return have_vec && TCG_TARGET_REG_BITS == 32;
1594     case INDEX_op_not_vec:
1595         return have_vec && TCG_TARGET_HAS_not_vec;
1596     case INDEX_op_neg_vec:
1597         return have_vec && TCG_TARGET_HAS_neg_vec;
1598     case INDEX_op_andc_vec:
1599         return have_vec && TCG_TARGET_HAS_andc_vec;
1600     case INDEX_op_orc_vec:
1601         return have_vec && TCG_TARGET_HAS_orc_vec;
1602     case INDEX_op_mul_vec:
1603         return have_vec && TCG_TARGET_HAS_mul_vec;
1604     case INDEX_op_shli_vec:
1605     case INDEX_op_shri_vec:
1606     case INDEX_op_sari_vec:
1607         return have_vec && TCG_TARGET_HAS_shi_vec;
1608     case INDEX_op_shls_vec:
1609     case INDEX_op_shrs_vec:
1610     case INDEX_op_sars_vec:
1611         return have_vec && TCG_TARGET_HAS_shs_vec;
1612     case INDEX_op_shlv_vec:
1613     case INDEX_op_shrv_vec:
1614     case INDEX_op_sarv_vec:
1615         return have_vec && TCG_TARGET_HAS_shv_vec;
1616     case INDEX_op_ssadd_vec:
1617     case INDEX_op_usadd_vec:
1618     case INDEX_op_sssub_vec:
1619     case INDEX_op_ussub_vec:
1620         return have_vec && TCG_TARGET_HAS_sat_vec;
1621     case INDEX_op_smin_vec:
1622     case INDEX_op_umin_vec:
1623     case INDEX_op_smax_vec:
1624     case INDEX_op_umax_vec:
1625         return have_vec && TCG_TARGET_HAS_minmax_vec;
1626 
1627     default:
1628         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1629         return true;
1630     }
1631 }
1632 
/*
 * Emit an INDEX_op_call op that calls the helper FUNC with the NARGS
 * temps in ARGS, placing the result in RET (NULL for no result).
 *
 * The helper's flags and sizemask are taken from its entry in
 * helper_table.  As used below, sizemask bit 0 marks a 64-bit return
 * value, and for argument i bit (i+1)*2 marks a 64-bit argument and bit
 * (i+1)*2+1 a signed one.
 *
 * The op's args are laid out as: return temp(s), input temps (including
 * any TCG_CALL_DUMMY_ARG padding), the function pointer, then the flags.
 *
 * Note: we convert the 64 bit args to 32 bit and do some alignment
 * and endian swap. Maybe it would be better to do the alignment
 * and endian swap in tcg_reg_alloc_call().
 *
 * NOTE(review): the helper_table lookup result is dereferenced without a
 * NULL check, so FUNC is presumably required to be a registered helper --
 * confirm against callers.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                /* High half first, then low half.  */
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on every argument is treated as 32-bit.  */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace each 32-bit argument in ARGS with a new 64-bit temp holding
       its sign- or zero-extended value; these temps are freed at the end
       of this function.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free slot in op->args.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* A 64-bit result on a 32-bit host uses the two adjacent
               temps ret and ret + 1, ordered by host endianness.  */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* real_args counts the argument slots used, including padding.  */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function pointer and the flags follow the arguments.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extension temps created before the call was emitted.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1804 
1805 static void tcg_reg_alloc_start(TCGContext *s)
1806 {
1807     int i, n;
1808     TCGTemp *ts;
1809 
1810     for (i = 0, n = s->nb_globals; i < n; i++) {
1811         ts = &s->temps[i];
1812         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1813     }
1814     for (n = s->nb_temps; i < n; i++) {
1815         ts = &s->temps[i];
1816         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1817         ts->mem_allocated = 0;
1818         ts->fixed_reg = 0;
1819     }
1820 
1821     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1822 }
1823 
1824 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1825                                  TCGTemp *ts)
1826 {
1827     int idx = temp_idx(ts);
1828 
1829     if (ts->temp_global) {
1830         pstrcpy(buf, buf_size, ts->name);
1831     } else if (ts->temp_local) {
1832         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1833     } else {
1834         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1835     }
1836     return buf;
1837 }
1838 
1839 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1840                              int buf_size, TCGArg arg)
1841 {
1842     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1843 }
1844 
1845 /* Find helper name.  */
1846 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1847 {
1848     const char *ret = NULL;
1849     if (helper_table) {
1850         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1851         if (info) {
1852             ret = info->name;
1853         }
1854     }
1855     return ret;
1856 }
1857 
1858 static const char * const cond_name[] =
1859 {
1860     [TCG_COND_NEVER] = "never",
1861     [TCG_COND_ALWAYS] = "always",
1862     [TCG_COND_EQ] = "eq",
1863     [TCG_COND_NE] = "ne",
1864     [TCG_COND_LT] = "lt",
1865     [TCG_COND_GE] = "ge",
1866     [TCG_COND_LE] = "le",
1867     [TCG_COND_GT] = "gt",
1868     [TCG_COND_LTU] = "ltu",
1869     [TCG_COND_GEU] = "geu",
1870     [TCG_COND_LEU] = "leu",
1871     [TCG_COND_GTU] = "gtu"
1872 };
1873 
1874 static const char * const ldst_name[] =
1875 {
1876     [MO_UB]   = "ub",
1877     [MO_SB]   = "sb",
1878     [MO_LEUW] = "leuw",
1879     [MO_LESW] = "lesw",
1880     [MO_LEUL] = "leul",
1881     [MO_LESL] = "lesl",
1882     [MO_LEQ]  = "leq",
1883     [MO_BEUW] = "beuw",
1884     [MO_BESW] = "besw",
1885     [MO_BEUL] = "beul",
1886     [MO_BESL] = "besl",
1887     [MO_BEQ]  = "beq",
1888 };
1889 
1890 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1891 #ifdef ALIGNED_ONLY
1892     [MO_UNALN >> MO_ASHIFT]    = "un+",
1893     [MO_ALIGN >> MO_ASHIFT]    = "",
1894 #else
1895     [MO_UNALN >> MO_ASHIFT]    = "",
1896     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1897 #endif
1898     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1899     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1900     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1901     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1902     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1903     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1904 };
1905 
1906 static inline bool tcg_regset_single(TCGRegSet d)
1907 {
1908     return (d & (d - 1)) == 0;
1909 }
1910 
1911 static inline TCGReg tcg_regset_first(TCGRegSet d)
1912 {
1913     if (TCG_TARGET_NB_REGS <= 32) {
1914         return ctz32(d);
1915     } else {
1916         return ctz64(d);
1917     }
1918 }
1919 
1920 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1921 {
1922     char buf[128];
1923     TCGOp *op;
1924 
1925     QTAILQ_FOREACH(op, &s->ops, link) {
1926         int i, k, nb_oargs, nb_iargs, nb_cargs;
1927         const TCGOpDef *def;
1928         TCGOpcode c;
1929         int col = 0;
1930 
1931         c = op->opc;
1932         def = &tcg_op_defs[c];
1933 
1934         if (c == INDEX_op_insn_start) {
1935             nb_oargs = 0;
1936             col += qemu_log("\n ----");
1937 
1938             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1939                 target_ulong a;
1940 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1941                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1942 #else
1943                 a = op->args[i];
1944 #endif
1945                 col += qemu_log(" " TARGET_FMT_lx, a);
1946             }
1947         } else if (c == INDEX_op_call) {
1948             /* variable number of arguments */
1949             nb_oargs = TCGOP_CALLO(op);
1950             nb_iargs = TCGOP_CALLI(op);
1951             nb_cargs = def->nb_cargs;
1952 
1953             /* function name, flags, out args */
1954             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1955                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1956                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1957             for (i = 0; i < nb_oargs; i++) {
1958                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1959                                                        op->args[i]));
1960             }
1961             for (i = 0; i < nb_iargs; i++) {
1962                 TCGArg arg = op->args[nb_oargs + i];
1963                 const char *t = "<dummy>";
1964                 if (arg != TCG_CALL_DUMMY_ARG) {
1965                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1966                 }
1967                 col += qemu_log(",%s", t);
1968             }
1969         } else {
1970             col += qemu_log(" %s ", def->name);
1971 
1972             nb_oargs = def->nb_oargs;
1973             nb_iargs = def->nb_iargs;
1974             nb_cargs = def->nb_cargs;
1975 
1976             if (def->flags & TCG_OPF_VECTOR) {
1977                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1978                                 8 << TCGOP_VECE(op));
1979             }
1980 
1981             k = 0;
1982             for (i = 0; i < nb_oargs; i++) {
1983                 if (k != 0) {
1984                     col += qemu_log(",");
1985                 }
1986                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1987                                                       op->args[k++]));
1988             }
1989             for (i = 0; i < nb_iargs; i++) {
1990                 if (k != 0) {
1991                     col += qemu_log(",");
1992                 }
1993                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1994                                                       op->args[k++]));
1995             }
1996             switch (c) {
1997             case INDEX_op_brcond_i32:
1998             case INDEX_op_setcond_i32:
1999             case INDEX_op_movcond_i32:
2000             case INDEX_op_brcond2_i32:
2001             case INDEX_op_setcond2_i32:
2002             case INDEX_op_brcond_i64:
2003             case INDEX_op_setcond_i64:
2004             case INDEX_op_movcond_i64:
2005             case INDEX_op_cmp_vec:
2006                 if (op->args[k] < ARRAY_SIZE(cond_name)
2007                     && cond_name[op->args[k]]) {
2008                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2009                 } else {
2010                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2011                 }
2012                 i = 1;
2013                 break;
2014             case INDEX_op_qemu_ld_i32:
2015             case INDEX_op_qemu_st_i32:
2016             case INDEX_op_qemu_ld_i64:
2017             case INDEX_op_qemu_st_i64:
2018                 {
2019                     TCGMemOpIdx oi = op->args[k++];
2020                     TCGMemOp op = get_memop(oi);
2021                     unsigned ix = get_mmuidx(oi);
2022 
2023                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2024                         col += qemu_log(",$0x%x,%u", op, ix);
2025                     } else {
2026                         const char *s_al, *s_op;
2027                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2028                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2029                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2030                     }
2031                     i = 1;
2032                 }
2033                 break;
2034             default:
2035                 i = 0;
2036                 break;
2037             }
2038             switch (c) {
2039             case INDEX_op_set_label:
2040             case INDEX_op_br:
2041             case INDEX_op_brcond_i32:
2042             case INDEX_op_brcond_i64:
2043             case INDEX_op_brcond2_i32:
2044                 col += qemu_log("%s$L%d", k ? "," : "",
2045                                 arg_label(op->args[k])->id);
2046                 i++, k++;
2047                 break;
2048             default:
2049                 break;
2050             }
2051             for (; i < nb_cargs; i++, k++) {
2052                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2053             }
2054         }
2055 
2056         if (have_prefs || op->life) {
2057             for (; col < 40; ++col) {
2058                 putc(' ', qemu_logfile);
2059             }
2060         }
2061 
2062         if (op->life) {
2063             unsigned life = op->life;
2064 
2065             if (life & (SYNC_ARG * 3)) {
2066                 qemu_log("  sync:");
2067                 for (i = 0; i < 2; ++i) {
2068                     if (life & (SYNC_ARG << i)) {
2069                         qemu_log(" %d", i);
2070                     }
2071                 }
2072             }
2073             life /= DEAD_ARG;
2074             if (life) {
2075                 qemu_log("  dead:");
2076                 for (i = 0; life; ++i, life >>= 1) {
2077                     if (life & 1) {
2078                         qemu_log(" %d", i);
2079                     }
2080                 }
2081             }
2082         }
2083 
2084         if (have_prefs) {
2085             for (i = 0; i < nb_oargs; ++i) {
2086                 TCGRegSet set = op->output_pref[i];
2087 
2088                 if (i == 0) {
2089                     qemu_log("  pref=");
2090                 } else {
2091                     qemu_log(",");
2092                 }
2093                 if (set == 0) {
2094                     qemu_log("none");
2095                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2096                     qemu_log("all");
2097 #ifdef CONFIG_DEBUG_TCG
2098                 } else if (tcg_regset_single(set)) {
2099                     TCGReg reg = tcg_regset_first(set);
2100                     qemu_log("%s", tcg_target_reg_names[reg]);
2101 #endif
2102                 } else if (TCG_TARGET_NB_REGS <= 32) {
2103                     qemu_log("%#x", (uint32_t)set);
2104                 } else {
2105                     qemu_log("%#" PRIx64, (uint64_t)set);
2106                 }
2107             }
2108         }
2109 
2110         qemu_log("\n");
2111     }
2112 }
2113 
2114 /* we give more priority to constraints with less registers */
2115 static int get_constraint_priority(const TCGOpDef *def, int k)
2116 {
2117     const TCGArgConstraint *arg_ct;
2118 
2119     int i, n;
2120     arg_ct = &def->args_ct[k];
2121     if (arg_ct->ct & TCG_CT_ALIAS) {
2122         /* an alias is equivalent to a single register */
2123         n = 1;
2124     } else {
2125         if (!(arg_ct->ct & TCG_CT_REG))
2126             return 0;
2127         n = 0;
2128         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2129             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2130                 n++;
2131         }
2132     }
2133     return TCG_TARGET_NB_REGS - n + 1;
2134 }
2135 
2136 /* sort from highest priority to lowest */
2137 static void sort_constraints(TCGOpDef *def, int start, int n)
2138 {
2139     int i, j, p1, p2, tmp;
2140 
2141     for(i = 0; i < n; i++)
2142         def->sorted_args[start + i] = start + i;
2143     if (n <= 1)
2144         return;
2145     for(i = 0; i < n - 1; i++) {
2146         for(j = i + 1; j < n; j++) {
2147             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2148             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2149             if (p1 < p2) {
2150                 tmp = def->sorted_args[start + i];
2151                 def->sorted_args[start + i] = def->sorted_args[start + j];
2152                 def->sorted_args[start + j] = tmp;
2153             }
2154         }
2155     }
2156 }
2157 
2158 static void process_op_defs(TCGContext *s)
2159 {
2160     TCGOpcode op;
2161 
2162     for (op = 0; op < NB_OPS; op++) {
2163         TCGOpDef *def = &tcg_op_defs[op];
2164         const TCGTargetOpDef *tdefs;
2165         TCGType type;
2166         int i, nb_args;
2167 
2168         if (def->flags & TCG_OPF_NOT_PRESENT) {
2169             continue;
2170         }
2171 
2172         nb_args = def->nb_iargs + def->nb_oargs;
2173         if (nb_args == 0) {
2174             continue;
2175         }
2176 
2177         tdefs = tcg_target_op_def(op);
2178         /* Missing TCGTargetOpDef entry. */
2179         tcg_debug_assert(tdefs != NULL);
2180 
2181         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2182         for (i = 0; i < nb_args; i++) {
2183             const char *ct_str = tdefs->args_ct_str[i];
2184             /* Incomplete TCGTargetOpDef entry. */
2185             tcg_debug_assert(ct_str != NULL);
2186 
2187             def->args_ct[i].u.regs = 0;
2188             def->args_ct[i].ct = 0;
2189             while (*ct_str != '\0') {
2190                 switch(*ct_str) {
2191                 case '0' ... '9':
2192                     {
2193                         int oarg = *ct_str - '0';
2194                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2195                         tcg_debug_assert(oarg < def->nb_oargs);
2196                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2197                         /* TCG_CT_ALIAS is for the output arguments.
2198                            The input is tagged with TCG_CT_IALIAS. */
2199                         def->args_ct[i] = def->args_ct[oarg];
2200                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2201                         def->args_ct[oarg].alias_index = i;
2202                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2203                         def->args_ct[i].alias_index = oarg;
2204                     }
2205                     ct_str++;
2206                     break;
2207                 case '&':
2208                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2209                     ct_str++;
2210                     break;
2211                 case 'i':
2212                     def->args_ct[i].ct |= TCG_CT_CONST;
2213                     ct_str++;
2214                     break;
2215                 default:
2216                     ct_str = target_parse_constraint(&def->args_ct[i],
2217                                                      ct_str, type);
2218                     /* Typo in TCGTargetOpDef constraint. */
2219                     tcg_debug_assert(ct_str != NULL);
2220                 }
2221             }
2222         }
2223 
2224         /* TCGTargetOpDef entry with too much information? */
2225         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2226 
2227         /* sort the constraints (XXX: this is just an heuristic) */
2228         sort_constraints(def, 0, def->nb_oargs);
2229         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2230     }
2231 }
2232 
2233 void tcg_op_remove(TCGContext *s, TCGOp *op)
2234 {
2235     TCGLabel *label;
2236 
2237     switch (op->opc) {
2238     case INDEX_op_br:
2239         label = arg_label(op->args[0]);
2240         label->refs--;
2241         break;
2242     case INDEX_op_brcond_i32:
2243     case INDEX_op_brcond_i64:
2244         label = arg_label(op->args[3]);
2245         label->refs--;
2246         break;
2247     case INDEX_op_brcond2_i32:
2248         label = arg_label(op->args[5]);
2249         label->refs--;
2250         break;
2251     default:
2252         break;
2253     }
2254 
2255     QTAILQ_REMOVE(&s->ops, op, link);
2256     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2257     s->nb_ops--;
2258 
2259 #ifdef CONFIG_PROFILER
2260     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2261 #endif
2262 }
2263 
2264 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2265 {
2266     TCGContext *s = tcg_ctx;
2267     TCGOp *op;
2268 
2269     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2270         op = tcg_malloc(sizeof(TCGOp));
2271     } else {
2272         op = QTAILQ_FIRST(&s->free_ops);
2273         QTAILQ_REMOVE(&s->free_ops, op, link);
2274     }
2275     memset(op, 0, offsetof(TCGOp, link));
2276     op->opc = opc;
2277     s->nb_ops++;
2278 
2279     return op;
2280 }
2281 
2282 TCGOp *tcg_emit_op(TCGOpcode opc)
2283 {
2284     TCGOp *op = tcg_op_alloc(opc);
2285     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2286     return op;
2287 }
2288 
2289 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2290 {
2291     TCGOp *new_op = tcg_op_alloc(opc);
2292     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2293     return new_op;
2294 }
2295 
2296 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2297 {
2298     TCGOp *new_op = tcg_op_alloc(opc);
2299     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2300     return new_op;
2301 }
2302 
2303 /* Reachable analysis : remove unreachable code.  */
2304 static void reachable_code_pass(TCGContext *s)
2305 {
2306     TCGOp *op, *op_next;
2307     bool dead = false;
2308 
2309     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2310         bool remove = dead;
2311         TCGLabel *label;
2312         int call_flags;
2313 
2314         switch (op->opc) {
2315         case INDEX_op_set_label:
2316             label = arg_label(op->args[0]);
2317             if (label->refs == 0) {
2318                 /*
2319                  * While there is an occasional backward branch, virtually
2320                  * all branches generated by the translators are forward.
2321                  * Which means that generally we will have already removed
2322                  * all references to the label that will be, and there is
2323                  * little to be gained by iterating.
2324                  */
2325                 remove = true;
2326             } else {
2327                 /* Once we see a label, insns become live again.  */
2328                 dead = false;
2329                 remove = false;
2330 
2331                 /*
2332                  * Optimization can fold conditional branches to unconditional.
2333                  * If we find a label with one reference which is preceded by
2334                  * an unconditional branch to it, remove both.  This needed to
2335                  * wait until the dead code in between them was removed.
2336                  */
2337                 if (label->refs == 1) {
2338                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2339                     if (op_prev->opc == INDEX_op_br &&
2340                         label == arg_label(op_prev->args[0])) {
2341                         tcg_op_remove(s, op_prev);
2342                         remove = true;
2343                     }
2344                 }
2345             }
2346             break;
2347 
2348         case INDEX_op_br:
2349         case INDEX_op_exit_tb:
2350         case INDEX_op_goto_ptr:
2351             /* Unconditional branches; everything following is dead.  */
2352             dead = true;
2353             break;
2354 
2355         case INDEX_op_call:
2356             /* Notice noreturn helper calls, raising exceptions.  */
2357             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2358             if (call_flags & TCG_CALL_NO_RETURN) {
2359                 dead = true;
2360             }
2361             break;
2362 
2363         case INDEX_op_insn_start:
2364             /* Never remove -- we need to keep these for unwind.  */
2365             remove = false;
2366             break;
2367 
2368         default:
2369             break;
2370         }
2371 
2372         if (remove) {
2373             tcg_op_remove(s, op);
2374         }
2375     }
2376 }
2377 
/*
 * Bits stored in TCGTemp.state during liveness analysis:
 *   TS_DEAD - the temp is dead at this point;
 *   TS_MEM  - the temp should be in memory at this point.
 */
#define TS_DEAD  (1 << 0)
#define TS_MEM   (1 << 1)

/*
 * Test the dead / sync bit of argument @n.  Both expect a variable
 * named arg_life, holding the op's life data, to be in scope.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2383 
2384 /* For liveness_pass_1, the register preferences for a given temp.  */
2385 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2386 {
2387     return ts->state_ptr;
2388 }
2389 
2390 /* For liveness_pass_1, reset the preferences for a given temp to the
2391  * maximal regset for its type.
2392  */
2393 static inline void la_reset_pref(TCGTemp *ts)
2394 {
2395     *la_temp_pref(ts)
2396         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2397 }
2398 
2399 /* liveness analysis: end of function: all temps are dead, and globals
2400    should be in memory. */
2401 static void la_func_end(TCGContext *s, int ng, int nt)
2402 {
2403     int i;
2404 
2405     for (i = 0; i < ng; ++i) {
2406         s->temps[i].state = TS_DEAD | TS_MEM;
2407         la_reset_pref(&s->temps[i]);
2408     }
2409     for (i = ng; i < nt; ++i) {
2410         s->temps[i].state = TS_DEAD;
2411         la_reset_pref(&s->temps[i]);
2412     }
2413 }
2414 
2415 /* liveness analysis: end of basic block: all temps are dead, globals
2416    and local temps should be in memory. */
2417 static void la_bb_end(TCGContext *s, int ng, int nt)
2418 {
2419     int i;
2420 
2421     for (i = 0; i < ng; ++i) {
2422         s->temps[i].state = TS_DEAD | TS_MEM;
2423         la_reset_pref(&s->temps[i]);
2424     }
2425     for (i = ng; i < nt; ++i) {
2426         s->temps[i].state = (s->temps[i].temp_local
2427                              ? TS_DEAD | TS_MEM
2428                              : TS_DEAD);
2429         la_reset_pref(&s->temps[i]);
2430     }
2431 }
2432 
2433 /* liveness analysis: sync globals back to memory.  */
2434 static void la_global_sync(TCGContext *s, int ng)
2435 {
2436     int i;
2437 
2438     for (i = 0; i < ng; ++i) {
2439         int state = s->temps[i].state;
2440         s->temps[i].state = state | TS_MEM;
2441         if (state == TS_DEAD) {
2442             /* If the global was previously dead, reset prefs.  */
2443             la_reset_pref(&s->temps[i]);
2444         }
2445     }
2446 }
2447 
2448 /* liveness analysis: sync globals back to memory and kill.  */
2449 static void la_global_kill(TCGContext *s, int ng)
2450 {
2451     int i;
2452 
2453     for (i = 0; i < ng; i++) {
2454         s->temps[i].state = TS_DEAD | TS_MEM;
2455         la_reset_pref(&s->temps[i]);
2456     }
2457 }
2458 
2459 /* liveness analysis: note live globals crossing calls.  */
2460 static void la_cross_call(TCGContext *s, int nt)
2461 {
2462     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2463     int i;
2464 
2465     for (i = 0; i < nt; i++) {
2466         TCGTemp *ts = &s->temps[i];
2467         if (!(ts->state & TS_DEAD)) {
2468             TCGRegSet *pset = la_temp_pref(ts);
2469             TCGRegSet set = *pset;
2470 
2471             set &= mask;
2472             /* If the combination is not possible, restart.  */
2473             if (set == 0) {
2474                 set = tcg_target_available_regs[ts->type] & mask;
2475             }
2476             *pset = set;
2477         }
2478     }
2479 }
2480 
2481 /* Liveness analysis : update the opc_arg_life array to tell if a
2482    given input arguments is dead. Instructions updating dead
2483    temporaries are removed. */
2484 static void liveness_pass_1(TCGContext *s)
2485 {
2486     int nb_globals = s->nb_globals;
2487     int nb_temps = s->nb_temps;
2488     TCGOp *op, *op_prev;
2489     TCGRegSet *prefs;
2490     int i;
2491 
2492     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2493     for (i = 0; i < nb_temps; ++i) {
2494         s->temps[i].state_ptr = prefs + i;
2495     }
2496 
2497     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2498     la_func_end(s, nb_globals, nb_temps);
2499 
2500     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2501         int nb_iargs, nb_oargs;
2502         TCGOpcode opc_new, opc_new2;
2503         bool have_opc_new2;
2504         TCGLifeData arg_life = 0;
2505         TCGTemp *ts;
2506         TCGOpcode opc = op->opc;
2507         const TCGOpDef *def = &tcg_op_defs[opc];
2508 
2509         switch (opc) {
2510         case INDEX_op_call:
2511             {
2512                 int call_flags;
2513                 int nb_call_regs;
2514 
2515                 nb_oargs = TCGOP_CALLO(op);
2516                 nb_iargs = TCGOP_CALLI(op);
2517                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2518 
2519                 /* pure functions can be removed if their result is unused */
2520                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2521                     for (i = 0; i < nb_oargs; i++) {
2522                         ts = arg_temp(op->args[i]);
2523                         if (ts->state != TS_DEAD) {
2524                             goto do_not_remove_call;
2525                         }
2526                     }
2527                     goto do_remove;
2528                 }
2529             do_not_remove_call:
2530 
2531                 /* Output args are dead.  */
2532                 for (i = 0; i < nb_oargs; i++) {
2533                     ts = arg_temp(op->args[i]);
2534                     if (ts->state & TS_DEAD) {
2535                         arg_life |= DEAD_ARG << i;
2536                     }
2537                     if (ts->state & TS_MEM) {
2538                         arg_life |= SYNC_ARG << i;
2539                     }
2540                     ts->state = TS_DEAD;
2541                     la_reset_pref(ts);
2542 
2543                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2544                     op->output_pref[i] = 0;
2545                 }
2546 
2547                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2548                                     TCG_CALL_NO_READ_GLOBALS))) {
2549                     la_global_kill(s, nb_globals);
2550                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2551                     la_global_sync(s, nb_globals);
2552                 }
2553 
2554                 /* Record arguments that die in this helper.  */
2555                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2556                     ts = arg_temp(op->args[i]);
2557                     if (ts && ts->state & TS_DEAD) {
2558                         arg_life |= DEAD_ARG << i;
2559                     }
2560                 }
2561 
2562                 /* For all live registers, remove call-clobbered prefs.  */
2563                 la_cross_call(s, nb_temps);
2564 
2565                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2566 
2567                 /* Input arguments are live for preceding opcodes.  */
2568                 for (i = 0; i < nb_iargs; i++) {
2569                     ts = arg_temp(op->args[i + nb_oargs]);
2570                     if (ts && ts->state & TS_DEAD) {
2571                         /* For those arguments that die, and will be allocated
2572                          * in registers, clear the register set for that arg,
2573                          * to be filled in below.  For args that will be on
2574                          * the stack, reset to any available reg.
2575                          */
2576                         *la_temp_pref(ts)
2577                             = (i < nb_call_regs ? 0 :
2578                                tcg_target_available_regs[ts->type]);
2579                         ts->state &= ~TS_DEAD;
2580                     }
2581                 }
2582 
2583                 /* For each input argument, add its input register to prefs.
2584                    If a temp is used once, this produces a single set bit.  */
2585                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2586                     ts = arg_temp(op->args[i + nb_oargs]);
2587                     if (ts) {
2588                         tcg_regset_set_reg(*la_temp_pref(ts),
2589                                            tcg_target_call_iarg_regs[i]);
2590                     }
2591                 }
2592             }
2593             break;
2594         case INDEX_op_insn_start:
2595             break;
2596         case INDEX_op_discard:
2597             /* mark the temporary as dead */
2598             ts = arg_temp(op->args[0]);
2599             ts->state = TS_DEAD;
2600             la_reset_pref(ts);
2601             break;
2602 
2603         case INDEX_op_add2_i32:
2604             opc_new = INDEX_op_add_i32;
2605             goto do_addsub2;
2606         case INDEX_op_sub2_i32:
2607             opc_new = INDEX_op_sub_i32;
2608             goto do_addsub2;
2609         case INDEX_op_add2_i64:
2610             opc_new = INDEX_op_add_i64;
2611             goto do_addsub2;
2612         case INDEX_op_sub2_i64:
2613             opc_new = INDEX_op_sub_i64;
2614         do_addsub2:
2615             nb_iargs = 4;
2616             nb_oargs = 2;
2617             /* Test if the high part of the operation is dead, but not
2618                the low part.  The result can be optimized to a simple
2619                add or sub.  This happens often for x86_64 guest when the
2620                cpu mode is set to 32 bit.  */
2621             if (arg_temp(op->args[1])->state == TS_DEAD) {
2622                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2623                     goto do_remove;
2624                 }
2625                 /* Replace the opcode and adjust the args in place,
2626                    leaving 3 unused args at the end.  */
2627                 op->opc = opc = opc_new;
2628                 op->args[1] = op->args[2];
2629                 op->args[2] = op->args[4];
2630                 /* Fall through and mark the single-word operation live.  */
2631                 nb_iargs = 2;
2632                 nb_oargs = 1;
2633             }
2634             goto do_not_remove;
2635 
2636         case INDEX_op_mulu2_i32:
2637             opc_new = INDEX_op_mul_i32;
2638             opc_new2 = INDEX_op_muluh_i32;
2639             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2640             goto do_mul2;
2641         case INDEX_op_muls2_i32:
2642             opc_new = INDEX_op_mul_i32;
2643             opc_new2 = INDEX_op_mulsh_i32;
2644             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2645             goto do_mul2;
2646         case INDEX_op_mulu2_i64:
2647             opc_new = INDEX_op_mul_i64;
2648             opc_new2 = INDEX_op_muluh_i64;
2649             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2650             goto do_mul2;
2651         case INDEX_op_muls2_i64:
2652             opc_new = INDEX_op_mul_i64;
2653             opc_new2 = INDEX_op_mulsh_i64;
2654             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2655             goto do_mul2;
2656         do_mul2:
2657             nb_iargs = 2;
2658             nb_oargs = 2;
2659             if (arg_temp(op->args[1])->state == TS_DEAD) {
2660                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2661                     /* Both parts of the operation are dead.  */
2662                     goto do_remove;
2663                 }
2664                 /* The high part of the operation is dead; generate the low. */
2665                 op->opc = opc = opc_new;
2666                 op->args[1] = op->args[2];
2667                 op->args[2] = op->args[3];
2668             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2669                 /* The low part of the operation is dead; generate the high. */
2670                 op->opc = opc = opc_new2;
2671                 op->args[0] = op->args[1];
2672                 op->args[1] = op->args[2];
2673                 op->args[2] = op->args[3];
2674             } else {
2675                 goto do_not_remove;
2676             }
2677             /* Mark the single-word operation live.  */
2678             nb_oargs = 1;
2679             goto do_not_remove;
2680 
2681         default:
2682             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2683             nb_iargs = def->nb_iargs;
2684             nb_oargs = def->nb_oargs;
2685 
2686             /* Test if the operation can be removed because all
2687                its outputs are dead. We assume that nb_oargs == 0
2688                implies side effects */
2689             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2690                 for (i = 0; i < nb_oargs; i++) {
2691                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2692                         goto do_not_remove;
2693                     }
2694                 }
2695                 goto do_remove;
2696             }
2697             goto do_not_remove;
2698 
2699         do_remove:
2700             tcg_op_remove(s, op);
2701             break;
2702 
2703         do_not_remove:
2704             for (i = 0; i < nb_oargs; i++) {
2705                 ts = arg_temp(op->args[i]);
2706 
2707                 /* Remember the preference of the uses that followed.  */
2708                 op->output_pref[i] = *la_temp_pref(ts);
2709 
2710                 /* Output args are dead.  */
2711                 if (ts->state & TS_DEAD) {
2712                     arg_life |= DEAD_ARG << i;
2713                 }
2714                 if (ts->state & TS_MEM) {
2715                     arg_life |= SYNC_ARG << i;
2716                 }
2717                 ts->state = TS_DEAD;
2718                 la_reset_pref(ts);
2719             }
2720 
2721             /* If end of basic block, update.  */
2722             if (def->flags & TCG_OPF_BB_EXIT) {
2723                 la_func_end(s, nb_globals, nb_temps);
2724             } else if (def->flags & TCG_OPF_BB_END) {
2725                 la_bb_end(s, nb_globals, nb_temps);
2726             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2727                 la_global_sync(s, nb_globals);
2728                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2729                     la_cross_call(s, nb_temps);
2730                 }
2731             }
2732 
2733             /* Record arguments that die in this opcode.  */
2734             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2735                 ts = arg_temp(op->args[i]);
2736                 if (ts->state & TS_DEAD) {
2737                     arg_life |= DEAD_ARG << i;
2738                 }
2739             }
2740 
2741             /* Input arguments are live for preceding opcodes.  */
2742             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2743                 ts = arg_temp(op->args[i]);
2744                 if (ts->state & TS_DEAD) {
2745                     /* For operands that were dead, initially allow
2746                        all regs for the type.  */
2747                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2748                     ts->state &= ~TS_DEAD;
2749                 }
2750             }
2751 
2752             /* Incorporate constraints for this operand.  */
2753             switch (opc) {
2754             case INDEX_op_mov_i32:
2755             case INDEX_op_mov_i64:
2756                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2757                    have proper constraints.  That said, special case
2758                    moves to propagate preferences backward.  */
2759                 if (IS_DEAD_ARG(1)) {
2760                     *la_temp_pref(arg_temp(op->args[0]))
2761                         = *la_temp_pref(arg_temp(op->args[1]));
2762                 }
2763                 break;
2764 
2765             default:
2766                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2767                     const TCGArgConstraint *ct = &def->args_ct[i];
2768                     TCGRegSet set, *pset;
2769 
2770                     ts = arg_temp(op->args[i]);
2771                     pset = la_temp_pref(ts);
2772                     set = *pset;
2773 
2774                     set &= ct->u.regs;
2775                     if (ct->ct & TCG_CT_IALIAS) {
2776                         set &= op->output_pref[ct->alias_index];
2777                     }
2778                     /* If the combination is not possible, restart.  */
2779                     if (set == 0) {
2780                         set = ct->u.regs;
2781                     }
2782                     *pset = set;
2783                 }
2784                 break;
2785             }
2786             break;
2787         }
2788         op->life = arg_life;
2789     }
2790 }
2791 
2792 /* Liveness analysis: Convert indirect regs to direct temporaries.
      *
      * For every global whose indirect_reg flag is set, a fresh temporary of
      * the same type/base_type is allocated and linked through state_ptr.
      * The op list is then walked once, front to back:
      *   - an input that refers to such a global and is currently TS_DEAD
      *     gets a load op inserted before the op;
      *   - inputs and outputs that refer to such a global are rewritten to
      *     use the direct temporary instead;
      *   - an output flagged NEED_SYNC_ARG gets a store op inserted after
      *     the op.
      *
      * Returns true if at least one op argument was rewritten.
      */
2793 static bool liveness_pass_2(TCGContext *s)
2794 {
2795     int nb_globals = s->nb_globals;
2796     int nb_temps, i;
2797     bool changes = false;
2798     TCGOp *op, *op_next;
2799 
2800     /* Create a temporary for each indirect global.  */
2801     for (i = 0; i < nb_globals; ++i) {
2802         TCGTemp *its = &s->temps[i];
2803         if (its->indirect_reg) {
2804             TCGTemp *dts = tcg_temp_alloc(s);
2805             dts->type = its->type;
2806             dts->base_type = its->base_type;
2807             its->state_ptr = dts;
2808         } else {
2809             its->state_ptr = NULL;
2810         }
2811         /* All globals begin dead.  */
2812         its->state = TS_DEAD;
2813     }
         /* Non-global temps have no direct counterpart and also start dead.
            Note that i carries on from nb_globals, and that s->nb_temps is
            sampled only after the tcg_temp_alloc calls above.  */
2814     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2815         TCGTemp *its = &s->temps[i];
2816         its->state_ptr = NULL;
2817         its->state = TS_DEAD;
2818     }
2819 
         /* The _SAFE iterator is used because ops are inserted around the
            current op while walking the list.  */
2820     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2821         TCGOpcode opc = op->opc;
2822         const TCGOpDef *def = &tcg_op_defs[opc];
2823         TCGLifeData arg_life = op->life;
2824         int nb_iargs, nb_oargs, call_flags;
2825         TCGTemp *arg_ts, *dir_ts;
2826 
2827         if (opc == INDEX_op_call) {
                 /* Calls carry their argument counts and flags in the op.  */
2828             nb_oargs = TCGOP_CALLO(op);
2829             nb_iargs = TCGOP_CALLI(op);
2830             call_flags = op->args[nb_oargs + nb_iargs + 1];
2831         } else {
2832             nb_iargs = def->nb_iargs;
2833             nb_oargs = def->nb_oargs;
2834 
2835             /* Set flags similar to how calls require.  */
2836             if (def->flags & TCG_OPF_BB_END) {
2837                 /* Like writing globals: save_globals */
2838                 call_flags = 0;
2839             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2840                 /* Like reading globals: sync_globals */
2841                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2842             } else {
2843                 /* No effect on globals.  */
2844                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2845                               TCG_CALL_NO_WRITE_GLOBALS);
2846             }
2847         }
2848 
2849         /* Make sure that input arguments are available.  */
2850         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2851             arg_ts = arg_temp(op->args[i]);
2852             if (arg_ts) {
2853                 dir_ts = arg_ts->state_ptr;
2854                 if (dir_ts && arg_ts->state == TS_DEAD) {
                         /* Reload the direct temp from the global's memory
                            slot (mem_base + mem_offset) before this op.  */
2855                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2856                                       ? INDEX_op_ld_i32
2857                                       : INDEX_op_ld_i64);
2858                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2859 
2860                     lop->args[0] = temp_arg(dir_ts);
2861                     lop->args[1] = temp_arg(arg_ts->mem_base);
2862                     lop->args[2] = arg_ts->mem_offset;
2863 
2864                     /* Loaded, but synced with memory.  */
2865                     arg_ts->state = TS_MEM;
2866                 }
2867             }
2868         }
2869 
2870         /* Perform input replacement, and mark inputs that became dead.
2871            No action is required except keeping temp_state up to date
2872            so that we reload when needed.  */
2873         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2874             arg_ts = arg_temp(op->args[i]);
2875             if (arg_ts) {
2876                 dir_ts = arg_ts->state_ptr;
2877                 if (dir_ts) {
2878                     op->args[i] = temp_arg(dir_ts);
2879                     changes = true;
2880                     if (IS_DEAD_ARG(i)) {
2881                         arg_ts->state = TS_DEAD;
2882                     }
2883                 }
2884             }
2885         }
2886 
2887         /* Liveness analysis should ensure that the following are
2888            all correct, for call sites and basic block end points.  */
2889         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2890             /* Nothing to do */
2891         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2892             for (i = 0; i < nb_globals; ++i) {
2893                 /* Liveness should see that globals are synced back,
2894                    that is, either TS_DEAD or TS_MEM.  */
2895                 arg_ts = &s->temps[i];
2896                 tcg_debug_assert(arg_ts->state_ptr == 0
2897                                  || arg_ts->state != 0);
2898             }
2899         } else {
2900             for (i = 0; i < nb_globals; ++i) {
2901                 /* Liveness should see that globals are saved back,
2902                    that is, TS_DEAD, waiting to be reloaded.  */
2903                 arg_ts = &s->temps[i];
2904                 tcg_debug_assert(arg_ts->state_ptr == 0
2905                                  || arg_ts->state == TS_DEAD);
2906             }
2907         }
2908 
2909         /* Outputs become available.  */
2910         for (i = 0; i < nb_oargs; i++) {
2911             arg_ts = arg_temp(op->args[i]);
2912             dir_ts = arg_ts->state_ptr;
2913             if (!dir_ts) {
                     /* Not an indirect global: nothing to rewrite.  */
2914                 continue;
2915             }
2916             op->args[i] = temp_arg(dir_ts);
2917             changes = true;
2918 
2919             /* The output is now live and modified.  */
2920             arg_ts->state = 0;
2921 
2922             /* Sync outputs upon their last write.  */
2923             if (NEED_SYNC_ARG(i)) {
2924                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2925                                   ? INDEX_op_st_i32
2926                                   : INDEX_op_st_i64);
2927                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2928 
2929                 sop->args[0] = temp_arg(dir_ts);
2930                 sop->args[1] = temp_arg(arg_ts->mem_base);
2931                 sop->args[2] = arg_ts->mem_offset;
2932 
2933                 arg_ts->state = TS_MEM;
2934             }
2935             /* Drop outputs that are dead.  */
2936             if (IS_DEAD_ARG(i)) {
2937                 arg_ts->state = TS_DEAD;
2938             }
2939         }
2940     }
2941 
2942     return changes;
2943 }
2944 
2945 #ifdef CONFIG_DEBUG_TCG
     /* Debug helper: print to stdout, for every temp, where its value
        currently lives (host register name, offset(base register), constant
        value, "D" for dead, or "???" for an unknown val_type), followed by
        one line per host register that reg_to_temp[] maps to a temp.  */
2946 static void dump_regs(TCGContext *s)
2947 {
2948     TCGTemp *ts;
2949     int i;
2950     char buf[64];
2951 
2952     for(i = 0; i < s->nb_temps; i++) {
2953         ts = &s->temps[i];
2954         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2955         switch(ts->val_type) {
2956         case TEMP_VAL_REG:
2957             printf("%s", tcg_target_reg_names[ts->reg]);
2958             break;
2959         case TEMP_VAL_MEM:
2960             printf("%d(%s)", (int)ts->mem_offset,
2961                    tcg_target_reg_names[ts->mem_base->reg]);
2962             break;
2963         case TEMP_VAL_CONST:
2964             printf("$0x%" TCG_PRIlx, ts->val);
2965             break;
2966         case TEMP_VAL_DEAD:
2967             printf("D");
2968             break;
2969         default:
2970             printf("???");
2971             break;
2972         }
2973         printf("\n");
2974     }
2975 
         /* Reverse view: which temp each occupied host register holds.  */
2976     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2977         if (s->reg_to_temp[i] != NULL) {
2978             printf("%s: %s\n",
2979                    tcg_target_reg_names[i],
2980                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2981         }
2982     }
2983 }
2984 
     /* Debug consistency check between reg_to_temp[] and the temps.
        Every non-NULL reg_to_temp[reg] must be a TEMP_VAL_REG temp whose
        reg field is that same register, and every non-fixed TEMP_VAL_REG
        temp must be the one recorded in reg_to_temp[] for its register.
        On a mismatch the state is printed with dump_regs() and
        tcg_abort() is called.  */
2985 static void check_regs(TCGContext *s)
2986 {
2987     int reg;
2988     int k;
2989     TCGTemp *ts;
2990     char buf[64];
2991 
         /* Direction 1: register -> temp.  */
2992     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2993         ts = s->reg_to_temp[reg];
2994         if (ts != NULL) {
2995             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2996                 printf("Inconsistency for register %s:\n",
2997                        tcg_target_reg_names[reg]);
2998                 goto fail;
2999             }
3000         }
3001     }
         /* Direction 2: temp -> register.  */
3002     for (k = 0; k < s->nb_temps; k++) {
3003         ts = &s->temps[k];
3004         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3005             && s->reg_to_temp[ts->reg] != ts) {
3006             printf("Inconsistency for temp %s:\n",
3007                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
                 /* The first loop jumps here too, sharing the dump/abort.  */
3008         fail:
3009             printf("reg state:\n");
3010             dump_regs(s);
3011             tcg_abort();
3012         }
3013     }
3014 }
3015 #endif
3016 
     /* Assign a slot in the stack frame to 'ts'.
        The current frame offset is first rounded up to a multiple of
        sizeof(tcg_target_long) (this rounding is compiled out on 64-bit
        sparc hosts).  If the slot would extend past s->frame_end,
        tcg_abort() is called.  Otherwise the offset and frame base are
        recorded in ts, mem_allocated is set, and the frame offset advances
        by one tcg_target_long.  */
3017 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3018 {
3019 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3020     /* Sparc64 stack is accessed with offset of 2047 */
3021     s->current_frame_offset = (s->current_frame_offset +
3022                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3023         ~(sizeof(tcg_target_long) - 1);
3024 #endif
3025     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3026         s->frame_end) {
3027         tcg_abort();
3028     }
3029     ts->mem_offset = s->current_frame_offset;
3030     ts->mem_base = s->frame_temp;
3031     ts->mem_allocated = 1;
3032     s->current_frame_offset += sizeof(tcg_target_long);
3033 }
3034 
3035 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3036 
3037 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3038    mark it free; otherwise mark it dead.
        Fixed-register temps are left untouched.  If the temp currently
        occupies a host register, its reg_to_temp[] entry is cleared.
        The resulting val_type is TEMP_VAL_MEM when the temp is being freed
        or is a local or global temp, and TEMP_VAL_DEAD otherwise.  */
3039 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3040 {
3041     if (ts->fixed_reg) {
3042         return;
3043     }
3044     if (ts->val_type == TEMP_VAL_REG) {
3045         s->reg_to_temp[ts->reg] = NULL;
3046     }
3047     ts->val_type = (free_or_dead < 0
3048                     || ts->temp_local
3049                     || ts->temp_global
3050                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3051 }
3052 
3053 /* Mark a temporary as dead.  Shorthand for temp_free_or_dead() with a
        positive 'free_or_dead' argument.  */
3054 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3055 {
3056     temp_free_or_dead(s, ts, 1);
3057 }
3058 
3059 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3060    register needs to be allocated to store a constant.  If 'free_or_dead'
3061    is non-zero, subsequently release the temporary; if it is positive, the
3062    temp is dead; if it is negative, the temp is free.
        'preferred_regs' is forwarded to temp_load() when a constant has to
        be placed in a register first.  Fixed-register temps are ignored.
        Nothing is stored when the temp is already mem_coherent.  */
3063 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3064                       TCGRegSet preferred_regs, int free_or_dead)
3065 {
3066     if (ts->fixed_reg) {
3067         return;
3068     }
3069     if (!ts->mem_coherent) {
             /* Make sure there is a frame slot to store into.  */
3070         if (!ts->mem_allocated) {
3071             temp_allocate_frame(s, ts);
3072         }
3073         switch (ts->val_type) {
3074         case TEMP_VAL_CONST:
3075             /* If we're going to free the temp immediately, then we won't
3076                require it later in a register, so attempt to store the
3077                constant to memory directly.  */
3078             if (free_or_dead
3079                 && tcg_out_sti(s, ts->type, ts->val,
3080                                ts->mem_base->reg, ts->mem_offset)) {
3081                 break;
3082             }
                 /* Otherwise materialize the constant in a register and
                    store it through the TEMP_VAL_REG path below.  */
3083             temp_load(s, ts, tcg_target_available_regs[ts->type],
3084                       allocated_regs, preferred_regs);
3085             /* fallthrough */
3086 
3087         case TEMP_VAL_REG:
3088             tcg_out_st(s, ts->type, ts->reg,
3089                        ts->mem_base->reg, ts->mem_offset);
3090             break;
3091 
3092         case TEMP_VAL_MEM:
3093             break;
3094 
3095         case TEMP_VAL_DEAD:
3096         default:
3097             tcg_abort();
3098         }
3099         ts->mem_coherent = 1;
3100     }
3101     if (free_or_dead) {
3102         temp_free_or_dead(s, ts, free_or_dead);
3103     }
3104 }
3105 
3106 /* free register 'reg' by spilling the corresponding temporary if necessary.
        The temp found in reg_to_temp[reg], if any, is synced with
        free_or_dead = -1, i.e. it is marked free rather than dead.  */
3107 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3108 {
3109     TCGTemp *ts = s->reg_to_temp[reg];
3110     if (ts != NULL) {
3111         temp_sync(s, ts, allocated_regs, 0, -1);
3112     }
3113 }
3114 
3115 /**
3116  * tcg_reg_alloc:
3117  * @required_regs: Set of registers in which we must allocate.
3118  * @allocated_regs: Set of registers which must be avoided.
3119  * @preferred_regs: Set of registers we should prefer.
3120  * @rev: True if we search the registers in "indirect" order.
3121  *
3122  * The allocated register must be in @required_regs & ~@allocated_regs,
3123  * but if we can put it in @preferred_regs we may save a move later.
      *
      * A free register is chosen when one exists; otherwise the temp that
      * occupies the first suitable register is spilled via tcg_reg_free().
      * In both phases the preferred subset is tried before the full set.
      *
      * Returns the selected register.  Calls tcg_abort() if no candidate
      * is found.
3124  */
3125 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3126                             TCGRegSet allocated_regs,
3127                             TCGRegSet preferred_regs, bool rev)
3128 {
3129     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3130     TCGRegSet reg_ct[2];
3131     const int *order;
3132 
         /* reg_ct[1] is the full candidate set, reg_ct[0] its preferred
            subset.  */
3133     reg_ct[1] = required_regs & ~allocated_regs;
3134     tcg_debug_assert(reg_ct[1] != 0);
3135     reg_ct[0] = reg_ct[1] & preferred_regs;
3136 
3137     /* Skip the preferred_regs option if it cannot be satisfied,
3138        or if the preference made no difference.  */
         /* f is the first reg_ct[] index to try: 1 skips the preferred set.  */
3139     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3140 
3141     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3142 
3143     /* Try free registers, preferences first.  */
3144     for (j = f; j < 2; j++) {
3145         TCGRegSet set = reg_ct[j];
3146 
3147         if (tcg_regset_single(set)) {
3148             /* One register in the set.  */
3149             TCGReg reg = tcg_regset_first(set);
3150             if (s->reg_to_temp[reg] == NULL) {
3151                 return reg;
3152             }
3153         } else {
3154             for (i = 0; i < n; i++) {
3155                 TCGReg reg = order[i];
3156                 if (s->reg_to_temp[reg] == NULL &&
3157                     tcg_regset_test_reg(set, reg)) {
3158                     return reg;
3159                 }
3160             }
3161         }
3162     }
3163 
3164     /* We must spill something.  */
3165     for (j = f; j < 2; j++) {
3166         TCGRegSet set = reg_ct[j];
3167 
3168         if (tcg_regset_single(set)) {
3169             /* One register in the set.  */
3170             TCGReg reg = tcg_regset_first(set);
3171             tcg_reg_free(s, reg, allocated_regs);
3172             return reg;
3173         } else {
3174             for (i = 0; i < n; i++) {
3175                 TCGReg reg = order[i];
3176                 if (tcg_regset_test_reg(set, reg)) {
3177                     tcg_reg_free(s, reg, allocated_regs);
3178                     return reg;
3179                 }
3180             }
3181         }
3182     }
3183 
3184     tcg_abort();
3185 }
3186 
3187 /* Make sure the temporary is in a register.  If needed, allocate the register
3188    from DESIRED while avoiding ALLOCATED.
        PREFERRED is passed through to tcg_reg_alloc() as the preference set.
        A temp already in a register is left as is.  A constant is
        materialized with tcg_out_movi() and marked not mem_coherent; a
        value in memory is loaded with tcg_out_ld() and marked mem_coherent.
        A dead temp (or unknown val_type) triggers tcg_abort().  */
3189 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3190                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3191 {
3192     TCGReg reg;
3193 
3194     switch (ts->val_type) {
3195     case TEMP_VAL_REG:
3196         return;
3197     case TEMP_VAL_CONST:
3198         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3199                             preferred_regs, ts->indirect_base);
3200         tcg_out_movi(s, ts->type, reg, ts->val);
3201         ts->mem_coherent = 0;
3202         break;
3203     case TEMP_VAL_MEM:
3204         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3205                             preferred_regs, ts->indirect_base);
3206         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3207         ts->mem_coherent = 1;
3208         break;
3209     case TEMP_VAL_DEAD:
3210     default:
3211         tcg_abort();
3212     }
         /* Record the new register <-> temp association in both directions.  */
3213     ts->reg = reg;
3214     ts->val_type = TEMP_VAL_REG;
3215     s->reg_to_temp[reg] = ts;
3216 }
3217 
3218 /* Save a temporary to memory. 'allocated_regs' is used in case a
3219    temporary register needs to be allocated to store a constant.
        As written, this function emits no code and does not use
        'allocated_regs': it only asserts (in debug builds) that the temp
        is already in memory or lives in a fixed register.  */
3220 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3221 {
3222     /* The liveness analysis already ensures that globals are back
3223        in memory. Keep a tcg_debug_assert for safety. */
3224     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3225 }
3226 
3227 /* save globals to their canonical location and assume they can be
3228    modified by the following code. 'allocated_regs' is used in case a
3229    temporary register needs to be allocated to store a constant.
        Implemented by calling temp_save() on each of the first nb_globals
        temps.  */
3230 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3231 {
3232     int i, n;
3233 
3234     for (i = 0, n = s->nb_globals; i < n; i++) {
3235         temp_save(s, &s->temps[i], allocated_regs);
3236     }
3237 }
3238 
3239 /* sync globals to their canonical location and assume they can be
3240    read by the following code. 'allocated_regs' is used in case a
3241    temporary register needs to be allocated to store a constant.
        As written, no code is emitted and 'allocated_regs' is unused: the
        loop only asserts (in debug builds) that every global held in a
        non-fixed register is mem_coherent.  */
3242 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3243 {
3244     int i, n;
3245 
3246     for (i = 0, n = s->nb_globals; i < n; i++) {
3247         TCGTemp *ts = &s->temps[i];
3248         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3249                          || ts->fixed_reg
3250                          || ts->mem_coherent);
3251     }
3252 }
3253 
3254 /* at the end of a basic block, we assume all temporaries are dead and
3255    all globals are stored at their canonical location.
        Local temps are checked through temp_save(), other non-global temps
        are asserted to be TEMP_VAL_DEAD, and finally the globals are
        handled by save_globals().  */
3256 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3257 {
3258     int i;
3259 
         /* Non-global temps occupy indices nb_globals .. nb_temps - 1.  */
3260     for (i = s->nb_globals; i < s->nb_temps; i++) {
3261         TCGTemp *ts = &s->temps[i];
3262         if (ts->temp_local) {
3263             temp_save(s, ts, allocated_regs);
3264         } else {
3265             /* The liveness analysis already ensures that temps are dead.
3266                Keep a tcg_debug_assert for safety. */
3267             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3268         }
3269     }
3270 
3271     save_globals(s, allocated_regs);
3272 }
3273 
     /* Give output temp 'ots' the constant value 'val'.
        For a fixed-register temp the move-immediate is emitted right away.
        Otherwise no code is generated here: the temp is merely recorded as
        TEMP_VAL_CONST (releasing any register it held), and is then synced
        and/or marked dead as requested by the life data.
        NOTE(review): 'arg_life' is not referenced by name; presumably the
        NEED_SYNC_ARG/IS_DEAD_ARG macros read it implicitly -- their
        definitions are outside this chunk, confirm there.  */
3274 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3275                                   tcg_target_ulong val, TCGLifeData arg_life,
3276                                   TCGRegSet preferred_regs)
3277 {
3278     if (ots->fixed_reg) {
3279         /* For fixed registers, we do not do any constant propagation.  */
3280         tcg_out_movi(s, ots->type, ots->reg, val);
3281         return;
3282     }
3283 
3284     /* The movi is not explicitly generated here.  */
3285     if (ots->val_type == TEMP_VAL_REG) {
3286         s->reg_to_temp[ots->reg] = NULL;
3287     }
3288     ots->val_type = TEMP_VAL_CONST;
3289     ots->val = val;
3290     ots->mem_coherent = 0;
3291     if (NEED_SYNC_ARG(0)) {
3292         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3293     } else if (IS_DEAD_ARG(0)) {
3294         temp_dead(s, ots);
3295     }
3296 }
3297 
     /* Register allocation for a move-immediate op: args[0] names the
        destination temp and args[1] holds the constant.  The work is
        delegated to tcg_reg_alloc_do_movi() with the op's life data and
        the register preference recorded for output 0.  */
3298 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3299 {
3300     TCGTemp *ots = arg_temp(op->args[0]);
3301     tcg_target_ulong val = op->args[1];
3302 
3303     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3304 }
3305 
     /* Register allocation for a mov op (args[0] = destination temp,
        args[1] = source temp).
        - A constant source is propagated via tcg_reg_alloc_do_movi().
        - Otherwise the source is brought into a register if it lives in
          memory, and then:
          * if the destination is dead and not a fixed register, the value
            is stored directly to the destination's memory slot;
          * else if the source dies here and neither temp is fixed, the
            destination takes over the source's register without emitting
            a move;
          * else a host move is emitted, allocating a destination register
            first when the destination has none.  */
3306 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3307 {
3308     const TCGLifeData arg_life = op->life;
3309     TCGRegSet allocated_regs, preferred_regs;
3310     TCGTemp *ts, *ots;
3311     TCGType otype, itype;
3312 
3313     allocated_regs = s->reserved_regs;
3314     preferred_regs = op->output_pref[0];
3315     ots = arg_temp(op->args[0]);
3316     ts = arg_temp(op->args[1]);
3317 
3318     /* Note that otype != itype for no-op truncation.  */
3319     otype = ots->type;
3320     itype = ts->type;
3321 
3322     if (ts->val_type == TEMP_VAL_CONST) {
3323         /* propagate constant or generate sti */
3324         tcg_target_ulong val = ts->val;
3325         if (IS_DEAD_ARG(1)) {
3326             temp_dead(s, ts);
3327         }
3328         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3329         return;
3330     }
3331 
3332     /* If the source value is in memory we're going to be forced
3333        to have it in a register in order to perform the copy.  Copy
3334        the SOURCE value into its own register first, that way we
3335        don't have to reload SOURCE the next time it is used. */
3336     if (ts->val_type == TEMP_VAL_MEM) {
3337         temp_load(s, ts, tcg_target_available_regs[itype],
3338                   allocated_regs, preferred_regs);
3339     }
3340 
3341     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3342     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
3343         /* mov to a non-saved dead register makes no sense (even with
3344            liveness analysis disabled). */
3345         tcg_debug_assert(NEED_SYNC_ARG(0));
3346         if (!ots->mem_allocated) {
3347             temp_allocate_frame(s, ots);
3348         }
             /* Store the source register straight into the destination's
                memory slot; the destination never gets a register.  */
3349         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3350         if (IS_DEAD_ARG(1)) {
3351             temp_dead(s, ts);
3352         }
3353         temp_dead(s, ots);
3354     } else {
3355         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
3356             /* the mov can be suppressed */
3357             if (ots->val_type == TEMP_VAL_REG) {
3358                 s->reg_to_temp[ots->reg] = NULL;
3359             }
3360             ots->reg = ts->reg;
3361             temp_dead(s, ts);
3362         } else {
3363             if (ots->val_type != TEMP_VAL_REG) {
3364                 /* When allocating a new register, make sure to not spill the
3365                    input one. */
3366                 tcg_regset_set_reg(allocated_regs, ts->reg);
3367                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3368                                          allocated_regs, preferred_regs,
3369                                          ots->indirect_base);
3370             }
3371             tcg_out_mov(s, otype, ots->reg, ts->reg);
3372         }
             /* In both sub-cases the destination now lives in ots->reg and
                its memory copy (if any) is stale.  */
3373         ots->val_type = TEMP_VAL_REG;
3374         ots->mem_coherent = 0;
3375         s->reg_to_temp[ots->reg] = ots;
3376         if (NEED_SYNC_ARG(0)) {
3377             temp_sync(s, ots, allocated_regs, 0, 0);
3378         }
3379     }
3380 }
3381 
     /* Generic register allocation and code emission for one op.
        Steps, in order:
          1. copy the op's constant arguments into new_args[];
          2. place each input in a register satisfying its constraint, or
             pass it as an immediate when tcg_target_const_match() accepts
             the constant;
          3. mark inputs that die in this op as dead;
          4. for a basic-block-ending op run tcg_reg_alloc_bb_end();
             otherwise free call-clobbered registers and/or check globals
             as the op flags require, then pick a register for each output;
          5. emit the instruction (tcg_out_vec_op for vector ops, else
             tcg_out_op);
          6. copy results into fixed-register outputs where needed, then
             sync or kill outputs according to the life data.
        Inputs and outputs are visited in def->sorted_args order.  */
3382 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3383 {
3384     const TCGLifeData arg_life = op->life;
3385     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3386     TCGRegSet i_allocated_regs;
3387     TCGRegSet o_allocated_regs;
3388     int i, k, nb_iargs, nb_oargs;
3389     TCGReg reg;
3390     TCGArg arg;
3391     const TCGArgConstraint *arg_ct;
3392     TCGTemp *ts;
3393     TCGArg new_args[TCG_MAX_OP_ARGS];
3394     int const_args[TCG_MAX_OP_ARGS];
3395 
3396     nb_oargs = def->nb_oargs;
3397     nb_iargs = def->nb_iargs;
3398 
3399     /* copy constants */
3400     memcpy(new_args + nb_oargs + nb_iargs,
3401            op->args + nb_oargs + nb_iargs,
3402            sizeof(TCGArg) * def->nb_cargs);
3403 
         /* Separate "in use" sets are tracked for inputs and outputs; both
            start from the reserved registers.  */
3404     i_allocated_regs = s->reserved_regs;
3405     o_allocated_regs = s->reserved_regs;
3406 
3407     /* satisfy input constraints */
3408     for (k = 0; k < nb_iargs; k++) {
3409         TCGRegSet i_preferred_regs, o_preferred_regs;
3410 
3411         i = def->sorted_args[nb_oargs + k];
3412         arg = op->args[i];
3413         arg_ct = &def->args_ct[i];
3414         ts = arg_temp(arg);
3415 
3416         if (ts->val_type == TEMP_VAL_CONST
3417             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3418             /* constant is OK for instruction */
3419             const_args[i] = 1;
3420             new_args[i] = ts->val;
3421             continue;
3422         }
3423 
3424         i_preferred_regs = o_preferred_regs = 0;
3425         if (arg_ct->ct & TCG_CT_IALIAS) {
3426             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3427             if (ts->fixed_reg) {
3428                 /* if fixed register, we must allocate a new register
3429                    if the alias is not the same register */
3430                 if (arg != op->args[arg_ct->alias_index]) {
3431                     goto allocate_in_reg;
3432                 }
3433             } else {
3434                 /* if the input is aliased to an output and if it is
3435                    not dead after the instruction, we must allocate
3436                    a new register and move it */
3437                 if (!IS_DEAD_ARG(i)) {
3438                     goto allocate_in_reg;
3439                 }
3440 
3441                 /* check if the current register has already been allocated
3442                    for another input aliased to an output */
3443                 if (ts->val_type == TEMP_VAL_REG) {
3444                     int k2, i2;
3445                     reg = ts->reg;
3446                     for (k2 = 0 ; k2 < k ; k2++) {
3447                         i2 = def->sorted_args[nb_oargs + k2];
3448                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3449                             reg == new_args[i2]) {
3450                             goto allocate_in_reg;
3451                         }
3452                     }
3453                 }
                     /* The input's register will be reused for the aliased
                        output, so the output's preference applies to it.  */
3454                 i_preferred_regs = o_preferred_regs;
3455             }
3456         }
3457 
3458         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3459         reg = ts->reg;
3460 
3461         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3462             /* nothing to do : the constraint is satisfied */
3463         } else {
                 /* Also reached by the gotos above, which jump into this
                    else-branch.  */
3464         allocate_in_reg:
3465             /* allocate a new register matching the constraint
3466                and move the temporary register into it */
3467             temp_load(s, ts, tcg_target_available_regs[ts->type],
3468                       i_allocated_regs, 0);
3469             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3470                                 o_preferred_regs, ts->indirect_base);
3471             tcg_out_mov(s, ts->type, reg, ts->reg);
3472         }
3473         new_args[i] = reg;
3474         const_args[i] = 0;
3475         tcg_regset_set_reg(i_allocated_regs, reg);
3476     }
3477 
3478     /* mark dead temporaries and free the associated registers */
3479     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3480         if (IS_DEAD_ARG(i)) {
3481             temp_dead(s, arg_temp(op->args[i]));
3482         }
3483     }
3484 
3485     if (def->flags & TCG_OPF_BB_END) {
3486         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3487     } else {
3488         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3489             /* XXX: permit generic clobber register list ? */
3490             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3491                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3492                     tcg_reg_free(s, i, i_allocated_regs);
3493                 }
3494             }
3495         }
3496         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3497             /* sync globals if the op has side effects and might trigger
3498                an exception. */
3499             sync_globals(s, i_allocated_regs);
3500         }
3501 
3502         /* satisfy the output constraints */
             /* NOTE(review): output_pref[] is indexed by k (position in
                sorted order) below, whereas the input path above indexes it
                by argument index (alias_index).  The two differ whenever
                sorted_args reorders the outputs -- confirm this is intended.
                It only influences a preference, not which registers are
                legal.  */
3503         for(k = 0; k < nb_oargs; k++) {
3504             i = def->sorted_args[k];
3505             arg = op->args[i];
3506             arg_ct = &def->args_ct[i];
3507             ts = arg_temp(arg);
3508             if ((arg_ct->ct & TCG_CT_ALIAS)
3509                 && !const_args[arg_ct->alias_index]) {
                     /* Output shares the register of its aliased input.  */
3510                 reg = new_args[arg_ct->alias_index];
3511             } else if (arg_ct->ct & TCG_CT_NEWREG) {
                     /* Must not overlap any input or earlier output.  */
3512                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3513                                     i_allocated_regs | o_allocated_regs,
3514                                     op->output_pref[k], ts->indirect_base);
3515             } else {
3516                 /* if fixed register, we try to use it */
3517                 reg = ts->reg;
3518                 if (ts->fixed_reg &&
3519                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3520                     goto oarg_end;
3521                 }
3522                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3523                                     op->output_pref[k], ts->indirect_base);
3524             }
3525             tcg_regset_set_reg(o_allocated_regs, reg);
3526             /* if a fixed register is used, then a move will be done afterwards */
3527             if (!ts->fixed_reg) {
3528                 if (ts->val_type == TEMP_VAL_REG) {
3529                     s->reg_to_temp[ts->reg] = NULL;
3530                 }
3531                 ts->val_type = TEMP_VAL_REG;
3532                 ts->reg = reg;
3533                 /* temp value is modified, so the value kept in memory is
3534                    potentially not the same */
3535                 ts->mem_coherent = 0;
3536                 s->reg_to_temp[reg] = ts;
3537             }
3538         oarg_end:
3539             new_args[i] = reg;
3540         }
3541     }
3542 
3543     /* emit instruction */
3544     if (def->flags & TCG_OPF_VECTOR) {
3545         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3546                        new_args, const_args);
3547     } else {
3548         tcg_out_op(s, op->opc, new_args, const_args);
3549     }
3550 
3551     /* move the outputs in the correct register if needed */
3552     for(i = 0; i < nb_oargs; i++) {
3553         ts = arg_temp(op->args[i]);
3554         reg = new_args[i];
3555         if (ts->fixed_reg && ts->reg != reg) {
3556             tcg_out_mov(s, ts->type, ts->reg, reg);
3557         }
3558         if (NEED_SYNC_ARG(i)) {
3559             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3560         } else if (IS_DEAD_ARG(i)) {
3561             temp_dead(s, ts);
3562         }
3563     }
3564 }
3565 
/*
 * STACK_DIR(x): sign-adjust a stack displacement.  When the backend defines
 * TCG_TARGET_STACK_GROWSUP the value is negated; otherwise it is passed
 * through unchanged.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3571 
/*
 * Register allocation and code emission for one call op.
 *
 * op->args[] is laid out as: nb_oargs outputs, nb_iargs inputs, then the
 * callee address and the call flags.  The function proceeds in this order:
 *   1. inputs that do not fit in tcg_target_call_iarg_regs[] are stored to
 *      stack slots relative to TCG_REG_CALL_STACK;
 *   2. the remaining inputs are placed in their argument registers;
 *   3. dead inputs are released and call-clobbered registers are freed;
 *   4. globals are synced or saved depending on the call flags;
 *   5. the call is emitted and the outputs are bound to
 *      tcg_target_call_oarg_regs[].
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /* Not referenced by name below; presumably consumed by the
       IS_DEAD_ARG()/NEED_SYNC_ARG() macros, which are defined outside
       this part of the file -- TODO confirm. */
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The two trailing args hold the callee address and the call flags. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* Number of inputs passed in registers: capped by the register list. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        /* Grows-up stacks: step the offset down before using the slot. */
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* Dummy args still consume a slot but nothing is stored in it. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            /* Get the value into some register, then store it to the slot. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        /* Default direction: step the offset up after using the slot. */
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                /* Already in a register: move only if it is the wrong one,
                   after evicting whatever occupies the argument register. */
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else {
                /* Not in a register: load it with the argument register as
                   the only permitted destination. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            /* Keep later iterations from reusing this argument register. */
            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);
        reg = tcg_target_call_oarg_regs[i];
        /* The return register must not be tracking any temp at this point. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);

        if (ts->fixed_reg) {
            /* Fixed-register temp: copy the result into its own register. */
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            /* Rebind the temp to the return register, dropping any previous
               register association. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /* The register now holds a new value, so the memory copy is
               no longer known to match. */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            if (NEED_SYNC_ARG(i)) {
                temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
            } else if (IS_DEAD_ARG(i)) {
                temp_dead(s, ts);
            }
        }
    }
}
3701 
3702 #ifdef CONFIG_PROFILER
3703 
3704 /* avoid copy/paste errors */
3705 #define PROF_ADD(to, from, field)                       \
3706     do {                                                \
3707         (to)->field += atomic_read(&((from)->field));   \
3708     } while (0)
3709 
3710 #define PROF_MAX(to, from, field)                                       \
3711     do {                                                                \
3712         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3713         if (val__ > (to)->field) {                                      \
3714             (to)->field = val__;                                        \
3715         }                                                               \
3716     } while (0)
3717 
3718 /* Pass in a zero'ed @prof */
3719 static inline
3720 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3721 {
3722     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3723     unsigned int i;
3724 
3725     for (i = 0; i < n_ctxs; i++) {
3726         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3727         const TCGProfile *orig = &s->prof;
3728 
3729         if (counters) {
3730             PROF_ADD(prof, orig, cpu_exec_time);
3731             PROF_ADD(prof, orig, tb_count1);
3732             PROF_ADD(prof, orig, tb_count);
3733             PROF_ADD(prof, orig, op_count);
3734             PROF_MAX(prof, orig, op_count_max);
3735             PROF_ADD(prof, orig, temp_count);
3736             PROF_MAX(prof, orig, temp_count_max);
3737             PROF_ADD(prof, orig, del_op_count);
3738             PROF_ADD(prof, orig, code_in_len);
3739             PROF_ADD(prof, orig, code_out_len);
3740             PROF_ADD(prof, orig, search_out_len);
3741             PROF_ADD(prof, orig, interm_time);
3742             PROF_ADD(prof, orig, code_time);
3743             PROF_ADD(prof, orig, la_time);
3744             PROF_ADD(prof, orig, opt_time);
3745             PROF_ADD(prof, orig, restore_count);
3746             PROF_ADD(prof, orig, restore_time);
3747         }
3748         if (table) {
3749             int i;
3750 
3751             for (i = 0; i < NB_OPS; i++) {
3752                 PROF_ADD(prof, orig, table_op_count[i]);
3753             }
3754         }
3755     }
3756 }
3757 
3758 #undef PROF_ADD
3759 #undef PROF_MAX
3760 
/* Snapshot only the scalar counters/timers into @prof (the per-opcode
   table is not touched).  @prof is handed to tcg_profile_snapshot(), which
   expects it zeroed. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
3765 
/* Snapshot only the per-opcode table into @prof (the scalar counters are
   not touched).  @prof is handed to tcg_profile_snapshot(), which expects
   it zeroed. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
3770 
3771 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3772 {
3773     TCGProfile prof = {};
3774     int i;
3775 
3776     tcg_profile_snapshot_table(&prof);
3777     for (i = 0; i < NB_OPS; i++) {
3778         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3779                     prof.table_op_count[i]);
3780     }
3781 }
3782 
3783 int64_t tcg_cpu_exec_time(void)
3784 {
3785     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3786     unsigned int i;
3787     int64_t ret = 0;
3788 
3789     for (i = 0; i < n_ctxs; i++) {
3790         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3791         const TCGProfile *prof = &s->prof;
3792 
3793         ret += atomic_read(&prof->cpu_exec_time);
3794     }
3795     return ret;
3796 }
3797 #else
/* Variant built without CONFIG_PROFILER: there are no counts to report, so
   only a fixed notice is printed to @f through @cpu_fprintf. */
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
3802 
/* Variant built without CONFIG_PROFILER: calling it is treated as fatal.
   It reports an error naming this function and terminates the process with
   EXIT_FAILURE, so it never returns a value. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
3808 #endif
3809 
3810 
/*
 * Generate host code for the op list held in @s, writing it at tb->tc.ptr.
 *
 * Pipeline: optional profiling/logging, tcg_optimize() (when
 * USE_TCG_OPTIMIZATIONS is defined), reachability and liveness passes,
 * optional lowering of indirect temps, then a single walk over the ops
 * that performs register allocation and emits code.
 *
 * Returns the size of the generated code as reported by
 * tcg_current_code_size(), or -1 if the code pointer passed
 * s->code_gen_highwater or one of the backend finalize steps failed.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        /* Count the ops in this TB to update the total and the maximum. */
        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        atomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            atomic_set(&prof->op_count_max, n);
        }

        /* Same bookkeeping for the number of temps. */
        n = s->nb_temps;
        atomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            atomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the ops as received, before any pass has run. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    /* Timers are accumulated as (end - start): subtract the clock now and
       add it back once the timed section is done. */
    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock();
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    /* Code for this TB is emitted starting at tb->tc.ptr. */
    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Index of the current guest instruction; -1 until the first
       insn_start op has been seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_insn_start:
            /* A new guest instruction begins: record where the host code of
               the previous one ended. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            /* Copy the insn_start words into gen_insn_data[]. */
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word is split across two args: low half first, then
                   the high half deposited at bit 32. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* Run the basic-block-end handling before binding the label to
               the current code pointer. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    /* At least one insn_start must have been seen; close the last insn. */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    if (!tcg_out_ldst_finalize(s)) {
        return -1;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    if (!tcg_out_pool_finalize(s)) {
        return -1;
    }
#endif

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
4015 
4016 #ifdef CONFIG_PROFILER
4017 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
4018 {
4019     TCGProfile prof = {};
4020     const TCGProfile *s;
4021     int64_t tb_count;
4022     int64_t tb_div_count;
4023     int64_t tot;
4024 
4025     tcg_profile_snapshot_counters(&prof);
4026     s = &prof;
4027     tb_count = s->tb_count;
4028     tb_div_count = tb_count ? tb_count : 1;
4029     tot = s->interm_time + s->code_time;
4030 
4031     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4032                 tot, tot / 2.4e9);
4033     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
4034                 tb_count, s->tb_count1 - tb_count,
4035                 (double)(s->tb_count1 - s->tb_count)
4036                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4037     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
4038                 (double)s->op_count / tb_div_count, s->op_count_max);
4039     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
4040                 (double)s->del_op_count / tb_div_count);
4041     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
4042                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4043     cpu_fprintf(f, "avg host code/TB    %0.1f\n",
4044                 (double)s->code_out_len / tb_div_count);
4045     cpu_fprintf(f, "avg search data/TB  %0.1f\n",
4046                 (double)s->search_out_len / tb_div_count);
4047 
4048     cpu_fprintf(f, "cycles/op           %0.1f\n",
4049                 s->op_count ? (double)tot / s->op_count : 0);
4050     cpu_fprintf(f, "cycles/in byte      %0.1f\n",
4051                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4052     cpu_fprintf(f, "cycles/out byte     %0.1f\n",
4053                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4054     cpu_fprintf(f, "cycles/search byte     %0.1f\n",
4055                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4056     if (tot == 0) {
4057         tot = 1;
4058     }
4059     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
4060                 (double)s->interm_time / tot * 100.0);
4061     cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
4062                 (double)s->code_time / tot * 100.0);
4063     cpu_fprintf(f, "optim./code time    %0.1f%%\n",
4064                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4065                 * 100.0);
4066     cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
4067                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4068     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
4069                 s->restore_count);
4070     cpu_fprintf(f, "  avg cycles        %0.1f\n",
4071                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4072 }
4073 #else
/* Variant built without CONFIG_PROFILER: no statistics exist, so only a
   fixed notice is printed to @f through @cpu_fprintf. */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
4078 #endif
4079 
4080 #ifdef ELF_HOST_MACHINE
4081 /* In order to use this feature, the backend needs to do three things:
4082 
4083    (1) Define ELF_HOST_MACHINE to indicate both what value to
4084        put into the ELF image and to indicate support for the feature.
4085 
4086    (2) Define tcg_register_jit.  This should create a buffer containing
4087        the contents of a .debug_frame section that describes the post-
4088        prologue unwind info for the tcg machine.
4089 
4090    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4091 */
4092 
4093 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values stored in jit_descriptor.action_flag.  Part of the GDB interface:
   the numeric values (0, 1, 2) must not change. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,    /* the only action this file sets */
    JIT_UNREGISTER_FN
} jit_actions_t;
4099 
/* One registered in-memory symbol file.  Part of the GDB interface: field
   order and types must not change. */
struct jit_code_entry {
    /* List links.  This file registers a single static entry and never
       assigns them, so they stay zero-initialized. */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    /* Start address and size in bytes of the symbol file image. */
    const void *symfile_addr;
    uint64_t symfile_size;
};
4106 
/* Root object of the registration protocol.  Part of the GDB interface:
   field order and types must not change. */
struct jit_descriptor {
    uint32_t version;       /* statically initialized to 1 in this file */
    uint32_t action_flag;   /* a jit_actions_t value */
    /* Entry the current action refers to, and head of the entry list. */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
4113 
/* Notification hook called after __jit_debug_descriptor has been updated.
   Per the GDB JIT interface documentation the debugger places a breakpoint
   on this symbol; the function is therefore kept out of line (noinline)
   and the empty asm statement gives it a body -- presumably so the
   compiler does not optimize calls to it away (TODO confirm). */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4119 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  The initializer sets version = 1,
   action_flag = 0 (JIT_NOACTION) and both entry pointers to null.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4123 
4124 /* End GDB interface.  */
4125 
/*
 * Return the byte offset of @str inside the string table @strtab.
 *
 * @strtab is a sequence of NUL-terminated strings laid out back to back;
 * the search starts at offset 1, skipping the first byte.
 *
 * Precondition: @str must be present in the table.  The scan has no end
 * bound, so a missing string makes it run past the end of @strtab.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cur;

    /* Hop from one string to the next until an exact match is found. */
    for (cur = strtab + 1; strcmp(cur, str) != 0; cur += strlen(cur) + 1) {
        continue;
    }
    return cur - strtab;
}
4137 
4138 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4139                                  const void *debug_frame,
4140                                  size_t debug_frame_size)
4141 {
4142     struct __attribute__((packed)) DebugInfo {
4143         uint32_t  len;
4144         uint16_t  version;
4145         uint32_t  abbrev;
4146         uint8_t   ptr_size;
4147         uint8_t   cu_die;
4148         uint16_t  cu_lang;
4149         uintptr_t cu_low_pc;
4150         uintptr_t cu_high_pc;
4151         uint8_t   fn_die;
4152         char      fn_name[16];
4153         uintptr_t fn_low_pc;
4154         uintptr_t fn_high_pc;
4155         uint8_t   cu_eoc;
4156     };
4157 
4158     struct ElfImage {
4159         ElfW(Ehdr) ehdr;
4160         ElfW(Phdr) phdr;
4161         ElfW(Shdr) shdr[7];
4162         ElfW(Sym)  sym[2];
4163         struct DebugInfo di;
4164         uint8_t    da[24];
4165         char       str[80];
4166     };
4167 
4168     struct ElfImage *img;
4169 
4170     static const struct ElfImage img_template = {
4171         .ehdr = {
4172             .e_ident[EI_MAG0] = ELFMAG0,
4173             .e_ident[EI_MAG1] = ELFMAG1,
4174             .e_ident[EI_MAG2] = ELFMAG2,
4175             .e_ident[EI_MAG3] = ELFMAG3,
4176             .e_ident[EI_CLASS] = ELF_CLASS,
4177             .e_ident[EI_DATA] = ELF_DATA,
4178             .e_ident[EI_VERSION] = EV_CURRENT,
4179             .e_type = ET_EXEC,
4180             .e_machine = ELF_HOST_MACHINE,
4181             .e_version = EV_CURRENT,
4182             .e_phoff = offsetof(struct ElfImage, phdr),
4183             .e_shoff = offsetof(struct ElfImage, shdr),
4184             .e_ehsize = sizeof(ElfW(Shdr)),
4185             .e_phentsize = sizeof(ElfW(Phdr)),
4186             .e_phnum = 1,
4187             .e_shentsize = sizeof(ElfW(Shdr)),
4188             .e_shnum = ARRAY_SIZE(img->shdr),
4189             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4190 #ifdef ELF_HOST_FLAGS
4191             .e_flags = ELF_HOST_FLAGS,
4192 #endif
4193 #ifdef ELF_OSABI
4194             .e_ident[EI_OSABI] = ELF_OSABI,
4195 #endif
4196         },
4197         .phdr = {
4198             .p_type = PT_LOAD,
4199             .p_flags = PF_X,
4200         },
4201         .shdr = {
4202             [0] = { .sh_type = SHT_NULL },
4203             /* Trick: The contents of code_gen_buffer are not present in
4204                this fake ELF file; that got allocated elsewhere.  Therefore
4205                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4206                will not look for contents.  We can record any address.  */
4207             [1] = { /* .text */
4208                 .sh_type = SHT_NOBITS,
4209                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4210             },
4211             [2] = { /* .debug_info */
4212                 .sh_type = SHT_PROGBITS,
4213                 .sh_offset = offsetof(struct ElfImage, di),
4214                 .sh_size = sizeof(struct DebugInfo),
4215             },
4216             [3] = { /* .debug_abbrev */
4217                 .sh_type = SHT_PROGBITS,
4218                 .sh_offset = offsetof(struct ElfImage, da),
4219                 .sh_size = sizeof(img->da),
4220             },
4221             [4] = { /* .debug_frame */
4222                 .sh_type = SHT_PROGBITS,
4223                 .sh_offset = sizeof(struct ElfImage),
4224             },
4225             [5] = { /* .symtab */
4226                 .sh_type = SHT_SYMTAB,
4227                 .sh_offset = offsetof(struct ElfImage, sym),
4228                 .sh_size = sizeof(img->sym),
4229                 .sh_info = 1,
4230                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4231                 .sh_entsize = sizeof(ElfW(Sym)),
4232             },
4233             [6] = { /* .strtab */
4234                 .sh_type = SHT_STRTAB,
4235                 .sh_offset = offsetof(struct ElfImage, str),
4236                 .sh_size = sizeof(img->str),
4237             }
4238         },
4239         .sym = {
4240             [1] = { /* code_gen_buffer */
4241                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4242                 .st_shndx = 1,
4243             }
4244         },
4245         .di = {
4246             .len = sizeof(struct DebugInfo) - 4,
4247             .version = 2,
4248             .ptr_size = sizeof(void *),
4249             .cu_die = 1,
4250             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4251             .fn_die = 2,
4252             .fn_name = "code_gen_buffer"
4253         },
4254         .da = {
4255             1,          /* abbrev number (the cu) */
4256             0x11, 1,    /* DW_TAG_compile_unit, has children */
4257             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4258             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4259             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4260             0, 0,       /* end of abbrev */
4261             2,          /* abbrev number (the fn) */
4262             0x2e, 0,    /* DW_TAG_subprogram, no children */
4263             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4264             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4265             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4266             0, 0,       /* end of abbrev */
4267             0           /* no more abbrev */
4268         },
4269         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4270                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4271     };
4272 
4273     /* We only need a single jit entry; statically allocate it.  */
4274     static struct jit_code_entry one_entry;
4275 
4276     uintptr_t buf = (uintptr_t)buf_ptr;
4277     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4278     DebugFrameHeader *dfh;
4279 
4280     img = g_malloc(img_size);
4281     *img = img_template;
4282 
4283     img->phdr.p_vaddr = buf;
4284     img->phdr.p_paddr = buf;
4285     img->phdr.p_memsz = buf_size;
4286 
4287     img->shdr[1].sh_name = find_string(img->str, ".text");
4288     img->shdr[1].sh_addr = buf;
4289     img->shdr[1].sh_size = buf_size;
4290 
4291     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4292     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4293 
4294     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4295     img->shdr[4].sh_size = debug_frame_size;
4296 
4297     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4298     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4299 
4300     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4301     img->sym[1].st_value = buf;
4302     img->sym[1].st_size = buf_size;
4303 
4304     img->di.cu_low_pc = buf;
4305     img->di.cu_high_pc = buf + buf_size;
4306     img->di.fn_low_pc = buf;
4307     img->di.fn_high_pc = buf + buf_size;
4308 
4309     dfh = (DebugFrameHeader *)(img + 1);
4310     memcpy(dfh, debug_frame, debug_frame_size);
4311     dfh->fde.func_start = buf;
4312     dfh->fde.func_len = buf_size;
4313 
4314 #ifdef DEBUG_JIT
4315     /* Enable this block to be able to debug the ELF image file creation.
4316        One can use readelf, objdump, or other inspection utilities.  */
4317     {
4318         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4319         if (f) {
4320             if (fwrite(img, img_size, 1, f) != img_size) {
4321                 /* Avoid stupid unused return value warning for fwrite.  */
4322             }
4323             fclose(f);
4324         }
4325     }
4326 #endif
4327 
4328     one_entry.symfile_addr = img;
4329     one_entry.symfile_size = img_size;
4330 
4331     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4332     __jit_debug_descriptor.relevant_entry = &one_entry;
4333     __jit_debug_descriptor.first_entry = &one_entry;
4334     __jit_debug_register_code();
4335 }
4336 #else
4337 /* No support for the feature.  Provide the entry point expected by exec.c,
4338    and implement the internal function we declared earlier.  */
4339 
/* Variant used when ELF_HOST_MACHINE is not defined: intentionally does
   nothing and ignores all of its arguments. */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
4345 
/* Variant used when ELF_HOST_MACHINE is not defined: intentionally does
   nothing, so callers need no conditional compilation of their own. */
void tcg_register_jit(void *buf, size_t buf_size)
{
}
4349 #endif /* ELF_HOST_MACHINE */
4350 
4351 #if !TCG_TARGET_MAYBE_vec
/* Placeholder definition, compiled only when TCG_TARGET_MAYBE_vec is
   false.  It must never be called: reaching it trips
   g_assert_not_reached(). */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
4356 #endif
4357