xref: /openbmc/qemu/tcg/tcg.c (revision 200280af0e19bfaeb9431eb0ee1ee2d8bf8d3a0a)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are only used for the qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
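
/*
 * Illustrative note: on a host whose TCG_TARGET_INSN_UNIT_SIZE is 1
 * (e.g. x86), tcg_out32(s, 0x12345678) takes the memcpy path above and
 * advances code_ptr by four one-byte units, whereas a host with 4-byte
 * units stores a single unit directly.  The tcg_patch* variants rewrite
 * an already-emitted value in place; they are marked unused because only
 * some targets' patch_reloc() implementations need them.
 */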

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
        tcg_debug_assert(ok);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
        tcg_debug_assert(ok);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}
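
/*
 * Worked example of the two-phase scheme above: a branch forward to a
 * label that has not been emitted yet goes through tcg_out_reloc(),
 * which queues a TCGRelocation on the label.  Once the label's address
 * is known, tcg_out_label() walks that queue and patches every recorded
 * site via patch_reloc().  Branches backward (l->has_value already set)
 * are patched immediately by tcg_out_reloc() itself.
 */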

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };

    return l;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}
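
/*
 * Note: the jump-reset offsets are stored in a field narrower than
 * size_t (16 bits at the time of writing), so the store above can
 * truncate; the assert catches a TB large enough for that to happen.
 */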

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have the .size field of one of the two operands set to 0.
     * From the glib sources we see that @ap is always the lookup key; however,
     * the docs provide no such guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
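
/*
 * Worked example with hypothetical numbers: if start_aligned is
 * 0x10000000, stride is 0x200000 and n is 8, then p == 0x10500000
 * gives offset 0x500000 and region_idx 2.  Pointers below
 * start_aligned can only fall in region 0 (the unaligned head), and
 * pointers past the last stride belong to the tail pages handed to
 * the last region.
 */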

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}
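
/*
 * The resulting layout of code_gen_buffer, for reference:
 *
 *   start .. start_aligned | region 0 | guard | region 1 | guard | ... | end
 *
 * Region 0 absorbs the unaligned head between start and start_aligned,
 * the last region absorbs the leftover pages before end, and every
 * region is followed by a one-page guard (stride == size + page_size).
 */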

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
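
/*
 * Worked example with hypothetical numbers: with a 256 MB
 * code_gen_buffer and max_cpus == 8, the first iteration above (i == 8)
 * yields 256 MB / 64 == 4 MB per region, which meets the 2 MB minimum,
 * so 64 regions are used.  With an 8 MB buffer no value of i reaches
 * 2 MB, and we fall through to one ~1 MB region per vCPU thread.
 */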

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
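
/*
 * In other words: take the whole span from region.start through the
 * final guard page, then subtract each region's guard page and the
 * TCG_HIGHWATER slack that tcg_region_assign() keeps below the end;
 * what remains is the number of bytes that can actually hold code.
 */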

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
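
/*
 * Usage note: callers normally go through the tcg_malloc() inline in
 * tcg.h, which bump-allocates from pool_cur and only drops into
 * tcg_malloc_internal() when the current chunk is exhausted.  There is
 * no per-allocation free; everything is released in bulk by
 * tcg_pool_reset(), called from tcg_func_start() before each
 * translation.
 */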

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        bool ok = tcg_out_pool_finalize(s);
        tcg_debug_assert(ok);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
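
/*
 * Example of the 32-bit-host split above, with a hypothetical 64-bit
 * global "pc" registered at offset 0x80: on a little-endian host this
 * creates two I32 halves, "pc_0" (low) at mem_offset 0x80 and "pc_1"
 * (high) at 0x84; on a big-endian host the two offsets are swapped.
 */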

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
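
/*
 * Note on recycling: the index k above encodes both base type and
 * locality (k == type, or type + TCG_TYPE_COUNT for locals), so a freed
 * temp is only handed back to a request with the same type/locality
 * combination; tcg_temp_free_internal() below computes the same index.
 */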

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64-bit args to 32-bit and do some alignment
   and endian swapping. Maybe it would be better to do the alignment
   and endian swapping in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;
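
    /*
     * sizemask encoding, as built by the dh_sizemask() helper macros:
     * two bits per value, with the return value at bits [1:0] and
     * argument i at bits [2*(i+1) + 1 : 2*(i+1)].  The low bit of each
     * pair means "64-bit value", the high bit "signed"; hence the
     * (1 << (i+1)*2) and (2 << (i+1)*2) tests below.
     */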
1640 
1641 #if defined(__sparc__) && !defined(__arch64__) \
1642     && !defined(CONFIG_TCG_INTERPRETER)
1643     /* We have 64-bit values in one register, but need to pass as two
1644        separate parameters.  Split them.  */
1645     int orig_sizemask = sizemask;
1646     int orig_nargs = nargs;
1647     TCGv_i64 retl, reth;
1648     TCGTemp *split_args[MAX_OPC_PARAM];
1649 
1650     retl = NULL;
1651     reth = NULL;
1652     if (sizemask != 0) {
1653         for (i = real_args = 0; i < nargs; ++i) {
1654             int is_64bit = sizemask & (1 << (i+1)*2);
1655             if (is_64bit) {
1656                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1657                 TCGv_i32 h = tcg_temp_new_i32();
1658                 TCGv_i32 l = tcg_temp_new_i32();
1659                 tcg_gen_extr_i64_i32(l, h, orig);
1660                 split_args[real_args++] = tcgv_i32_temp(h);
1661                 split_args[real_args++] = tcgv_i32_temp(l);
1662             } else {
1663                 split_args[real_args++] = args[i];
1664             }
1665         }
1666         nargs = real_args;
1667         args = split_args;
1668         sizemask = 0;
1669     }
1670 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1671     for (i = 0; i < nargs; ++i) {
1672         int is_64bit = sizemask & (1 << (i+1)*2);
1673         int is_signed = sizemask & (2 << (i+1)*2);
1674         if (!is_64bit) {
1675             TCGv_i64 temp = tcg_temp_new_i64();
1676             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1677             if (is_signed) {
1678                 tcg_gen_ext32s_i64(temp, orig);
1679             } else {
1680                 tcg_gen_ext32u_i64(temp, orig);
1681             }
1682             args[i] = tcgv_i64_temp(temp);
1683         }
1684     }
1685 #endif /* TCG_TARGET_EXTEND_ARGS */
1686 
1687     op = tcg_emit_op(INDEX_op_call);
1688 
1689     pi = 0;
1690     if (ret != NULL) {
1691 #if defined(__sparc__) && !defined(__arch64__) \
1692     && !defined(CONFIG_TCG_INTERPRETER)
1693         if (orig_sizemask & 1) {
1694             /* The 32-bit ABI is going to return the 64-bit value in
1695                the %o0/%o1 register pair.  Prepare for this by using
1696                two return temporaries, and reassemble below.  */
1697             retl = tcg_temp_new_i64();
1698             reth = tcg_temp_new_i64();
1699             op->args[pi++] = tcgv_i64_arg(reth);
1700             op->args[pi++] = tcgv_i64_arg(retl);
1701             nb_rets = 2;
1702         } else {
1703             op->args[pi++] = temp_arg(ret);
1704             nb_rets = 1;
1705         }
1706 #else
1707         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1708 #ifdef HOST_WORDS_BIGENDIAN
1709             op->args[pi++] = temp_arg(ret + 1);
1710             op->args[pi++] = temp_arg(ret);
1711 #else
1712             op->args[pi++] = temp_arg(ret);
1713             op->args[pi++] = temp_arg(ret + 1);
1714 #endif
1715             nb_rets = 2;
1716         } else {
1717             op->args[pi++] = temp_arg(ret);
1718             nb_rets = 1;
1719         }
1720 #endif
1721     } else {
1722         nb_rets = 0;
1723     }
1724     TCGOP_CALLO(op) = nb_rets;
1725 
1726     real_args = 0;
1727     for (i = 0; i < nargs; i++) {
1728         int is_64bit = sizemask & (1 << (i+1)*2);
1729         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1730 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1731             /* some targets want aligned 64-bit args */
1732             if (real_args & 1) {
1733                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1734                 real_args++;
1735             }
1736 #endif
1737             /* If the stack grows up, then we will be placing successive
1738                arguments at lower addresses, which means we need to
1739                reverse the order compared to how we would normally
1740                treat either big or little-endian.  For those arguments
1741                that will wind up in registers, this still works for
1742                HPPA (the only current STACK_GROWSUP target) since the
1743                argument registers are *also* allocated in decreasing
1744                order.  If another such target is added, this logic may
1745                have to get more complicated to differentiate between
1746                stack arguments and register arguments.  */
1747 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1748             op->args[pi++] = temp_arg(args[i] + 1);
1749             op->args[pi++] = temp_arg(args[i]);
1750 #else
1751             op->args[pi++] = temp_arg(args[i]);
1752             op->args[pi++] = temp_arg(args[i] + 1);
1753 #endif
1754             real_args += 2;
1755             continue;
1756         }
1757 
1758         op->args[pi++] = temp_arg(args[i]);
1759         real_args++;
1760     }
1761     op->args[pi++] = (uintptr_t)func;
1762     op->args[pi++] = flags;
1763     TCGOP_CALLI(op) = real_args;
1764 
1765     /* Make sure the fields didn't overflow.  */
1766     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1767     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1768 
1769 #if defined(__sparc__) && !defined(__arch64__) \
1770     && !defined(CONFIG_TCG_INTERPRETER)
1771     /* Free all of the parts we allocated above.  */
1772     for (i = real_args = 0; i < orig_nargs; ++i) {
1773         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1774         if (is_64bit) {
1775             tcg_temp_free_internal(args[real_args++]);
1776             tcg_temp_free_internal(args[real_args++]);
1777         } else {
1778             real_args++;
1779         }
1780     }
1781     if (orig_sizemask & 1) {
1782         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1783            Note that describing these as TCGv_i64 eliminates an unnecessary
1784            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1785         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1786         tcg_temp_free_i64(retl);
1787         tcg_temp_free_i64(reth);
1788     }
1789 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1790     for (i = 0; i < nargs; ++i) {
1791         int is_64bit = sizemask & (1 << (i+1)*2);
1792         if (!is_64bit) {
1793             tcg_temp_free_internal(args[i]);
1794         }
1795     }
1796 #endif /* TCG_TARGET_EXTEND_ARGS */
1797 }
1798 
1799 static void tcg_reg_alloc_start(TCGContext *s)
1800 {
1801     int i, n;
1802     TCGTemp *ts;
1803 
1804     for (i = 0, n = s->nb_globals; i < n; i++) {
1805         ts = &s->temps[i];
1806         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1807     }
1808     for (n = s->nb_temps; i < n; i++) {
1809         ts = &s->temps[i];
1810         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1811         ts->mem_allocated = 0;
1812         ts->fixed_reg = 0;
1813     }
1814 
1815     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1816 }
1817 
1818 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1819                                  TCGTemp *ts)
1820 {
1821     int idx = temp_idx(ts);
1822 
1823     if (ts->temp_global) {
1824         pstrcpy(buf, buf_size, ts->name);
1825     } else if (ts->temp_local) {
1826         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1827     } else {
1828         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1829     }
1830     return buf;
1831 }
1832 
1833 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1834                              int buf_size, TCGArg arg)
1835 {
1836     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1837 }
1838 
1839 /* Find helper name.  */
1840 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1841 {
1842     const char *ret = NULL;
1843     if (helper_table) {
1844         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1845         if (info) {
1846             ret = info->name;
1847         }
1848     }
1849     return ret;
1850 }
1851 
1852 static const char * const cond_name[] =
1853 {
1854     [TCG_COND_NEVER] = "never",
1855     [TCG_COND_ALWAYS] = "always",
1856     [TCG_COND_EQ] = "eq",
1857     [TCG_COND_NE] = "ne",
1858     [TCG_COND_LT] = "lt",
1859     [TCG_COND_GE] = "ge",
1860     [TCG_COND_LE] = "le",
1861     [TCG_COND_GT] = "gt",
1862     [TCG_COND_LTU] = "ltu",
1863     [TCG_COND_GEU] = "geu",
1864     [TCG_COND_LEU] = "leu",
1865     [TCG_COND_GTU] = "gtu"
1866 };
1867 
1868 static const char * const ldst_name[] =
1869 {
1870     [MO_UB]   = "ub",
1871     [MO_SB]   = "sb",
1872     [MO_LEUW] = "leuw",
1873     [MO_LESW] = "lesw",
1874     [MO_LEUL] = "leul",
1875     [MO_LESL] = "lesl",
1876     [MO_LEQ]  = "leq",
1877     [MO_BEUW] = "beuw",
1878     [MO_BESW] = "besw",
1879     [MO_BEUL] = "beul",
1880     [MO_BESL] = "besl",
1881     [MO_BEQ]  = "beq",
1882 };
1883 
1884 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1885 #ifdef ALIGNED_ONLY
1886     [MO_UNALN >> MO_ASHIFT]    = "un+",
1887     [MO_ALIGN >> MO_ASHIFT]    = "",
1888 #else
1889     [MO_UNALN >> MO_ASHIFT]    = "",
1890     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1891 #endif
1892     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1893     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1894     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1895     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1896     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1897     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1898 };
1899 
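/*
 * Bit trick: d & (d - 1) clears the lowest set bit, so the result is
 * zero exactly when at most one register is in the set, e.g.
 * 0b0100 & 0b0011 == 0 but 0b0110 & 0b0101 == 0b0100.  The callers
 * below only apply these helpers to sets known to be non-empty.
 */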
1900 static inline bool tcg_regset_single(TCGRegSet d)
1901 {
1902     return (d & (d - 1)) == 0;
1903 }
1904 
1905 static inline TCGReg tcg_regset_first(TCGRegSet d)
1906 {
1907     if (TCG_TARGET_NB_REGS <= 32) {
1908         return ctz32(d);
1909     } else {
1910         return ctz64(d);
1911     }
1912 }
1913 
1914 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1915 {
1916     char buf[128];
1917     TCGOp *op;
1918 
1919     QTAILQ_FOREACH(op, &s->ops, link) {
1920         int i, k, nb_oargs, nb_iargs, nb_cargs;
1921         const TCGOpDef *def;
1922         TCGOpcode c;
1923         int col = 0;
1924 
1925         c = op->opc;
1926         def = &tcg_op_defs[c];
1927 
1928         if (c == INDEX_op_insn_start) {
1929             nb_oargs = 0;
1930             col += qemu_log("\n ----");
1931 
1932             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1933                 target_ulong a;
1934 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1935                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1936 #else
1937                 a = op->args[i];
1938 #endif
1939                 col += qemu_log(" " TARGET_FMT_lx, a);
1940             }
1941         } else if (c == INDEX_op_call) {
1942             /* variable number of arguments */
1943             nb_oargs = TCGOP_CALLO(op);
1944             nb_iargs = TCGOP_CALLI(op);
1945             nb_cargs = def->nb_cargs;
1946 
1947             /* function name, flags, out args */
1948             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1949                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1950                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1951             for (i = 0; i < nb_oargs; i++) {
1952                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1953                                                        op->args[i]));
1954             }
1955             for (i = 0; i < nb_iargs; i++) {
1956                 TCGArg arg = op->args[nb_oargs + i];
1957                 const char *t = "<dummy>";
1958                 if (arg != TCG_CALL_DUMMY_ARG) {
1959                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1960                 }
1961                 col += qemu_log(",%s", t);
1962             }
1963         } else {
1964             col += qemu_log(" %s ", def->name);
1965 
1966             nb_oargs = def->nb_oargs;
1967             nb_iargs = def->nb_iargs;
1968             nb_cargs = def->nb_cargs;
1969 
1970             if (def->flags & TCG_OPF_VECTOR) {
1971                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1972                                 8 << TCGOP_VECE(op));
1973             }
1974 
1975             k = 0;
1976             for (i = 0; i < nb_oargs; i++) {
1977                 if (k != 0) {
1978                     col += qemu_log(",");
1979                 }
1980                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1981                                                       op->args[k++]));
1982             }
1983             for (i = 0; i < nb_iargs; i++) {
1984                 if (k != 0) {
1985                     col += qemu_log(",");
1986                 }
1987                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1988                                                       op->args[k++]));
1989             }
1990             switch (c) {
1991             case INDEX_op_brcond_i32:
1992             case INDEX_op_setcond_i32:
1993             case INDEX_op_movcond_i32:
1994             case INDEX_op_brcond2_i32:
1995             case INDEX_op_setcond2_i32:
1996             case INDEX_op_brcond_i64:
1997             case INDEX_op_setcond_i64:
1998             case INDEX_op_movcond_i64:
1999             case INDEX_op_cmp_vec:
2000                 if (op->args[k] < ARRAY_SIZE(cond_name)
2001                     && cond_name[op->args[k]]) {
2002                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2003                 } else {
2004                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2005                 }
2006                 i = 1;
2007                 break;
2008             case INDEX_op_qemu_ld_i32:
2009             case INDEX_op_qemu_st_i32:
2010             case INDEX_op_qemu_ld_i64:
2011             case INDEX_op_qemu_st_i64:
2012                 {
2013                     TCGMemOpIdx oi = op->args[k++];
2014                     TCGMemOp mop = get_memop(oi);
2015                     unsigned ix = get_mmuidx(oi);
2016 
2017                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2018                         col += qemu_log(",$0x%x,%u", mop, ix);
2019                     } else {
2020                         const char *s_al, *s_op;
2021                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2022                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2023                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2024                     }
2025                     i = 1;
2026                 }
2027                 break;
2028             default:
2029                 i = 0;
2030                 break;
2031             }
2032             switch (c) {
2033             case INDEX_op_set_label:
2034             case INDEX_op_br:
2035             case INDEX_op_brcond_i32:
2036             case INDEX_op_brcond_i64:
2037             case INDEX_op_brcond2_i32:
2038                 col += qemu_log("%s$L%d", k ? "," : "",
2039                                 arg_label(op->args[k])->id);
2040                 i++, k++;
2041                 break;
2042             default:
2043                 break;
2044             }
2045             for (; i < nb_cargs; i++, k++) {
2046                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2047             }
2048         }
2049 
2050         if (have_prefs || op->life) {
2051             for (; col < 40; ++col) {
2052                 putc(' ', qemu_logfile);
2053             }
2054         }
2055 
2056         if (op->life) {
2057             unsigned life = op->life;
2058 
2059             if (life & (SYNC_ARG * 3)) {
2060                 qemu_log("  sync:");
2061                 for (i = 0; i < 2; ++i) {
2062                     if (life & (SYNC_ARG << i)) {
2063                         qemu_log(" %d", i);
2064                     }
2065                 }
2066             }
2067             life /= DEAD_ARG;
2068             if (life) {
2069                 qemu_log("  dead:");
2070                 for (i = 0; life; ++i, life >>= 1) {
2071                     if (life & 1) {
2072                         qemu_log(" %d", i);
2073                     }
2074                 }
2075             }
2076         }
2077 
2078         if (have_prefs) {
2079             for (i = 0; i < nb_oargs; ++i) {
2080                 TCGRegSet set = op->output_pref[i];
2081 
2082                 if (i == 0) {
2083                     qemu_log("  pref=");
2084                 } else {
2085                     qemu_log(",");
2086                 }
2087                 if (set == 0) {
2088                     qemu_log("none");
2089                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2090                     qemu_log("all");
2091 #ifdef CONFIG_DEBUG_TCG
2092                 } else if (tcg_regset_single(set)) {
2093                     TCGReg reg = tcg_regset_first(set);
2094                     qemu_log("%s", tcg_target_reg_names[reg]);
2095 #endif
2096                 } else if (TCG_TARGET_NB_REGS <= 32) {
2097                     qemu_log("%#x", (uint32_t)set);
2098                 } else {
2099                     qemu_log("%#" PRIx64, (uint64_t)set);
2100                 }
2101             }
2102         }
2103 
2104         qemu_log("\n");
2105     }
2106 }
2107 
2108 /* we give more priority to constraints with fewer registers */
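/* E.g. with 16 host registers, a single-register or aliased constraint
   yields priority 16, an "any register" constraint yields 1, and a
   constant-only constraint yields 0, so the tightest constraints are
   placed first by sort_constraints() below.  */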
2109 static int get_constraint_priority(const TCGOpDef *def, int k)
2110 {
2111     const TCGArgConstraint *arg_ct;
2112 
2113     int i, n;
2114     arg_ct = &def->args_ct[k];
2115     if (arg_ct->ct & TCG_CT_ALIAS) {
2116         /* an alias is equivalent to a single register */
2117         n = 1;
2118     } else {
2119         if (!(arg_ct->ct & TCG_CT_REG))
2120             return 0;
2121         n = 0;
2122         for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2123             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2124                 n++;
2125         }
2126     }
2127     return TCG_TARGET_NB_REGS - n + 1;
2128 }
2129 
2130 /* sort from highest priority to lowest */
2131 static void sort_constraints(TCGOpDef *def, int start, int n)
2132 {
2133     int i, j, p1, p2, tmp;
2134 
2135     for (i = 0; i < n; i++)
2136         def->sorted_args[start + i] = start + i;
2137     if (n <= 1)
2138         return;
2139     for (i = 0; i < n - 1; i++) {
2140         for (j = i + 1; j < n; j++) {
2141             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2142             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2143             if (p1 < p2) {
2144                 tmp = def->sorted_args[start + i];
2145                 def->sorted_args[start + i] = def->sorted_args[start + j];
2146                 def->sorted_args[start + j] = tmp;
2147             }
2148         }
2149     }
2150 }
2151 
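/*
 * A sketch of the constraint strings parsed below.  Only '0'-'9', '&'
 * and 'i' are handled here; other letters are target-defined, so this
 * example is illustrative rather than taken from any one backend.  A
 * two-address add might be described as
 *     { INDEX_op_add_i32, { "r", "0", "ri" } }
 * i.e. output in any register, first input aliased to output 0, and
 * second input either a register or an immediate.
 */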
2152 static void process_op_defs(TCGContext *s)
2153 {
2154     TCGOpcode op;
2155 
2156     for (op = 0; op < NB_OPS; op++) {
2157         TCGOpDef *def = &tcg_op_defs[op];
2158         const TCGTargetOpDef *tdefs;
2159         TCGType type;
2160         int i, nb_args;
2161 
2162         if (def->flags & TCG_OPF_NOT_PRESENT) {
2163             continue;
2164         }
2165 
2166         nb_args = def->nb_iargs + def->nb_oargs;
2167         if (nb_args == 0) {
2168             continue;
2169         }
2170 
2171         tdefs = tcg_target_op_def(op);
2172         /* Missing TCGTargetOpDef entry. */
2173         tcg_debug_assert(tdefs != NULL);
2174 
2175         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2176         for (i = 0; i < nb_args; i++) {
2177             const char *ct_str = tdefs->args_ct_str[i];
2178             /* Incomplete TCGTargetOpDef entry. */
2179             tcg_debug_assert(ct_str != NULL);
2180 
2181             def->args_ct[i].u.regs = 0;
2182             def->args_ct[i].ct = 0;
2183             while (*ct_str != '\0') {
2184                 switch (*ct_str) {
2185                 case '0' ... '9':
2186                     {
2187                         int oarg = *ct_str - '0';
2188                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2189                         tcg_debug_assert(oarg < def->nb_oargs);
2190                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2191                         /* TCG_CT_ALIAS is for the output arguments.
2192                            The input is tagged with TCG_CT_IALIAS. */
2193                         def->args_ct[i] = def->args_ct[oarg];
2194                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2195                         def->args_ct[oarg].alias_index = i;
2196                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2197                         def->args_ct[i].alias_index = oarg;
2198                     }
2199                     ct_str++;
2200                     break;
2201                 case '&':
2202                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2203                     ct_str++;
2204                     break;
2205                 case 'i':
2206                     def->args_ct[i].ct |= TCG_CT_CONST;
2207                     ct_str++;
2208                     break;
2209                 default:
2210                     ct_str = target_parse_constraint(&def->args_ct[i],
2211                                                      ct_str, type);
2212                     /* Typo in TCGTargetOpDef constraint. */
2213                     tcg_debug_assert(ct_str != NULL);
2214                 }
2215             }
2216         }
2217 
2218         /* TCGTargetOpDef entry with too much information? */
2219         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2220 
2221         /* sort the constraints (XXX: this is just a heuristic) */
2222         sort_constraints(def, 0, def->nb_oargs);
2223         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2224     }
2225 }
2226 
2227 void tcg_op_remove(TCGContext *s, TCGOp *op)
2228 {
2229     TCGLabel *label;
2230 
2231     switch (op->opc) {
2232     case INDEX_op_br:
2233         label = arg_label(op->args[0]);
2234         label->refs--;
2235         break;
2236     case INDEX_op_brcond_i32:
2237     case INDEX_op_brcond_i64:
2238         label = arg_label(op->args[3]);
2239         label->refs--;
2240         break;
2241     case INDEX_op_brcond2_i32:
2242         label = arg_label(op->args[5]);
2243         label->refs--;
2244         break;
2245     default:
2246         break;
2247     }
2248 
2249     QTAILQ_REMOVE(&s->ops, op, link);
2250     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2251     s->nb_ops--;
2252 
2253 #ifdef CONFIG_PROFILER
2254     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2255 #endif
2256 }
2257 
2258 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2259 {
2260     TCGContext *s = tcg_ctx;
2261     TCGOp *op;
2262 
2263     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2264         op = tcg_malloc(sizeof(TCGOp));
2265     } else {
2266         op = QTAILQ_FIRST(&s->free_ops);
2267         QTAILQ_REMOVE(&s->free_ops, op, link);
2268     }
2269     memset(op, 0, offsetof(TCGOp, link));
2270     op->opc = opc;
2271     s->nb_ops++;
2272 
2273     return op;
2274 }
2275 
2276 TCGOp *tcg_emit_op(TCGOpcode opc)
2277 {
2278     TCGOp *op = tcg_op_alloc(opc);
2279     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2280     return op;
2281 }
2282 
2283 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2284 {
2285     TCGOp *new_op = tcg_op_alloc(opc);
2286     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2287     return new_op;
2288 }
2289 
2290 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2291 {
2292     TCGOp *new_op = tcg_op_alloc(opc);
2293     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2294     return new_op;
2295 }
2296 
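/*
 * A sketch of the fold performed below: once optimize() has turned a
 * conditional branch into an unconditional one, a fragment like
 *     br $L1
 *     <now-unreachable ops>
 *     set_label $L1
 * first loses the unreachable ops here, and then both the br and the
 * set_label, since $L1 is left with a single reference from the br
 * immediately before it.
 */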
2297 /* Reachability analysis: remove unreachable code.  */
2298 static void reachable_code_pass(TCGContext *s)
2299 {
2300     TCGOp *op, *op_next;
2301     bool dead = false;
2302 
2303     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2304         bool remove = dead;
2305         TCGLabel *label;
2306         int call_flags;
2307 
2308         switch (op->opc) {
2309         case INDEX_op_set_label:
2310             label = arg_label(op->args[0]);
2311             if (label->refs == 0) {
2312                 /*
2313                  * While there is an occasional backward branch, virtually
2314                  * all branches generated by the translators are forward.
2315                  * This means that by the time we reach a label we will
2316                  * generally have removed all references to it already,
2317                  * and there is little to be gained by iterating.
2318                  */
2319                 remove = true;
2320             } else {
2321                 /* Once we see a label, insns become live again.  */
2322                 dead = false;
2323                 remove = false;
2324 
2325                 /*
2326                  * Optimization can fold conditional branches to unconditional.
2327                  * If we find a label with one reference which is preceded by
2328                  * an unconditional branch to it, remove both.  This needed to
2329                  * wait until the dead code in between them was removed.
2330                  */
2331                 if (label->refs == 1) {
2332                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2333                     if (op_prev->opc == INDEX_op_br &&
2334                         label == arg_label(op_prev->args[0])) {
2335                         tcg_op_remove(s, op_prev);
2336                         remove = true;
2337                     }
2338                 }
2339             }
2340             break;
2341 
2342         case INDEX_op_br:
2343         case INDEX_op_exit_tb:
2344         case INDEX_op_goto_ptr:
2345             /* Unconditional branches; everything following is dead.  */
2346             dead = true;
2347             break;
2348 
2349         case INDEX_op_call:
2350             /* Notice noreturn helper calls, raising exceptions.  */
2351             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2352             if (call_flags & TCG_CALL_NO_RETURN) {
2353                 dead = true;
2354             }
2355             break;
2356 
2357         case INDEX_op_insn_start:
2358             /* Never remove -- we need to keep these for unwind.  */
2359             remove = false;
2360             break;
2361 
2362         default:
2363             break;
2364         }
2365 
2366         if (remove) {
2367             tcg_op_remove(s, op);
2368         }
2369     }
2370 }
2371 
2372 #define TS_DEAD  1
2373 #define TS_MEM   2
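
/* The liveness pass walks the op list backwards, so ts->state describes
   a temp as seen by the ops *after* the current one: TS_DEAD means no
   later op reads the value, TS_MEM means later ops expect to find it in
   its canonical memory slot.  */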
2374 
2375 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2376 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2377 
2378 /* For liveness_pass_1, the register preferences for a given temp.  */
2379 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2380 {
2381     return ts->state_ptr;
2382 }
2383 
2384 /* For liveness_pass_1, reset the preferences for a given temp to the
2385  * maximal regset for its type.
2386  */
2387 static inline void la_reset_pref(TCGTemp *ts)
2388 {
2389     *la_temp_pref(ts)
2390         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2391 }
2392 
2393 /* liveness analysis: end of function: all temps are dead, and globals
2394    should be in memory. */
2395 static void la_func_end(TCGContext *s, int ng, int nt)
2396 {
2397     int i;
2398 
2399     for (i = 0; i < ng; ++i) {
2400         s->temps[i].state = TS_DEAD | TS_MEM;
2401         la_reset_pref(&s->temps[i]);
2402     }
2403     for (i = ng; i < nt; ++i) {
2404         s->temps[i].state = TS_DEAD;
2405         la_reset_pref(&s->temps[i]);
2406     }
2407 }
2408 
2409 /* liveness analysis: end of basic block: all temps are dead, globals
2410    and local temps should be in memory. */
2411 static void la_bb_end(TCGContext *s, int ng, int nt)
2412 {
2413     int i;
2414 
2415     for (i = 0; i < ng; ++i) {
2416         s->temps[i].state = TS_DEAD | TS_MEM;
2417         la_reset_pref(&s->temps[i]);
2418     }
2419     for (i = ng; i < nt; ++i) {
2420         s->temps[i].state = (s->temps[i].temp_local
2421                              ? TS_DEAD | TS_MEM
2422                              : TS_DEAD);
2423         la_reset_pref(&s->temps[i]);
2424     }
2425 }
2426 
2427 /* liveness analysis: sync globals back to memory.  */
2428 static void la_global_sync(TCGContext *s, int ng)
2429 {
2430     int i;
2431 
2432     for (i = 0; i < ng; ++i) {
2433         int state = s->temps[i].state;
2434         s->temps[i].state = state | TS_MEM;
2435         if (state == TS_DEAD) {
2436             /* If the global was previously dead, reset prefs.  */
2437             la_reset_pref(&s->temps[i]);
2438         }
2439     }
2440 }
2441 
2442 /* liveness analysis: sync globals back to memory and kill.  */
2443 static void la_global_kill(TCGContext *s, int ng)
2444 {
2445     int i;
2446 
2447     for (i = 0; i < ng; i++) {
2448         s->temps[i].state = TS_DEAD | TS_MEM;
2449         la_reset_pref(&s->temps[i]);
2450     }
2451 }
2452 
2453 /* liveness analysis: note live temporaries crossing calls.  */
2454 static void la_cross_call(TCGContext *s, int nt)
2455 {
2456     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2457     int i;
2458 
2459     for (i = 0; i < nt; i++) {
2460         TCGTemp *ts = &s->temps[i];
2461         if (!(ts->state & TS_DEAD)) {
2462             TCGRegSet *pset = la_temp_pref(ts);
2463             TCGRegSet set = *pset;
2464 
2465             set &= mask;
2466             /* If the combination is not possible, restart.  */
2467             if (set == 0) {
2468                 set = tcg_target_available_regs[ts->type] & mask;
2469             }
2470             *pset = set;
2471         }
2472     }
2473 }
2474 
2475 /* Liveness analysis: update the opc_arg_life array to tell if a
2476    given input argument is dead.  Instructions updating dead
2477    temporaries are removed. */
2478 static void liveness_pass_1(TCGContext *s)
2479 {
2480     int nb_globals = s->nb_globals;
2481     int nb_temps = s->nb_temps;
2482     TCGOp *op, *op_prev;
2483     TCGRegSet *prefs;
2484     int i;
2485 
2486     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2487     for (i = 0; i < nb_temps; ++i) {
2488         s->temps[i].state_ptr = prefs + i;
2489     }
2490 
2491     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2492     la_func_end(s, nb_globals, nb_temps);
2493 
2494     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2495         int nb_iargs, nb_oargs;
2496         TCGOpcode opc_new, opc_new2;
2497         bool have_opc_new2;
2498         TCGLifeData arg_life = 0;
2499         TCGTemp *ts;
2500         TCGOpcode opc = op->opc;
2501         const TCGOpDef *def = &tcg_op_defs[opc];
2502 
2503         switch (opc) {
2504         case INDEX_op_call:
2505             {
2506                 int call_flags;
2507                 int nb_call_regs;
2508 
2509                 nb_oargs = TCGOP_CALLO(op);
2510                 nb_iargs = TCGOP_CALLI(op);
2511                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2512 
2513                 /* pure functions can be removed if their result is unused */
2514                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2515                     for (i = 0; i < nb_oargs; i++) {
2516                         ts = arg_temp(op->args[i]);
2517                         if (ts->state != TS_DEAD) {
2518                             goto do_not_remove_call;
2519                         }
2520                     }
2521                     goto do_remove;
2522                 }
2523             do_not_remove_call:
2524 
2525                 /* Output args are dead.  */
2526                 for (i = 0; i < nb_oargs; i++) {
2527                     ts = arg_temp(op->args[i]);
2528                     if (ts->state & TS_DEAD) {
2529                         arg_life |= DEAD_ARG << i;
2530                     }
2531                     if (ts->state & TS_MEM) {
2532                         arg_life |= SYNC_ARG << i;
2533                     }
2534                     ts->state = TS_DEAD;
2535                     la_reset_pref(ts);
2536 
2537                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2538                     op->output_pref[i] = 0;
2539                 }
2540 
2541                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2542                                     TCG_CALL_NO_READ_GLOBALS))) {
2543                     la_global_kill(s, nb_globals);
2544                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2545                     la_global_sync(s, nb_globals);
2546                 }
2547 
2548                 /* Record arguments that die in this helper.  */
2549                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2550                     ts = arg_temp(op->args[i]);
2551                     if (ts && ts->state & TS_DEAD) {
2552                         arg_life |= DEAD_ARG << i;
2553                     }
2554                 }
2555 
2556                 /* For all live registers, remove call-clobbered prefs.  */
2557                 la_cross_call(s, nb_temps);
2558 
2559                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2560 
2561                 /* Input arguments are live for preceding opcodes.  */
2562                 for (i = 0; i < nb_iargs; i++) {
2563                     ts = arg_temp(op->args[i + nb_oargs]);
2564                     if (ts && ts->state & TS_DEAD) {
2565                         /* For those arguments that die, and will be allocated
2566                          * in registers, clear the register set for that arg,
2567                          * to be filled in below.  For args that will be on
2568                          * the stack, reset to any available reg.
2569                          */
2570                         *la_temp_pref(ts)
2571                             = (i < nb_call_regs ? 0 :
2572                                tcg_target_available_regs[ts->type]);
2573                         ts->state &= ~TS_DEAD;
2574                     }
2575                 }
2576 
2577                 /* For each input argument, add its input register to prefs.
2578                    If a temp is used once, this produces a single set bit.  */
2579                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2580                     ts = arg_temp(op->args[i + nb_oargs]);
2581                     if (ts) {
2582                         tcg_regset_set_reg(*la_temp_pref(ts),
2583                                            tcg_target_call_iarg_regs[i]);
2584                     }
2585                 }
2586             }
2587             break;
2588         case INDEX_op_insn_start:
2589             break;
2590         case INDEX_op_discard:
2591             /* mark the temporary as dead */
2592             ts = arg_temp(op->args[0]);
2593             ts->state = TS_DEAD;
2594             la_reset_pref(ts);
2595             break;
2596 
2597         case INDEX_op_add2_i32:
2598             opc_new = INDEX_op_add_i32;
2599             goto do_addsub2;
2600         case INDEX_op_sub2_i32:
2601             opc_new = INDEX_op_sub_i32;
2602             goto do_addsub2;
2603         case INDEX_op_add2_i64:
2604             opc_new = INDEX_op_add_i64;
2605             goto do_addsub2;
2606         case INDEX_op_sub2_i64:
2607             opc_new = INDEX_op_sub_i64;
2608         do_addsub2:
2609             nb_iargs = 4;
2610             nb_oargs = 2;
2611             /* Test if the high part of the operation is dead, but not
2612                the low part.  The result can be optimized to a simple
2613                add or sub.  This happens often for an x86_64 guest when
2614                the CPU mode is set to 32 bit.  */
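            /* Concretely (operand order as in tcg/README):
                   add2_i32 rl, rh, al, ah, bl, bh
               with rh dead is rewritten in place by the shuffle below to
                   add_i32 rl, al, bl  */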
2615             if (arg_temp(op->args[1])->state == TS_DEAD) {
2616                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2617                     goto do_remove;
2618                 }
2619                 /* Replace the opcode and adjust the args in place,
2620                    leaving 3 unused args at the end.  */
2621                 op->opc = opc = opc_new;
2622                 op->args[1] = op->args[2];
2623                 op->args[2] = op->args[4];
2624                 /* Fall through and mark the single-word operation live.  */
2625                 nb_iargs = 2;
2626                 nb_oargs = 1;
2627             }
2628             goto do_not_remove;
2629 
2630         case INDEX_op_mulu2_i32:
2631             opc_new = INDEX_op_mul_i32;
2632             opc_new2 = INDEX_op_muluh_i32;
2633             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2634             goto do_mul2;
2635         case INDEX_op_muls2_i32:
2636             opc_new = INDEX_op_mul_i32;
2637             opc_new2 = INDEX_op_mulsh_i32;
2638             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2639             goto do_mul2;
2640         case INDEX_op_mulu2_i64:
2641             opc_new = INDEX_op_mul_i64;
2642             opc_new2 = INDEX_op_muluh_i64;
2643             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2644             goto do_mul2;
2645         case INDEX_op_muls2_i64:
2646             opc_new = INDEX_op_mul_i64;
2647             opc_new2 = INDEX_op_mulsh_i64;
2648             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2649             goto do_mul2;
2650         do_mul2:
2651             nb_iargs = 2;
2652             nb_oargs = 2;
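            /* As with addsub2 above, a sketch: mulu2_i32 rl, rh, a, b
               with rh dead becomes mul_i32 rl, a, b; with rl dead (and
               muluh available) it becomes muluh_i32 rh, a, b.  */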
2653             if (arg_temp(op->args[1])->state == TS_DEAD) {
2654                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2655                     /* Both parts of the operation are dead.  */
2656                     goto do_remove;
2657                 }
2658                 /* The high part of the operation is dead; generate the low. */
2659                 op->opc = opc = opc_new;
2660                 op->args[1] = op->args[2];
2661                 op->args[2] = op->args[3];
2662             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2663                 /* The low part of the operation is dead; generate the high. */
2664                 op->opc = opc = opc_new2;
2665                 op->args[0] = op->args[1];
2666                 op->args[1] = op->args[2];
2667                 op->args[2] = op->args[3];
2668             } else {
2669                 goto do_not_remove;
2670             }
2671             /* Mark the single-word operation live.  */
2672             nb_oargs = 1;
2673             goto do_not_remove;
2674 
2675         default:
2676             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2677             nb_iargs = def->nb_iargs;
2678             nb_oargs = def->nb_oargs;
2679 
2680             /* Test if the operation can be removed because all
2681                its outputs are dead. We assume that nb_oargs == 0
2682                implies side effects */
2683             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2684                 for (i = 0; i < nb_oargs; i++) {
2685                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2686                         goto do_not_remove;
2687                     }
2688                 }
2689                 goto do_remove;
2690             }
2691             goto do_not_remove;
2692 
2693         do_remove:
2694             tcg_op_remove(s, op);
2695             break;
2696 
2697         do_not_remove:
2698             for (i = 0; i < nb_oargs; i++) {
2699                 ts = arg_temp(op->args[i]);
2700 
2701                 /* Remember the preference of the uses that followed.  */
2702                 op->output_pref[i] = *la_temp_pref(ts);
2703 
2704                 /* Output args are dead.  */
2705                 if (ts->state & TS_DEAD) {
2706                     arg_life |= DEAD_ARG << i;
2707                 }
2708                 if (ts->state & TS_MEM) {
2709                     arg_life |= SYNC_ARG << i;
2710                 }
2711                 ts->state = TS_DEAD;
2712                 la_reset_pref(ts);
2713             }
2714 
2715             /* If end of basic block, update.  */
2716             if (def->flags & TCG_OPF_BB_EXIT) {
2717                 la_func_end(s, nb_globals, nb_temps);
2718             } else if (def->flags & TCG_OPF_BB_END) {
2719                 la_bb_end(s, nb_globals, nb_temps);
2720             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2721                 la_global_sync(s, nb_globals);
2722                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2723                     la_cross_call(s, nb_temps);
2724                 }
2725             }
2726 
2727             /* Record arguments that die in this opcode.  */
2728             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2729                 ts = arg_temp(op->args[i]);
2730                 if (ts->state & TS_DEAD) {
2731                     arg_life |= DEAD_ARG << i;
2732                 }
2733             }
2734 
2735             /* Input arguments are live for preceding opcodes.  */
2736             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2737                 ts = arg_temp(op->args[i]);
2738                 if (ts->state & TS_DEAD) {
2739                     /* For operands that were dead, initially allow
2740                        all regs for the type.  */
2741                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2742                     ts->state &= ~TS_DEAD;
2743                 }
2744             }
2745 
2746             /* Incorporate constraints for this operand.  */
2747             switch (opc) {
2748             case INDEX_op_mov_i32:
2749             case INDEX_op_mov_i64:
2750                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2751                    have proper constraints.  That said, special case
2752                    moves to propagate preferences backward.  */
2753                 if (IS_DEAD_ARG(1)) {
2754                     *la_temp_pref(arg_temp(op->args[0]))
2755                         = *la_temp_pref(arg_temp(op->args[1]));
2756                 }
2757                 break;
2758 
2759             default:
2760                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2761                     const TCGArgConstraint *ct = &def->args_ct[i];
2762                     TCGRegSet set, *pset;
2763 
2764                     ts = arg_temp(op->args[i]);
2765                     pset = la_temp_pref(ts);
2766                     set = *pset;
2767 
2768                     set &= ct->u.regs;
2769                     if (ct->ct & TCG_CT_IALIAS) {
2770                         set &= op->output_pref[ct->alias_index];
2771                     }
2772                     /* If the combination is not possible, restart.  */
2773                     if (set == 0) {
2774                         set = ct->u.regs;
2775                     }
2776                     *pset = set;
2777                 }
2778                 break;
2779             }
2780             break;
2781         }
2782         op->life = arg_life;
2783     }
2784 }
2785 
2786 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
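/* An "indirect" global is one whose base pointer (e.g. env) is itself
   not kept in a fixed host register, so every access requires an
   explicit load or store.  This pass pairs each such global with a
   direct temp, rewrites the ops to use the direct temp, and inserts
   ld/st ops wherever the value must be fetched or synced.  */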
2787 static bool liveness_pass_2(TCGContext *s)
2788 {
2789     int nb_globals = s->nb_globals;
2790     int nb_temps, i;
2791     bool changes = false;
2792     TCGOp *op, *op_next;
2793 
2794     /* Create a temporary for each indirect global.  */
2795     for (i = 0; i < nb_globals; ++i) {
2796         TCGTemp *its = &s->temps[i];
2797         if (its->indirect_reg) {
2798             TCGTemp *dts = tcg_temp_alloc(s);
2799             dts->type = its->type;
2800             dts->base_type = its->base_type;
2801             its->state_ptr = dts;
2802         } else {
2803             its->state_ptr = NULL;
2804         }
2805         /* All globals begin dead.  */
2806         its->state = TS_DEAD;
2807     }
2808     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2809         TCGTemp *its = &s->temps[i];
2810         its->state_ptr = NULL;
2811         its->state = TS_DEAD;
2812     }
2813 
2814     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2815         TCGOpcode opc = op->opc;
2816         const TCGOpDef *def = &tcg_op_defs[opc];
2817         TCGLifeData arg_life = op->life;
2818         int nb_iargs, nb_oargs, call_flags;
2819         TCGTemp *arg_ts, *dir_ts;
2820 
2821         if (opc == INDEX_op_call) {
2822             nb_oargs = TCGOP_CALLO(op);
2823             nb_iargs = TCGOP_CALLI(op);
2824             call_flags = op->args[nb_oargs + nb_iargs + 1];
2825         } else {
2826             nb_iargs = def->nb_iargs;
2827             nb_oargs = def->nb_oargs;
2828 
2829             /* Set call_flags the way an equivalent call would require.  */
2830             if (def->flags & TCG_OPF_BB_END) {
2831                 /* Like writing globals: save_globals */
2832                 call_flags = 0;
2833             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2834                 /* Like reading globals: sync_globals */
2835                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2836             } else {
2837                 /* No effect on globals.  */
2838                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2839                               TCG_CALL_NO_WRITE_GLOBALS);
2840             }
2841         }
2842 
2843         /* Make sure that input arguments are available.  */
2844         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2845             arg_ts = arg_temp(op->args[i]);
2846             if (arg_ts) {
2847                 dir_ts = arg_ts->state_ptr;
2848                 if (dir_ts && arg_ts->state == TS_DEAD) {
2849                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2850                                       ? INDEX_op_ld_i32
2851                                       : INDEX_op_ld_i64);
2852                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2853 
2854                     lop->args[0] = temp_arg(dir_ts);
2855                     lop->args[1] = temp_arg(arg_ts->mem_base);
2856                     lop->args[2] = arg_ts->mem_offset;
2857 
2858                     /* Loaded, but synced with memory.  */
2859                     arg_ts->state = TS_MEM;
2860                 }
2861             }
2862         }
2863 
2864         /* Perform input replacement, and mark inputs that became dead.
2865            No action is required except keeping temp_state up to date
2866            so that we reload when needed.  */
2867         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2868             arg_ts = arg_temp(op->args[i]);
2869             if (arg_ts) {
2870                 dir_ts = arg_ts->state_ptr;
2871                 if (dir_ts) {
2872                     op->args[i] = temp_arg(dir_ts);
2873                     changes = true;
2874                     if (IS_DEAD_ARG(i)) {
2875                         arg_ts->state = TS_DEAD;
2876                     }
2877                 }
2878             }
2879         }
2880 
2881         /* Liveness analysis should ensure that the following are
2882            all correct, for call sites and basic block end points.  */
2883         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2884             /* Nothing to do */
2885         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2886             for (i = 0; i < nb_globals; ++i) {
2887                 /* Liveness should see that globals are synced back,
2888                    that is, either TS_DEAD or TS_MEM.  */
2889                 arg_ts = &s->temps[i];
2890                 tcg_debug_assert(arg_ts->state_ptr == 0
2891                                  || arg_ts->state != 0);
2892             }
2893         } else {
2894             for (i = 0; i < nb_globals; ++i) {
2895                 /* Liveness should see that globals are saved back,
2896                    that is, TS_DEAD, waiting to be reloaded.  */
2897                 arg_ts = &s->temps[i];
2898                 tcg_debug_assert(arg_ts->state_ptr == 0
2899                                  || arg_ts->state == TS_DEAD);
2900             }
2901         }
2902 
2903         /* Outputs become available.  */
2904         for (i = 0; i < nb_oargs; i++) {
2905             arg_ts = arg_temp(op->args[i]);
2906             dir_ts = arg_ts->state_ptr;
2907             if (!dir_ts) {
2908                 continue;
2909             }
2910             op->args[i] = temp_arg(dir_ts);
2911             changes = true;
2912 
2913             /* The output is now live and modified.  */
2914             arg_ts->state = 0;
2915 
2916             /* Sync outputs upon their last write.  */
2917             if (NEED_SYNC_ARG(i)) {
2918                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2919                                   ? INDEX_op_st_i32
2920                                   : INDEX_op_st_i64);
2921                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2922 
2923                 sop->args[0] = temp_arg(dir_ts);
2924                 sop->args[1] = temp_arg(arg_ts->mem_base);
2925                 sop->args[2] = arg_ts->mem_offset;
2926 
2927                 arg_ts->state = TS_MEM;
2928             }
2929             /* Drop outputs that are dead.  */
2930             if (IS_DEAD_ARG(i)) {
2931                 arg_ts->state = TS_DEAD;
2932             }
2933         }
2934     }
2935 
2936     return changes;
2937 }
2938 
2939 #ifdef CONFIG_DEBUG_TCG
2940 static void dump_regs(TCGContext *s)
2941 {
2942     TCGTemp *ts;
2943     int i;
2944     char buf[64];
2945 
2946     for (i = 0; i < s->nb_temps; i++) {
2947         ts = &s->temps[i];
2948         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2949         switch (ts->val_type) {
2950         case TEMP_VAL_REG:
2951             printf("%s", tcg_target_reg_names[ts->reg]);
2952             break;
2953         case TEMP_VAL_MEM:
2954             printf("%d(%s)", (int)ts->mem_offset,
2955                    tcg_target_reg_names[ts->mem_base->reg]);
2956             break;
2957         case TEMP_VAL_CONST:
2958             printf("$0x%" TCG_PRIlx, ts->val);
2959             break;
2960         case TEMP_VAL_DEAD:
2961             printf("D");
2962             break;
2963         default:
2964             printf("???");
2965             break;
2966         }
2967         printf("\n");
2968     }
2969 
2970     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2971         if (s->reg_to_temp[i] != NULL) {
2972             printf("%s: %s\n",
2973                    tcg_target_reg_names[i],
2974                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2975         }
2976     }
2977 }
2978 
2979 static void check_regs(TCGContext *s)
2980 {
2981     int reg;
2982     int k;
2983     TCGTemp *ts;
2984     char buf[64];
2985 
2986     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2987         ts = s->reg_to_temp[reg];
2988         if (ts != NULL) {
2989             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2990                 printf("Inconsistency for register %s:\n",
2991                        tcg_target_reg_names[reg]);
2992                 goto fail;
2993             }
2994         }
2995     }
2996     for (k = 0; k < s->nb_temps; k++) {
2997         ts = &s->temps[k];
2998         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2999             && s->reg_to_temp[ts->reg] != ts) {
3000             printf("Inconsistency for temp %s:\n",
3001                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3002         fail:
3003             printf("reg state:\n");
3004             dump_regs(s);
3005             tcg_abort();
3006         }
3007     }
3008 }
3009 #endif
3010 
3011 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3012 {
3013 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3014     /* The sparc64 stack is accessed with an offset of 2047 */
3015     s->current_frame_offset = (s->current_frame_offset +
3016                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3017         ~(sizeof(tcg_target_long) - 1);
3018 #endif
3019     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3020         s->frame_end) {
3021         tcg_abort();
3022     }
3023     ts->mem_offset = s->current_frame_offset;
3024     ts->mem_base = s->frame_temp;
3025     ts->mem_allocated = 1;
3026     s->current_frame_offset += sizeof(tcg_target_long);
3027 }
3028 
3029 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3030 
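/* Where a temp's value may live, as tracked by val_type (a summary of
   the helpers below, not a new mechanism):
     TEMP_VAL_DEAD  - no current value;
     TEMP_VAL_CONST - a known constant, not yet materialized;
     TEMP_VAL_MEM   - in its canonical memory slot;
     TEMP_VAL_REG   - in ts->reg, with s->reg_to_temp[] pointing back.
   temp_load() promotes CONST/MEM to REG, temp_sync() writes back to
   MEM, and temp_free_or_dead() demotes to MEM or DEAD.  */
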
3031 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3032    mark it free; otherwise mark it dead.  */
3033 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3034 {
3035     if (ts->fixed_reg) {
3036         return;
3037     }
3038     if (ts->val_type == TEMP_VAL_REG) {
3039         s->reg_to_temp[ts->reg] = NULL;
3040     }
3041     ts->val_type = (free_or_dead < 0
3042                     || ts->temp_local
3043                     || ts->temp_global
3044                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3045 }
3046 
3047 /* Mark a temporary as dead.  */
3048 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3049 {
3050     temp_free_or_dead(s, ts, 1);
3051 }
3052 
3053 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3054    register needs to be allocated to store a constant.  If 'free_or_dead'
3055    is non-zero, subsequently release the temporary; if it is positive, the
3056    temp is dead; if it is negative, the temp is free.  */
3057 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3058                       TCGRegSet preferred_regs, int free_or_dead)
3059 {
3060     if (ts->fixed_reg) {
3061         return;
3062     }
3063     if (!ts->mem_coherent) {
3064         if (!ts->mem_allocated) {
3065             temp_allocate_frame(s, ts);
3066         }
3067         switch (ts->val_type) {
3068         case TEMP_VAL_CONST:
3069             /* If we're going to free the temp immediately, then we won't
3070                require it later in a register, so attempt to store the
3071                constant to memory directly.  */
3072             if (free_or_dead
3073                 && tcg_out_sti(s, ts->type, ts->val,
3074                                ts->mem_base->reg, ts->mem_offset)) {
3075                 break;
3076             }
3077             temp_load(s, ts, tcg_target_available_regs[ts->type],
3078                       allocated_regs, preferred_regs);
3079             /* fallthrough */
3080 
3081         case TEMP_VAL_REG:
3082             tcg_out_st(s, ts->type, ts->reg,
3083                        ts->mem_base->reg, ts->mem_offset);
3084             break;
3085 
3086         case TEMP_VAL_MEM:
3087             break;
3088 
3089         case TEMP_VAL_DEAD:
3090         default:
3091             tcg_abort();
3092         }
3093         ts->mem_coherent = 1;
3094     }
3095     if (free_or_dead) {
3096         temp_free_or_dead(s, ts, free_or_dead);
3097     }
3098 }
3099 
3100 /* free register 'reg' by spilling the corresponding temporary if necessary */
3101 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3102 {
3103     TCGTemp *ts = s->reg_to_temp[reg];
3104     if (ts != NULL) {
3105         temp_sync(s, ts, allocated_regs, 0, -1);
3106     }
3107 }
3108 
3109 /**
3110  * tcg_reg_alloc:
3111  * @required_regs: Set of registers in which we must allocate.
3112  * @allocated_regs: Set of registers which must be avoided.
3113  * @preferred_regs: Set of registers we should prefer.
3114  * @rev: True if we search the registers in "indirect" order.
3115  *
3116  * The allocated register must be in @required_regs & ~@allocated_regs,
3117  * but if we can put it in @preferred_regs we may save a move later.
3118  */
3119 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3120                             TCGRegSet allocated_regs,
3121                             TCGRegSet preferred_regs, bool rev)
3122 {
3123     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3124     TCGRegSet reg_ct[2];
3125     const int *order;
3126 
3127     reg_ct[1] = required_regs & ~allocated_regs;
3128     tcg_debug_assert(reg_ct[1] != 0);
3129     reg_ct[0] = reg_ct[1] & preferred_regs;
3130 
3131     /* Skip the preferred_regs option if it cannot be satisfied,
3132        or if the preference made no difference.  */
3133     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3134 
3135     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3136 
3137     /* Try free registers, preferences first.  */
3138     for (j = f; j < 2; j++) {
3139         TCGRegSet set = reg_ct[j];
3140 
3141         if (tcg_regset_single(set)) {
3142             /* One register in the set.  */
3143             TCGReg reg = tcg_regset_first(set);
3144             if (s->reg_to_temp[reg] == NULL) {
3145                 return reg;
3146             }
3147         } else {
3148             for (i = 0; i < n; i++) {
3149                 TCGReg reg = order[i];
3150                 if (s->reg_to_temp[reg] == NULL &&
3151                     tcg_regset_test_reg(set, reg)) {
3152                     return reg;
3153                 }
3154             }
3155         }
3156     }
3157 
3158     /* We must spill something.  */
3159     for (j = f; j < 2; j++) {
3160         TCGRegSet set = reg_ct[j];
3161 
3162         if (tcg_regset_single(set)) {
3163             /* One register in the set.  */
3164             TCGReg reg = tcg_regset_first(set);
3165             tcg_reg_free(s, reg, allocated_regs);
3166             return reg;
3167         } else {
3168             for (i = 0; i < n; i++) {
3169                 TCGReg reg = order[i];
3170                 if (tcg_regset_test_reg(set, reg)) {
3171                     tcg_reg_free(s, reg, allocated_regs);
3172                     return reg;
3173                 }
3174             }
3175         }
3176     }
3177 
3178     tcg_abort();
3179 }

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_movi(s, ts->type, reg, ts->val);
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}
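
/* Worked example for the constant case above (values hypothetical):
 * for a temp with val_type == TEMP_VAL_CONST and val == 0x1234,
 * temp_load allocates a register, emits the equivalent of
 *
 *     tcg_out_movi(s, ts->type, reg, 0x1234);
 *
 * and leaves the temp as TEMP_VAL_REG with mem_coherent = 0, because
 * the backing memory slot (if any) no longer matches the register.
 */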

/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}

/* Save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* Sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->fixed_reg
                         || ts->mem_coherent);
    }
}

/* At the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        if (ts->temp_local) {
            temp_save(s, ts, allocated_regs);
        } else {
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
        }
    }

    save_globals(s, allocated_regs);
}

static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    if (ots->fixed_reg) {
        /* For fixed registers, we do not do any constant propagation.  */
        tcg_out_movi(s, ots->type, ots->reg, val);
        return;
    }

    /* The movi is not explicitly generated here.  */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
{
    TCGTemp *ots = arg_temp(op->args[0]);
    tcg_target_ulong val = op->args[1];

    tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
}

static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    allocated_regs = s->reserved_regs;
    preferred_regs = op->output_pref[0];
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* Note that otype != itype for no-op truncation.  */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype],
                  allocated_regs, preferred_regs);
    }

    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
    } else {
        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
            /* the mov can be suppressed */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = NULL;
            }
            ots->reg = ts->reg;
            temp_dead(s, ts);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs, preferred_regs,
                                         ots->indirect_base);
            }
            tcg_out_mov(s, otype, ots->reg, ts->reg);
        }
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
        if (NEED_SYNC_ARG(0)) {
            temp_sync(s, ots, allocated_regs, 0, 0);
        }
    }
}
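
/* Illustrative before/after for the suppressed-mov branch above, with R
 * standing for an arbitrary host register:
 *
 *     before: ts->reg == R,   s->reg_to_temp[R] == ts
 *     after:  ots->reg == R,  s->reg_to_temp[R] == ots,  ts is dead
 *
 * No host instruction is emitted; the destination simply inherits the
 * dying source's register.
 */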

static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        i = def->sorted_args[nb_oargs + k];
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ct & TCG_CT_IALIAS) {
            o_preferred_regs = op->output_pref[arg_ct->alias_index];
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index]) {
                    goto allocate_in_reg;
                }
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                if (ts->val_type == TEMP_VAL_REG) {
                    int k2, i2;
                    reg = ts->reg;
                    for (k2 = 0; k2 < k; k2++) {
                        i2 = def->sorted_args[nb_oargs + k2];
                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                            reg == new_args[i2]) {
                            goto allocate_in_reg;
                        }
                    }
                }
                i_preferred_regs = o_preferred_regs;
            }
        }

        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do: the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);
            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* if a fixed register is used, then a move will be done
               afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = NULL;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = ts;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
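
/* For example, STACK_DIR(sizeof(tcg_target_long)) evaluates to +8 on a
 * 64-bit host whose stack grows down, and to -8 when
 * TCG_TARGET_STACK_GROWSUP is defined, letting stack-slot offset
 * arithmetic be written once for both stack directions.
 */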

static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
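    /* The expression above is the usual power-of-two round-up: with
       TCG_TARGET_STACK_ALIGN == 16 (a hypothetical value), a raw size
       of 24 becomes (24 + 15) & ~15 == 32, while 32 stays 32.  */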
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else {
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for (i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);
        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            if (NEED_SYNC_ARG(i)) {
                temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
            } else if (IS_DEAD_ARG(i)) {
                temp_dead(s, ts);
            }
        }
    }
}
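
/* Illustrative layout, assuming a host with six integer argument
 * registers (the real count is ARRAY_SIZE(tcg_target_call_iarg_regs)):
 * for a helper taking eight arguments, arguments 0-5 travel in
 * tcg_target_call_iarg_regs[0..5], while arguments 6-7 are stored to
 * TCG_REG_CALL_STACK at TCG_TARGET_CALL_STACK_OFFSET and the next slot,
 * walking in the direction dictated by TCG_TARGET_STACK_GROWSUP.
 */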

#ifdef CONFIG_PROFILER

/* avoid copy/paste errors */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += atomic_read(&((from)->field));   \
    } while (0)

#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
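
/* Illustrative expansion: PROF_ADD(prof, orig, tb_count) becomes
 *
 *     (prof)->tb_count += atomic_read(&((orig)->tb_count));
 *
 * i.e. each per-context counter is read exactly once with atomic_read,
 * tolerating concurrent updates, and accumulated into the snapshot.
 */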

/* Pass in a zeroed @prof */
static inline
void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        const TCGProfile *orig = &s->prof;

        if (counters) {
            PROF_ADD(prof, orig, cpu_exec_time);
            PROF_ADD(prof, orig, tb_count1);
            PROF_ADD(prof, orig, tb_count);
            PROF_ADD(prof, orig, op_count);
            PROF_MAX(prof, orig, op_count_max);
            PROF_ADD(prof, orig, temp_count);
            PROF_MAX(prof, orig, temp_count_max);
            PROF_ADD(prof, orig, del_op_count);
            PROF_ADD(prof, orig, code_in_len);
            PROF_ADD(prof, orig, code_out_len);
            PROF_ADD(prof, orig, search_out_len);
            PROF_ADD(prof, orig, interm_time);
            PROF_ADD(prof, orig, code_time);
            PROF_ADD(prof, orig, la_time);
            PROF_ADD(prof, orig, opt_time);
            PROF_ADD(prof, orig, restore_count);
            PROF_ADD(prof, orig, restore_time);
        }
        if (table) {
            int i;

            for (i = 0; i < NB_OPS; i++) {
                PROF_ADD(prof, orig, table_op_count[i]);
            }
        }
    }
}

#undef PROF_ADD
#undef PROF_MAX

static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}

static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}

void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    TCGProfile prof = {};
    int i;

    tcg_profile_snapshot_table(&prof);
    for (i = 0; i < NB_OPS; i++) {
        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                    prof.table_op_count[i]);
    }
}

int64_t tcg_cpu_exec_time(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    int64_t ret = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        const TCGProfile *prof = &s->prof;

        ret += atomic_read(&prof->cpu_exec_time);
    }
    return ret;
}
#else
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}

int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
#endif

int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        atomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            atomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        atomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            atomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock();
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    if (!tcg_out_ldst_finalize(s)) {
        return -1;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    if (!tcg_out_pool_finalize(s)) {
        return -1;
    }
#endif

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
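
/* Illustrative caller sketch (simplified; locking and TB bookkeeping
 * omitted), showing how the -1 return is meant to be handled:
 *
 *     int size = tcg_gen_code(s, tb);
 *     if (size < 0) {
 *         // out of code buffer space mid-TB: flush the buffer
 *         // and retranslate this TB from scratch
 *     }
 *
 * Every -1 above means "ran out of room", so nothing needs to be
 * unwound beyond discarding the partially generated TB.
 */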

#ifdef CONFIG_PROFILER
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    cpu_fprintf(f, "cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
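
/* Illustrative usage: per the GDB JIT interface documentation, GDB plants
 * a breakpoint inside __jit_debug_register_code.  Each time we update
 * __jit_debug_descriptor and call the function, GDB wakes up, follows
 * first_entry, and reads the in-memory ELF image at symfile_addr /
 * symfile_size.  The handshake can be traced from the debugger with
 * "set debug jit on".
 */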

static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    /* The search string is known to be present; no not-found case.  */
    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
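
/* Worked example with the .str table built below: the table begins
 * "\0.text\0.debug_info\0...", so find_string(img->str, ".text")
 * returns 1 and find_string(img->str, ".debug_info") returns 7
 * (one leading NUL plus strlen(".text") + 1).
 */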

static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            /* fwrite returns the number of complete items written, which
               here is 0 or 1; checking it also silences the unused-result
               warning for fwrite.  */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Nothing useful to do if the debug dump fails.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif