xref: /openbmc/qemu/tcg/tcg.c (revision 240ee8bd)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
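
/*
 * Worked example (illustrative): on a host with
 * TCG_TARGET_INSN_UNIT_SIZE == 1, e.g. the byte-stream encoding of x86,
 * tcg_out32() takes the memcpy path above and advances code_ptr by
 * 4 / 1 == 4 units, while on a fixed-width host such as aarch64
 * (unit size 4) the same call performs a single unit store.  The
 * "unused" attribute is needed because any given backend instantiates
 * only the helpers matching its encoding granularity.
 */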

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
342 
343 static void set_jmp_reset_offset(TCGContext *s, int which)
344 {
345     /*
346      * We will check for overflow at the end of the opcode loop in
347      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
348      */
349     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
350 }
351 
352 #include "tcg-target.c.inc"
353 
354 /* compare a pointer @ptr and a tb_tc @s */
355 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
356 {
357     if (ptr >= s->ptr + s->size) {
358         return 1;
359     } else if (ptr < s->ptr) {
360         return -1;
361     }
362     return 0;
363 }
364 
365 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
366 {
367     const struct tb_tc *a = ap;
368     const struct tb_tc *b = bp;
369 
370     /*
371      * When both sizes are set, we know this isn't a lookup.
372      * This is the most likely case: every TB must be inserted; lookups
373      * are a lot less frequent.
374      */
375     if (likely(a->size && b->size)) {
376         if (a->ptr > b->ptr) {
377             return 1;
378         } else if (a->ptr < b->ptr) {
379             return -1;
380         }
381         /* a->ptr == b->ptr should happen only on deletions */
382         g_assert(a->size == b->size);
383         return 0;
384     }
385     /*
     * All lookups have their .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
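
/*
 * Usage sketch (illustrative; host_pc is an arbitrary address inside
 * translated code): a lookup key is a tb_tc whose .size is 0, so
 * tb_tc_cmp() falls through to ptr_cmp_tb_tc() and matches the stored
 * entry whose [ptr, ptr + size) range contains the pointer:
 *
 *     struct tb_tc key = { .ptr = (void *)host_pc };   // .size == 0
 *     TranslationBlock *tb = g_tree_lookup(rt->tree, &key);
 *
 * This is exactly the pattern used by tcg_tb_lookup() below.
 */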

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
{
    void *p = tcg_splitwx_to_rw(cp);
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}
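
/*
 * Worked example with illustrative numbers: if region.start_aligned is
 * 0x1000, region.stride is 0x9000 and region.size is 0x8000, then
 * region 1 spans [0xa000, 0x12000), followed by its 0x1000-byte guard
 * page.  Region 0 is special-cased to begin at region.start, which may
 * lie below start_aligned when the buffer was not page-aligned, and the
 * last region is extended up to region.end to absorb any leftover pages.
 */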

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
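
/*
 * Worked example for the heuristic above (illustrative numbers): with a
 * 512 MB code_gen_buffer and max_cpus == 8, the first iteration already
 * succeeds, since 512 MB / (8 * 8) = 8 MB >= 2 MB, giving 64 regions.
 * With max_cpus == 64, i == 8 would yield only 1 MB per region, so the
 * loop steps down to i == 4, where 512 MB / (64 * 4) = 2 MB, giving 256
 * regions.
 */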

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
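
/*
 * Resulting layout (sketch):
 *
 *   region.start                                             region.end
 *   |<--- region 0 --->|G|<--- region 1 --->|G| ... |<- region n-1 ->|G|
 *
 * where each G is a guard page protected with qemu_mprotect_none(), so a
 * TCG thread that overruns its region faults instead of silently writing
 * into a neighbour's code; with split-wx enabled, the mirrored guard in
 * the read-execute mapping is protected as well.
 */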

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
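
/*
 * These two helpers check a simple invariant: the writable (RW) and
 * executable (RX) views of code_gen_buffer are the same memory at a
 * fixed offset, i.e.
 *
 *     rx == rw + tcg_splitwx_diff
 *     rw == rx - tcg_splitwx_diff
 *
 * with tcg_splitwx_diff == 0 when split-wx is disabled, in which case
 * both conversions degenerate to the identity.
 */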

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
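
/*
 * Worked example (illustrative numbers): with 16 regions, one 4 KB guard
 * page per region (guard_size == stride - size) and TCG_HIGHWATER of
 * 1024, a 64 MB span from region.start to region.end + guard_size gives
 * 64 MB - 16 * (4096 + 1024) = 64 MB - 80 KB of capacity, which is the
 * upper bound on what tcg_code_size() can report.
 */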

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
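
/*
 * Usage sketch (illustrative): translation-time allocations go through
 * tcg_malloc(), which carves from the current chunk and only falls back
 * to tcg_malloc_internal() on overflow; everything is released in one
 * shot per translation:
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));  // pooled
 *     ...
 *     tcg_pool_reset(s);  // frees large chunks, rewinds the rest
 *
 * tcg_func_start() below performs exactly this reset before each TB.
 */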

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }
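
    /*
     * Since the table is keyed by the helper's address, code generation
     * can later map a call target back to its TCGHelperInfo with a
     * direct-pointer lookup (sketch):
     *
     *     TCGHelperInfo *info = g_hash_table_lookup(helper_table, func);
     */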

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
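
/*
 * Resulting layout within a region (sketch): each TranslationBlock
 * header is placed immediately before its translated code, with both
 * rounded up to the instruction cache line size:
 *
 *     ... | TB header | pad | host code | pad | TB header | ...
 *
 * so TB metadata and code never share a cache line, and a failed
 * high-water check simply retries the allocation in a fresh region.
 */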

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        /* Signal overflow, starting over with fewer guest insns. */
        siglongjmp(s->jmp_trans, -2);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
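
/*
 * Usage sketch (illustrative): because TEMP_CONST temps are interned in
 * const_table[type], requesting the same value twice yields the same
 * temp, and tcg_temp_free_internal() above deliberately ignores frees:
 *
 *     TCGTemp *a = tcg_constant_internal(TCG_TYPE_I32, 42);
 *     TCGTemp *b = tcg_constant_internal(TCG_TYPE_I32, 42);
 *     assert(a == b);   // deduplicated per (type, value)
 *
 * The tcg_constant_i32()/tcg_constant_i64() wrappers in the TCG headers
 * are the usual front-end entry points.
 */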

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
1822     case INDEX_op_umax_vec:
1823         return have_vec && TCG_TARGET_HAS_minmax_vec;
1824     case INDEX_op_bitsel_vec:
1825         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1826     case INDEX_op_cmpsel_vec:
1827         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1828 
1829     default:
1830         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1831         return true;
1832     }
1833 }
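/*
 * For example, per the cases above: on a 32-bit host INDEX_op_mov_i64
 * reports false (the TCG_TARGET_REG_BITS == 64 test fails) while
 * INDEX_op_brcond2_i32 reports true, and every *_vec opcode requires
 * have_vec before its TCG_TARGET_HAS_* flag is even consulted.
 */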
1834 
1835 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1836    and endian swapping.  Maybe it would be better to do the alignment
1837    and endian swapping in tcg_reg_alloc_call(). */
1838 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1839 {
1840     int i, real_args, nb_rets, pi;
1841     unsigned sizemask, flags;
1842     TCGHelperInfo *info;
1843     TCGOp *op;
1844 
1845     info = g_hash_table_lookup(helper_table, (gpointer)func);
1846     flags = info->flags;
1847     sizemask = info->sizemask;
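    /*
     * A worked example of the sizemask encoding as it is decoded below:
     * bit 0 is set when the return value is 64-bit; for argument i,
     * bit (i+1)*2 marks a 64-bit argument and bit (i+1)*2 + 1 a signed
     * one.  So a helper returning i64 and taking (i32, i64) has
     * (sizemask & 1) != 0, (sizemask & (1 << 2)) == 0 for arg 0, and
     * (sizemask & (1 << 4)) != 0 for arg 1.
     */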
1848 
1849 #ifdef CONFIG_PLUGIN
1850     /* detect non-plugin helpers */
1851     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1852         tcg_ctx->plugin_insn->calls_helpers = true;
1853     }
1854 #endif
1855 
1856 #if defined(__sparc__) && !defined(__arch64__) \
1857     && !defined(CONFIG_TCG_INTERPRETER)
1858     /* We have 64-bit values in one register, but need to pass them as
1859        two separate parameters.  Split them.  */
1860     int orig_sizemask = sizemask;
1861     int orig_nargs = nargs;
1862     TCGv_i64 retl, reth;
1863     TCGTemp *split_args[MAX_OPC_PARAM];
1864 
1865     retl = NULL;
1866     reth = NULL;
1867     if (sizemask != 0) {
1868         for (i = real_args = 0; i < nargs; ++i) {
1869             int is_64bit = sizemask & (1 << (i+1)*2);
1870             if (is_64bit) {
1871                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1872                 TCGv_i32 h = tcg_temp_new_i32();
1873                 TCGv_i32 l = tcg_temp_new_i32();
1874                 tcg_gen_extr_i64_i32(l, h, orig);
1875                 split_args[real_args++] = tcgv_i32_temp(h);
1876                 split_args[real_args++] = tcgv_i32_temp(l);
1877             } else {
1878                 split_args[real_args++] = args[i];
1879             }
1880         }
1881         nargs = real_args;
1882         args = split_args;
1883         sizemask = 0;
1884     }
1885 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1886     for (i = 0; i < nargs; ++i) {
1887         int is_64bit = sizemask & (1 << (i+1)*2);
1888         int is_signed = sizemask & (2 << (i+1)*2);
1889         if (!is_64bit) {
1890             TCGv_i64 temp = tcg_temp_new_i64();
1891             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1892             if (is_signed) {
1893                 tcg_gen_ext32s_i64(temp, orig);
1894             } else {
1895                 tcg_gen_ext32u_i64(temp, orig);
1896             }
1897             args[i] = tcgv_i64_temp(temp);
1898         }
1899     }
1900 #endif /* TCG_TARGET_EXTEND_ARGS */
1901 
1902     op = tcg_emit_op(INDEX_op_call);
1903 
1904     pi = 0;
1905     if (ret != NULL) {
1906 #if defined(__sparc__) && !defined(__arch64__) \
1907     && !defined(CONFIG_TCG_INTERPRETER)
1908         if (orig_sizemask & 1) {
1909             /* The 32-bit ABI is going to return the 64-bit value in
1910                the %o0/%o1 register pair.  Prepare for this by using
1911                two return temporaries, and reassemble below.  */
1912             retl = tcg_temp_new_i64();
1913             reth = tcg_temp_new_i64();
1914             op->args[pi++] = tcgv_i64_arg(reth);
1915             op->args[pi++] = tcgv_i64_arg(retl);
1916             nb_rets = 2;
1917         } else {
1918             op->args[pi++] = temp_arg(ret);
1919             nb_rets = 1;
1920         }
1921 #else
1922         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1923 #ifdef HOST_WORDS_BIGENDIAN
1924             op->args[pi++] = temp_arg(ret + 1);
1925             op->args[pi++] = temp_arg(ret);
1926 #else
1927             op->args[pi++] = temp_arg(ret);
1928             op->args[pi++] = temp_arg(ret + 1);
1929 #endif
1930             nb_rets = 2;
1931         } else {
1932             op->args[pi++] = temp_arg(ret);
1933             nb_rets = 1;
1934         }
1935 #endif
1936     } else {
1937         nb_rets = 0;
1938     }
1939     TCGOP_CALLO(op) = nb_rets;
1940 
1941     real_args = 0;
1942     for (i = 0; i < nargs; i++) {
1943         int is_64bit = sizemask & (1 << (i+1)*2);
1944         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1945 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1946             /* some targets want aligned 64 bit args */
1947             if (real_args & 1) {
1948                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1949                 real_args++;
1950             }
1951 #endif
1952            /* If stack grows up, then we will be placing successive
1953               arguments at lower addresses, which means we need to
1954               reverse the order compared to how we would normally
1955               treat either big or little-endian.  For those arguments
1956               that will wind up in registers, this still works for
1957               HPPA (the only current STACK_GROWSUP target) since the
1958               argument registers are *also* allocated in decreasing
1959               order.  If another such target is added, this logic may
1960               have to get more complicated to differentiate between
1961               stack arguments and register arguments.  */
1962 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1963             op->args[pi++] = temp_arg(args[i] + 1);
1964             op->args[pi++] = temp_arg(args[i]);
1965 #else
1966             op->args[pi++] = temp_arg(args[i]);
1967             op->args[pi++] = temp_arg(args[i] + 1);
1968 #endif
1969             real_args += 2;
1970             continue;
1971         }
1972 
1973         op->args[pi++] = temp_arg(args[i]);
1974         real_args++;
1975     }
1976     op->args[pi++] = (uintptr_t)func;
1977     op->args[pi++] = flags;
1978     TCGOP_CALLI(op) = real_args;
1979 
1980     /* Make sure the fields didn't overflow.  */
1981     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1982     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1983 
1984 #if defined(__sparc__) && !defined(__arch64__) \
1985     && !defined(CONFIG_TCG_INTERPRETER)
1986     /* Free all of the parts we allocated above.  */
1987     for (i = real_args = 0; i < orig_nargs; ++i) {
1988         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1989         if (is_64bit) {
1990             tcg_temp_free_internal(args[real_args++]);
1991             tcg_temp_free_internal(args[real_args++]);
1992         } else {
1993             real_args++;
1994         }
1995     }
1996     if (orig_sizemask & 1) {
1997         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1998            Note that describing these as TCGv_i64 eliminates an unnecessary
1999            zero-extension that tcg_gen_concat_i32_i64 would create.  */
2000         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2001         tcg_temp_free_i64(retl);
2002         tcg_temp_free_i64(reth);
2003     }
2004 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2005     for (i = 0; i < nargs; ++i) {
2006         int is_64bit = sizemask & (1 << (i+1)*2);
2007         if (!is_64bit) {
2008             tcg_temp_free_internal(args[i]);
2009         }
2010     }
2011 #endif /* TCG_TARGET_EXTEND_ARGS */
2012 }
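/*
 * To illustrate the layout built above: on a 32-bit host with
 * TCG_TARGET_CALL_ALIGN_ARGS, a 64-bit input landing in an odd slot is
 * first padded with TCG_CALL_DUMMY_ARG and then split into two 32-bit
 * halves whose order depends on host endianness (and stack direction),
 * so op->args ends up holding: the return temp(s), the padded and
 * split input temps, the function pointer, and the flags word.
 */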
2013 
2014 static void tcg_reg_alloc_start(TCGContext *s)
2015 {
2016     int i, n;
2017 
2018     for (i = 0, n = s->nb_temps; i < n; i++) {
2019         TCGTemp *ts = &s->temps[i];
2020         TCGTempVal val = TEMP_VAL_MEM;
2021 
2022         switch (ts->kind) {
2023         case TEMP_CONST:
2024             val = TEMP_VAL_CONST;
2025             break;
2026         case TEMP_FIXED:
2027             val = TEMP_VAL_REG;
2028             break;
2029         case TEMP_GLOBAL:
2030             break;
2031         case TEMP_NORMAL:
2032             val = TEMP_VAL_DEAD;
2033             /* fall through */
2034         case TEMP_LOCAL:
2035             ts->mem_allocated = 0;
2036             break;
2037         default:
2038             g_assert_not_reached();
2039         }
2040         ts->val_type = val;
2041     }
2042 
2043     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2044 }
2045 
2046 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2047                                  TCGTemp *ts)
2048 {
2049     int idx = temp_idx(ts);
2050 
2051     switch (ts->kind) {
2052     case TEMP_FIXED:
2053     case TEMP_GLOBAL:
2054         pstrcpy(buf, buf_size, ts->name);
2055         break;
2056     case TEMP_LOCAL:
2057         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2058         break;
2059     case TEMP_NORMAL:
2060         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2061         break;
2062     case TEMP_CONST:
2063         switch (ts->type) {
2064         case TCG_TYPE_I32:
2065             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2066             break;
2067 #if TCG_TARGET_REG_BITS > 32
2068         case TCG_TYPE_I64:
2069             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2070             break;
2071 #endif
2072         case TCG_TYPE_V64:
2073         case TCG_TYPE_V128:
2074         case TCG_TYPE_V256:
2075             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2076                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2077             break;
2078         default:
2079             g_assert_not_reached();
2080         }
2081         break;
2082     }
2083     return buf;
2084 }
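/*
 * Sample strings produced above: a fixed or global temp prints its own
 * name (e.g. "env"), the first local after the globals prints "loc0",
 * a normal temp "tmp0", an i32 constant "$0x1f", and a 128-bit vector
 * constant "v128$0x<val>".
 */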
2085 
2086 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2087                              int buf_size, TCGArg arg)
2088 {
2089     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2090 }
2091 
2092 /* Find helper name.  */
2093 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2094 {
2095     const char *ret = NULL;
2096     if (helper_table) {
2097         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2098         if (info) {
2099             ret = info->name;
2100         }
2101     }
2102     return ret;
2103 }
2104 
2105 static const char * const cond_name[] =
2106 {
2107     [TCG_COND_NEVER] = "never",
2108     [TCG_COND_ALWAYS] = "always",
2109     [TCG_COND_EQ] = "eq",
2110     [TCG_COND_NE] = "ne",
2111     [TCG_COND_LT] = "lt",
2112     [TCG_COND_GE] = "ge",
2113     [TCG_COND_LE] = "le",
2114     [TCG_COND_GT] = "gt",
2115     [TCG_COND_LTU] = "ltu",
2116     [TCG_COND_GEU] = "geu",
2117     [TCG_COND_LEU] = "leu",
2118     [TCG_COND_GTU] = "gtu"
2119 };
2120 
2121 static const char * const ldst_name[] =
2122 {
2123     [MO_UB]   = "ub",
2124     [MO_SB]   = "sb",
2125     [MO_LEUW] = "leuw",
2126     [MO_LESW] = "lesw",
2127     [MO_LEUL] = "leul",
2128     [MO_LESL] = "lesl",
2129     [MO_LEQ]  = "leq",
2130     [MO_BEUW] = "beuw",
2131     [MO_BESW] = "besw",
2132     [MO_BEUL] = "beul",
2133     [MO_BESL] = "besl",
2134     [MO_BEQ]  = "beq",
2135 };
2136 
2137 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2138 #ifdef TARGET_ALIGNED_ONLY
2139     [MO_UNALN >> MO_ASHIFT]    = "un+",
2140     [MO_ALIGN >> MO_ASHIFT]    = "",
2141 #else
2142     [MO_UNALN >> MO_ASHIFT]    = "",
2143     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2144 #endif
2145     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2146     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2147     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2148     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2149     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2150     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2151 };
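/*
 * These two tables combine in the memop dumps below: for instance, a
 * 4-byte-aligned little-endian 32-bit unsigned load prints as
 * "al4+leul", while the default alignment prefix is the empty string.
 */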
2152 
2153 static inline bool tcg_regset_single(TCGRegSet d)
2154 {
2155     return (d & (d - 1)) == 0;
2156 }
2157 
2158 static inline TCGReg tcg_regset_first(TCGRegSet d)
2159 {
2160     if (TCG_TARGET_NB_REGS <= 32) {
2161         return ctz32(d);
2162     } else {
2163         return ctz64(d);
2164     }
2165 }
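/*
 * tcg_regset_single uses the usual d & (d - 1) trick: clearing the
 * lowest set bit leaves zero exactly when at most one bit was set
 * (e.g. 0b0100 -> 0, but 0b0110 -> 0b0100).  Note it also accepts
 * d == 0; the caller below only applies it to non-empty sets.
 * tcg_regset_first then returns the index of that bit via ctz.
 */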
2166 
2167 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2168 {
2169     char buf[128];
2170     TCGOp *op;
2171 
2172     QTAILQ_FOREACH(op, &s->ops, link) {
2173         int i, k, nb_oargs, nb_iargs, nb_cargs;
2174         const TCGOpDef *def;
2175         TCGOpcode c;
2176         int col = 0;
2177 
2178         c = op->opc;
2179         def = &tcg_op_defs[c];
2180 
2181         if (c == INDEX_op_insn_start) {
2182             nb_oargs = 0;
2183             col += qemu_log("\n ----");
2184 
2185             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2186                 target_ulong a;
2187 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2188                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2189 #else
2190                 a = op->args[i];
2191 #endif
2192                 col += qemu_log(" " TARGET_FMT_lx, a);
2193             }
2194         } else if (c == INDEX_op_call) {
2195             /* variable number of arguments */
2196             nb_oargs = TCGOP_CALLO(op);
2197             nb_iargs = TCGOP_CALLI(op);
2198             nb_cargs = def->nb_cargs;
2199 
2200             /* function name, flags, out args */
2201             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2202                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2203                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2204             for (i = 0; i < nb_oargs; i++) {
2205                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2206                                                        op->args[i]));
2207             }
2208             for (i = 0; i < nb_iargs; i++) {
2209                 TCGArg arg = op->args[nb_oargs + i];
2210                 const char *t = "<dummy>";
2211                 if (arg != TCG_CALL_DUMMY_ARG) {
2212                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2213                 }
2214                 col += qemu_log(",%s", t);
2215             }
2216         } else {
2217             col += qemu_log(" %s ", def->name);
2218 
2219             nb_oargs = def->nb_oargs;
2220             nb_iargs = def->nb_iargs;
2221             nb_cargs = def->nb_cargs;
2222 
2223             if (def->flags & TCG_OPF_VECTOR) {
2224                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2225                                 8 << TCGOP_VECE(op));
2226             }
2227 
2228             k = 0;
2229             for (i = 0; i < nb_oargs; i++) {
2230                 if (k != 0) {
2231                     col += qemu_log(",");
2232                 }
2233                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2234                                                       op->args[k++]));
2235             }
2236             for (i = 0; i < nb_iargs; i++) {
2237                 if (k != 0) {
2238                     col += qemu_log(",");
2239                 }
2240                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2241                                                       op->args[k++]));
2242             }
2243             switch (c) {
2244             case INDEX_op_brcond_i32:
2245             case INDEX_op_setcond_i32:
2246             case INDEX_op_movcond_i32:
2247             case INDEX_op_brcond2_i32:
2248             case INDEX_op_setcond2_i32:
2249             case INDEX_op_brcond_i64:
2250             case INDEX_op_setcond_i64:
2251             case INDEX_op_movcond_i64:
2252             case INDEX_op_cmp_vec:
2253             case INDEX_op_cmpsel_vec:
2254                 if (op->args[k] < ARRAY_SIZE(cond_name)
2255                     && cond_name[op->args[k]]) {
2256                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2257                 } else {
2258                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2259                 }
2260                 i = 1;
2261                 break;
2262             case INDEX_op_qemu_ld_i32:
2263             case INDEX_op_qemu_st_i32:
2264             case INDEX_op_qemu_st8_i32:
2265             case INDEX_op_qemu_ld_i64:
2266             case INDEX_op_qemu_st_i64:
2267                 {
2268                     TCGMemOpIdx oi = op->args[k++];
2269                     MemOp mop = get_memop(oi);  /* don't shadow TCGOp *op */
2270                     unsigned ix = get_mmuidx(oi);
2271 
2272                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2273                         col += qemu_log(",$0x%x,%u", mop, ix);
2274                     } else {
2275                         const char *s_al, *s_op;
2276                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2277                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2278                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2279                     }
2280                     i = 1;
2281                 }
2282                 break;
2283             default:
2284                 i = 0;
2285                 break;
2286             }
2287             switch (c) {
2288             case INDEX_op_set_label:
2289             case INDEX_op_br:
2290             case INDEX_op_brcond_i32:
2291             case INDEX_op_brcond_i64:
2292             case INDEX_op_brcond2_i32:
2293                 col += qemu_log("%s$L%d", k ? "," : "",
2294                                 arg_label(op->args[k])->id);
2295                 i++, k++;
2296                 break;
2297             default:
2298                 break;
2299             }
2300             for (; i < nb_cargs; i++, k++) {
2301                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2302             }
2303         }
2304 
2305         if (have_prefs || op->life) {
2306 
2307             QemuLogFile *logfile;
2308 
2309             rcu_read_lock();
2310             logfile = qatomic_rcu_read(&qemu_logfile);
2311             if (logfile) {
2312                 for (; col < 40; ++col) {
2313                     putc(' ', logfile->fd);
2314                 }
2315             }
2316             rcu_read_unlock();
2317         }
2318 
2319         if (op->life) {
2320             unsigned life = op->life;
2321 
2322             if (life & (SYNC_ARG * 3)) {
2323                 qemu_log("  sync:");
2324                 for (i = 0; i < 2; ++i) {
2325                     if (life & (SYNC_ARG << i)) {
2326                         qemu_log(" %d", i);
2327                     }
2328                 }
2329             }
2330             life /= DEAD_ARG;
2331             if (life) {
2332                 qemu_log("  dead:");
2333                 for (i = 0; life; ++i, life >>= 1) {
2334                     if (life & 1) {
2335                         qemu_log(" %d", i);
2336                     }
2337                 }
2338             }
2339         }
2340 
2341         if (have_prefs) {
2342             for (i = 0; i < nb_oargs; ++i) {
2343                 TCGRegSet set = op->output_pref[i];
2344 
2345                 if (i == 0) {
2346                     qemu_log("  pref=");
2347                 } else {
2348                     qemu_log(",");
2349                 }
2350                 if (set == 0) {
2351                     qemu_log("none");
2352                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2353                     qemu_log("all");
2354 #ifdef CONFIG_DEBUG_TCG
2355                 } else if (tcg_regset_single(set)) {
2356                     TCGReg reg = tcg_regset_first(set);
2357                     qemu_log("%s", tcg_target_reg_names[reg]);
2358 #endif
2359                 } else if (TCG_TARGET_NB_REGS <= 32) {
2360                     qemu_log("%#x", (uint32_t)set);
2361                 } else {
2362                     qemu_log("%#" PRIx64, (uint64_t)set);
2363                 }
2364             }
2365         }
2366 
2367         qemu_log("\n");
2368     }
2369 }
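/*
 * Abridged example of what the dump above produces (temp names,
 * addresses and column padding are illustrative only):
 *
 *  ---- 00000000004004d0
 *  mov_i32 tmp0,var                         dead: 1
 *  brcond_i32 tmp0,tmp1,eq,$L0              dead: 0 1
 */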
2370 
2371 /* we give more priority to constraints with fewer registers */
2372 static int get_constraint_priority(const TCGOpDef *def, int k)
2373 {
2374     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2375     int n;
2376 
2377     if (arg_ct->oalias) {
2378         /* an alias is equivalent to a single register */
2379         n = 1;
2380     } else {
2381         n = ctpop64(arg_ct->regs);
2382     }
2383     return TCG_TARGET_NB_REGS - n + 1;
2384 }
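/*
 * Worked example of the formula above, with TCG_TARGET_NB_REGS == 16:
 * an aliased operand (n = 1) gets priority 16, a constraint allowing a
 * single register also gets 16, and one allowing all 16 registers gets
 * priority 1; scarcer constraints are thus allocated first.
 */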
2385 
2386 /* sort from highest priority to lowest */
2387 static void sort_constraints(TCGOpDef *def, int start, int n)
2388 {
2389     int i, j;
2390     TCGArgConstraint *a = def->args_ct;
2391 
2392     for (i = 0; i < n; i++) {
2393         a[start + i].sort_index = start + i;
2394     }
2395     if (n <= 1) {
2396         return;
2397     }
2398     for (i = 0; i < n - 1; i++) {
2399         for (j = i + 1; j < n; j++) {
2400             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2401             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2402             if (p1 < p2) {
2403                 int tmp = a[start + i].sort_index;
2404                 a[start + i].sort_index = a[start + j].sort_index;
2405                 a[start + j].sort_index = tmp;
2406             }
2407         }
2408     }
2409 }
2410 
2411 static void process_op_defs(TCGContext *s)
2412 {
2413     TCGOpcode op;
2414 
2415     for (op = 0; op < NB_OPS; op++) {
2416         TCGOpDef *def = &tcg_op_defs[op];
2417         const TCGTargetOpDef *tdefs;
2418         TCGType type;
2419         int i, nb_args;
2420 
2421         if (def->flags & TCG_OPF_NOT_PRESENT) {
2422             continue;
2423         }
2424 
2425         nb_args = def->nb_iargs + def->nb_oargs;
2426         if (nb_args == 0) {
2427             continue;
2428         }
2429 
2430         tdefs = tcg_target_op_def(op);
2431         /* Missing TCGTargetOpDef entry. */
2432         tcg_debug_assert(tdefs != NULL);
2433 
2434         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2435         for (i = 0; i < nb_args; i++) {
2436             const char *ct_str = tdefs->args_ct_str[i];
2437             /* Incomplete TCGTargetOpDef entry. */
2438             tcg_debug_assert(ct_str != NULL);
2439 
2440             while (*ct_str != '\0') {
2441                 switch (*ct_str) {
2442                 case '0' ... '9':
2443                     {
2444                         int oarg = *ct_str - '0';
2445                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2446                         tcg_debug_assert(oarg < def->nb_oargs);
2447                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2448                         def->args_ct[i] = def->args_ct[oarg];
2449                         /* The output sets oalias.  */
2450                         def->args_ct[oarg].oalias = true;
2451                         def->args_ct[oarg].alias_index = i;
2452                         /* The input sets ialias. */
2453                         def->args_ct[i].ialias = true;
2454                         def->args_ct[i].alias_index = oarg;
2455                     }
2456                     ct_str++;
2457                     break;
2458                 case '&':
2459                     def->args_ct[i].newreg = true;
2460                     ct_str++;
2461                     break;
2462                 case 'i':
2463                     def->args_ct[i].ct |= TCG_CT_CONST;
2464                     ct_str++;
2465                     break;
2466                 default:
2467                     ct_str = target_parse_constraint(&def->args_ct[i],
2468                                                      ct_str, type);
2469                     /* Typo in TCGTargetOpDef constraint. */
2470                     tcg_debug_assert(ct_str != NULL);
2471                 }
2472             }
2473         }
2474 
2475         /* TCGTargetOpDef entry with too much information? */
2476         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2477 
2478         /* sort the constraints (XXX: this is just a heuristic) */
2479         sort_constraints(def, 0, def->nb_oargs);
2480         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2481     }
2482 }
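/*
 * A sketch of the parsing above: a target entry such as
 * { .args_ct_str = { "r", "0", "ri" } } for a two-address op makes
 * input 1 alias output 0 (setting oalias/ialias and alias_index on
 * both copies), and lets input 2 accept a register or an immediate,
 * the 'i' setting TCG_CT_CONST while 'r' is resolved by
 * target_parse_constraint() through the default case.
 */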
2483 
2484 void tcg_op_remove(TCGContext *s, TCGOp *op)
2485 {
2486     TCGLabel *label;
2487 
2488     switch (op->opc) {
2489     case INDEX_op_br:
2490         label = arg_label(op->args[0]);
2491         label->refs--;
2492         break;
2493     case INDEX_op_brcond_i32:
2494     case INDEX_op_brcond_i64:
2495         label = arg_label(op->args[3]);
2496         label->refs--;
2497         break;
2498     case INDEX_op_brcond2_i32:
2499         label = arg_label(op->args[5]);
2500         label->refs--;
2501         break;
2502     default:
2503         break;
2504     }
2505 
2506     QTAILQ_REMOVE(&s->ops, op, link);
2507     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2508     s->nb_ops--;
2509 
2510 #ifdef CONFIG_PROFILER
2511     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2512 #endif
2513 }
2514 
2515 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2516 {
2517     TCGContext *s = tcg_ctx;
2518     TCGOp *op;
2519 
2520     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2521         op = tcg_malloc(sizeof(TCGOp));
2522     } else {
2523         op = QTAILQ_FIRST(&s->free_ops);
2524         QTAILQ_REMOVE(&s->free_ops, op, link);
2525     }
2526     memset(op, 0, offsetof(TCGOp, link));
2527     op->opc = opc;
2528     s->nb_ops++;
2529 
2530     return op;
2531 }
2532 
2533 TCGOp *tcg_emit_op(TCGOpcode opc)
2534 {
2535     TCGOp *op = tcg_op_alloc(opc);
2536     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2537     return op;
2538 }
2539 
2540 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2541 {
2542     TCGOp *new_op = tcg_op_alloc(opc);
2543     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2544     return new_op;
2545 }
2546 
2547 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2548 {
2549     TCGOp *new_op = tcg_op_alloc(opc);
2550     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2551     return new_op;
2552 }
2553 
2554 /* Reachability analysis: remove unreachable code.  */
2555 static void reachable_code_pass(TCGContext *s)
2556 {
2557     TCGOp *op, *op_next;
2558     bool dead = false;
2559 
2560     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2561         bool remove = dead;
2562         TCGLabel *label;
2563         int call_flags;
2564 
2565         switch (op->opc) {
2566         case INDEX_op_set_label:
2567             label = arg_label(op->args[0]);
2568             if (label->refs == 0) {
2569                 /*
2570                  * While there is an occasional backward branch, virtually
2571                  * all branches generated by the translators are forward.
2572                  * Which means that, by the time we see a label, we will
2573                  * generally have already removed all references to it,
2574                  * and there is little to be gained by iterating.
2575                  */
2576                 remove = true;
2577             } else {
2578                 /* Once we see a label, insns become live again.  */
2579                 dead = false;
2580                 remove = false;
2581 
2582                 /*
2583                  * Optimization can fold conditional branches to unconditional.
2584                  * If we find a label with one reference which is preceded by
2585                  * an unconditional branch to it, remove both.  This needed to
2586                  * wait until the dead code in between them was removed.
2587                  */
2588                 if (label->refs == 1) {
2589                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2590                     if (op_prev->opc == INDEX_op_br &&
2591                         label == arg_label(op_prev->args[0])) {
2592                         tcg_op_remove(s, op_prev);
2593                         remove = true;
2594                     }
2595                 }
2596             }
2597             break;
2598 
2599         case INDEX_op_br:
2600         case INDEX_op_exit_tb:
2601         case INDEX_op_goto_ptr:
2602             /* Unconditional branches; everything following is dead.  */
2603             dead = true;
2604             break;
2605 
2606         case INDEX_op_call:
2607             /* Notice noreturn helper calls, raising exceptions.  */
2608             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2609             if (call_flags & TCG_CALL_NO_RETURN) {
2610                 dead = true;
2611             }
2612             break;
2613 
2614         case INDEX_op_insn_start:
2615             /* Never remove -- we need to keep these for unwind.  */
2616             remove = false;
2617             break;
2618 
2619         default:
2620             break;
2621         }
2622 
2623         if (remove) {
2624             tcg_op_remove(s, op);
2625         }
2626     }
2627 }
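/*
 * Example of the br/label collapse handled above: after optimization
 * folds a conditional branch away, a sequence like
 *
 *     br $L3
 *     set_label $L3        (refs == 1)
 *
 * first loses any dead ops between the two, then both the br and the
 * label are removed.
 */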
2628 
2629 #define TS_DEAD  1
2630 #define TS_MEM   2
2631 
2632 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2633 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
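/*
 * In the backward scan below, these bits describe what later opcodes
 * require of a temp: TS_DEAD set means its current value is not used
 * again, TS_MEM set means later code expects a valid copy in memory,
 * and TS_DEAD | TS_MEM means dead but with the memory copy preserved.
 */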
2634 
2635 /* For liveness_pass_1, the register preferences for a given temp.  */
2636 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2637 {
2638     return ts->state_ptr;
2639 }
2640 
2641 /* For liveness_pass_1, reset the preferences for a given temp to the
2642  * maximal regset for its type.
2643  */
2644 static inline void la_reset_pref(TCGTemp *ts)
2645 {
2646     *la_temp_pref(ts)
2647         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2648 }
2649 
2650 /* liveness analysis: end of function: all temps are dead, and globals
2651    should be in memory. */
2652 static void la_func_end(TCGContext *s, int ng, int nt)
2653 {
2654     int i;
2655 
2656     for (i = 0; i < ng; ++i) {
2657         s->temps[i].state = TS_DEAD | TS_MEM;
2658         la_reset_pref(&s->temps[i]);
2659     }
2660     for (i = ng; i < nt; ++i) {
2661         s->temps[i].state = TS_DEAD;
2662         la_reset_pref(&s->temps[i]);
2663     }
2664 }
2665 
2666 /* liveness analysis: end of basic block: all temps are dead, globals
2667    and local temps should be in memory. */
2668 static void la_bb_end(TCGContext *s, int ng, int nt)
2669 {
2670     int i;
2671 
2672     for (i = 0; i < nt; ++i) {
2673         TCGTemp *ts = &s->temps[i];
2674         int state;
2675 
2676         switch (ts->kind) {
2677         case TEMP_FIXED:
2678         case TEMP_GLOBAL:
2679         case TEMP_LOCAL:
2680             state = TS_DEAD | TS_MEM;
2681             break;
2682         case TEMP_NORMAL:
2683         case TEMP_CONST:
2684             state = TS_DEAD;
2685             break;
2686         default:
2687             g_assert_not_reached();
2688         }
2689         ts->state = state;
2690         la_reset_pref(ts);
2691     }
2692 }
2693 
2694 /* liveness analysis: sync globals back to memory.  */
2695 static void la_global_sync(TCGContext *s, int ng)
2696 {
2697     int i;
2698 
2699     for (i = 0; i < ng; ++i) {
2700         int state = s->temps[i].state;
2701         s->temps[i].state = state | TS_MEM;
2702         if (state == TS_DEAD) {
2703             /* If the global was previously dead, reset prefs.  */
2704             la_reset_pref(&s->temps[i]);
2705         }
2706     }
2707 }
2708 
2709 /*
2710  * liveness analysis: conditional branch: all temps are dead,
2711  * globals and local temps should be synced.
2712  */
2713 static void la_bb_sync(TCGContext *s, int ng, int nt)
2714 {
2715     la_global_sync(s, ng);
2716 
2717     for (int i = ng; i < nt; ++i) {
2718         TCGTemp *ts = &s->temps[i];
2719         int state;
2720 
2721         switch (ts->kind) {
2722         case TEMP_LOCAL:
2723             state = ts->state;
2724             ts->state = state | TS_MEM;
2725             if (state != TS_DEAD) {
2726                 continue;
2727             }
2728             break;
2729         case TEMP_NORMAL:
2730             s->temps[i].state = TS_DEAD;
2731             break;
2732         case TEMP_CONST:
2733             continue;
2734         default:
2735             g_assert_not_reached();
2736         }
2737         la_reset_pref(&s->temps[i]);
2738     }
2739 }
2740 
2741 /* liveness analysis: sync globals back to memory and kill.  */
2742 static void la_global_kill(TCGContext *s, int ng)
2743 {
2744     int i;
2745 
2746     for (i = 0; i < ng; i++) {
2747         s->temps[i].state = TS_DEAD | TS_MEM;
2748         la_reset_pref(&s->temps[i]);
2749     }
2750 }
2751 
2752 /* liveness analysis: note live temporaries crossing calls.  */
2753 static void la_cross_call(TCGContext *s, int nt)
2754 {
2755     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2756     int i;
2757 
2758     for (i = 0; i < nt; i++) {
2759         TCGTemp *ts = &s->temps[i];
2760         if (!(ts->state & TS_DEAD)) {
2761             TCGRegSet *pset = la_temp_pref(ts);
2762             TCGRegSet set = *pset;
2763 
2764             set &= mask;
2765             /* If the combination is not possible, restart.  */
2766             if (set == 0) {
2767                 set = tcg_target_available_regs[ts->type] & mask;
2768             }
2769             *pset = set;
2770         }
2771     }
2772 }
2773 
2774 /* Liveness analysis: update the opc_arg_life array to tell if a
2775    given input argument is dead.  Instructions updating dead
2776    temporaries are removed. */
2777 static void liveness_pass_1(TCGContext *s)
2778 {
2779     int nb_globals = s->nb_globals;
2780     int nb_temps = s->nb_temps;
2781     TCGOp *op, *op_prev;
2782     TCGRegSet *prefs;
2783     int i;
2784 
2785     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2786     for (i = 0; i < nb_temps; ++i) {
2787         s->temps[i].state_ptr = prefs + i;
2788     }
2789 
2790     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2791     la_func_end(s, nb_globals, nb_temps);
2792 
2793     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2794         int nb_iargs, nb_oargs;
2795         TCGOpcode opc_new, opc_new2;
2796         bool have_opc_new2;
2797         TCGLifeData arg_life = 0;
2798         TCGTemp *ts;
2799         TCGOpcode opc = op->opc;
2800         const TCGOpDef *def = &tcg_op_defs[opc];
2801 
2802         switch (opc) {
2803         case INDEX_op_call:
2804             {
2805                 int call_flags;
2806                 int nb_call_regs;
2807 
2808                 nb_oargs = TCGOP_CALLO(op);
2809                 nb_iargs = TCGOP_CALLI(op);
2810                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2811 
2812                 /* pure functions can be removed if their result is unused */
2813                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2814                     for (i = 0; i < nb_oargs; i++) {
2815                         ts = arg_temp(op->args[i]);
2816                         if (ts->state != TS_DEAD) {
2817                             goto do_not_remove_call;
2818                         }
2819                     }
2820                     goto do_remove;
2821                 }
2822             do_not_remove_call:
2823 
2824                 /* Output args are dead.  */
2825                 for (i = 0; i < nb_oargs; i++) {
2826                     ts = arg_temp(op->args[i]);
2827                     if (ts->state & TS_DEAD) {
2828                         arg_life |= DEAD_ARG << i;
2829                     }
2830                     if (ts->state & TS_MEM) {
2831                         arg_life |= SYNC_ARG << i;
2832                     }
2833                     ts->state = TS_DEAD;
2834                     la_reset_pref(ts);
2835 
2836                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2837                     op->output_pref[i] = 0;
2838                 }
2839 
2840                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2841                                     TCG_CALL_NO_READ_GLOBALS))) {
2842                     la_global_kill(s, nb_globals);
2843                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2844                     la_global_sync(s, nb_globals);
2845                 }
2846 
2847                 /* Record arguments that die in this helper.  */
2848                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2849                     ts = arg_temp(op->args[i]);
2850                     if (ts && ts->state & TS_DEAD) {
2851                         arg_life |= DEAD_ARG << i;
2852                     }
2853                 }
2854 
2855                 /* For all live registers, remove call-clobbered prefs.  */
2856                 la_cross_call(s, nb_temps);
2857 
2858                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2859 
2860                 /* Input arguments are live for preceding opcodes.  */
2861                 for (i = 0; i < nb_iargs; i++) {
2862                     ts = arg_temp(op->args[i + nb_oargs]);
2863                     if (ts && ts->state & TS_DEAD) {
2864                         /* For those arguments that die, and will be allocated
2865                          * in registers, clear the register set for that arg,
2866                          * to be filled in below.  For args that will be on
2867                          * the stack, reset to any available reg.
2868                          */
2869                         *la_temp_pref(ts)
2870                             = (i < nb_call_regs ? 0 :
2871                                tcg_target_available_regs[ts->type]);
2872                         ts->state &= ~TS_DEAD;
2873                     }
2874                 }
2875 
2876                 /* For each input argument, add its input register to prefs.
2877                    If a temp is used once, this produces a single set bit.  */
2878                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2879                     ts = arg_temp(op->args[i + nb_oargs]);
2880                     if (ts) {
2881                         tcg_regset_set_reg(*la_temp_pref(ts),
2882                                            tcg_target_call_iarg_regs[i]);
2883                     }
2884                 }
2885             }
2886             break;
2887         case INDEX_op_insn_start:
2888             break;
2889         case INDEX_op_discard:
2890             /* mark the temporary as dead */
2891             ts = arg_temp(op->args[0]);
2892             ts->state = TS_DEAD;
2893             la_reset_pref(ts);
2894             break;
2895 
2896         case INDEX_op_add2_i32:
2897             opc_new = INDEX_op_add_i32;
2898             goto do_addsub2;
2899         case INDEX_op_sub2_i32:
2900             opc_new = INDEX_op_sub_i32;
2901             goto do_addsub2;
2902         case INDEX_op_add2_i64:
2903             opc_new = INDEX_op_add_i64;
2904             goto do_addsub2;
2905         case INDEX_op_sub2_i64:
2906             opc_new = INDEX_op_sub_i64;
2907         do_addsub2:
2908             nb_iargs = 4;
2909             nb_oargs = 2;
2910             /* Test if the high part of the operation is dead, but not
2911                the low part.  The result can be optimized to a simple
2912                add or sub.  This happens often for an x86_64 guest when
2913                the CPU mode is set to 32 bit.  */
2914             if (arg_temp(op->args[1])->state == TS_DEAD) {
2915                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2916                     goto do_remove;
2917                 }
2918                 /* Replace the opcode and adjust the args in place,
2919                    leaving 3 unused args at the end.  */
2920                 op->opc = opc = opc_new;
2921                 op->args[1] = op->args[2];
2922                 op->args[2] = op->args[4];
2923                 /* Fall through and mark the single-word operation live.  */
2924                 nb_iargs = 2;
2925                 nb_oargs = 1;
2926             }
2927             goto do_not_remove;
2928 
2929         case INDEX_op_mulu2_i32:
2930             opc_new = INDEX_op_mul_i32;
2931             opc_new2 = INDEX_op_muluh_i32;
2932             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2933             goto do_mul2;
2934         case INDEX_op_muls2_i32:
2935             opc_new = INDEX_op_mul_i32;
2936             opc_new2 = INDEX_op_mulsh_i32;
2937             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2938             goto do_mul2;
2939         case INDEX_op_mulu2_i64:
2940             opc_new = INDEX_op_mul_i64;
2941             opc_new2 = INDEX_op_muluh_i64;
2942             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2943             goto do_mul2;
2944         case INDEX_op_muls2_i64:
2945             opc_new = INDEX_op_mul_i64;
2946             opc_new2 = INDEX_op_mulsh_i64;
2947             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2948             goto do_mul2;
2949         do_mul2:
2950             nb_iargs = 2;
2951             nb_oargs = 2;
2952             if (arg_temp(op->args[1])->state == TS_DEAD) {
2953                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2954                     /* Both parts of the operation are dead.  */
2955                     goto do_remove;
2956                 }
2957                 /* The high part of the operation is dead; generate the low. */
2958                 op->opc = opc = opc_new;
2959                 op->args[1] = op->args[2];
2960                 op->args[2] = op->args[3];
2961             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2962                 /* The low part of the operation is dead; generate the high. */
2963                 op->opc = opc = opc_new2;
2964                 op->args[0] = op->args[1];
2965                 op->args[1] = op->args[2];
2966                 op->args[2] = op->args[3];
2967             } else {
2968                 goto do_not_remove;
2969             }
2970             /* Mark the single-word operation live.  */
2971             nb_oargs = 1;
2972             goto do_not_remove;
2973 
2974         default:
2975             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2976             nb_iargs = def->nb_iargs;
2977             nb_oargs = def->nb_oargs;
2978 
2979             /* Test if the operation can be removed because all
2980                its outputs are dead. We assume that nb_oargs == 0
2981                implies side effects.  */
2982             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2983                 for (i = 0; i < nb_oargs; i++) {
2984                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2985                         goto do_not_remove;
2986                     }
2987                 }
2988                 goto do_remove;
2989             }
2990             goto do_not_remove;
2991 
2992         do_remove:
2993             tcg_op_remove(s, op);
2994             break;
2995 
2996         do_not_remove:
2997             for (i = 0; i < nb_oargs; i++) {
2998                 ts = arg_temp(op->args[i]);
2999 
3000                 /* Remember the preference of the uses that followed.  */
3001                 op->output_pref[i] = *la_temp_pref(ts);
3002 
3003                 /* Output args are dead.  */
3004                 if (ts->state & TS_DEAD) {
3005                     arg_life |= DEAD_ARG << i;
3006                 }
3007                 if (ts->state & TS_MEM) {
3008                     arg_life |= SYNC_ARG << i;
3009                 }
3010                 ts->state = TS_DEAD;
3011                 la_reset_pref(ts);
3012             }
3013 
3014             /* If end of basic block, update.  */
3015             if (def->flags & TCG_OPF_BB_EXIT) {
3016                 la_func_end(s, nb_globals, nb_temps);
3017             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3018                 la_bb_sync(s, nb_globals, nb_temps);
3019             } else if (def->flags & TCG_OPF_BB_END) {
3020                 la_bb_end(s, nb_globals, nb_temps);
3021             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3022                 la_global_sync(s, nb_globals);
3023                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3024                     la_cross_call(s, nb_temps);
3025                 }
3026             }
3027 
3028             /* Record arguments that die in this opcode.  */
3029             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3030                 ts = arg_temp(op->args[i]);
3031                 if (ts->state & TS_DEAD) {
3032                     arg_life |= DEAD_ARG << i;
3033                 }
3034             }
3035 
3036             /* Input arguments are live for preceding opcodes.  */
3037             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3038                 ts = arg_temp(op->args[i]);
3039                 if (ts->state & TS_DEAD) {
3040                     /* For operands that were dead, initially allow
3041                        all regs for the type.  */
3042                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3043                     ts->state &= ~TS_DEAD;
3044                 }
3045             }
3046 
3047             /* Incorporate constraints for this operand.  */
3048             switch (opc) {
3049             case INDEX_op_mov_i32:
3050             case INDEX_op_mov_i64:
3051                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3052                    have proper constraints.  That said, special case
3053                    moves to propagate preferences backward.  */
3054                 if (IS_DEAD_ARG(1)) {
3055                     *la_temp_pref(arg_temp(op->args[0]))
3056                         = *la_temp_pref(arg_temp(op->args[1]));
3057                 }
3058                 break;
3059 
3060             default:
3061                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3062                     const TCGArgConstraint *ct = &def->args_ct[i];
3063                     TCGRegSet set, *pset;
3064 
3065                     ts = arg_temp(op->args[i]);
3066                     pset = la_temp_pref(ts);
3067                     set = *pset;
3068 
3069                     set &= ct->regs;
3070                     if (ct->ialias) {
3071                         set &= op->output_pref[ct->alias_index];
3072                     }
3073                     /* If the combination is not possible, restart.  */
3074                     if (set == 0) {
3075                         set = ct->regs;
3076                     }
3077                     *pset = set;
3078                 }
3079                 break;
3080             }
3081             break;
3082         }
3083         op->life = arg_life;
3084     }
3085 }
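/*
 * A sketch of the do_addsub2 narrowing above: for
 *     add2_i32 lo, hi, al, ah, bl, bh
 * with hi dead but lo live, the op is rewritten in place to
 *     add_i32 lo, al, bl
 * (args[1] = args[2], args[2] = args[4]), and then falls through to be
 * marked live as an ordinary single-word add.
 */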
3086 
3087 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3088 static bool liveness_pass_2(TCGContext *s)
3089 {
3090     int nb_globals = s->nb_globals;
3091     int nb_temps, i;
3092     bool changes = false;
3093     TCGOp *op, *op_next;
3094 
3095     /* Create a temporary for each indirect global.  */
3096     for (i = 0; i < nb_globals; ++i) {
3097         TCGTemp *its = &s->temps[i];
3098         if (its->indirect_reg) {
3099             TCGTemp *dts = tcg_temp_alloc(s);
3100             dts->type = its->type;
3101             dts->base_type = its->base_type;
3102             its->state_ptr = dts;
3103         } else {
3104             its->state_ptr = NULL;
3105         }
3106         /* All globals begin dead.  */
3107         its->state = TS_DEAD;
3108     }
3109     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3110         TCGTemp *its = &s->temps[i];
3111         its->state_ptr = NULL;
3112         its->state = TS_DEAD;
3113     }
3114 
3115     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3116         TCGOpcode opc = op->opc;
3117         const TCGOpDef *def = &tcg_op_defs[opc];
3118         TCGLifeData arg_life = op->life;
3119         int nb_iargs, nb_oargs, call_flags;
3120         TCGTemp *arg_ts, *dir_ts;
3121 
3122         if (opc == INDEX_op_call) {
3123             nb_oargs = TCGOP_CALLO(op);
3124             nb_iargs = TCGOP_CALLI(op);
3125             call_flags = op->args[nb_oargs + nb_iargs + 1];
3126         } else {
3127             nb_iargs = def->nb_iargs;
3128             nb_oargs = def->nb_oargs;
3129 
3130             /* Set flags similar to those that calls require.  */
3131             if (def->flags & TCG_OPF_COND_BRANCH) {
3132                 /* Like reading globals: sync_globals */
3133                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3134             } else if (def->flags & TCG_OPF_BB_END) {
3135                 /* Like writing globals: save_globals */
3136                 call_flags = 0;
3137             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3138                 /* Like reading globals: sync_globals */
3139                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3140             } else {
3141                 /* No effect on globals.  */
3142                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3143                               TCG_CALL_NO_WRITE_GLOBALS);
3144             }
3145         }
3146 
3147         /* Make sure that input arguments are available.  */
3148         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3149             arg_ts = arg_temp(op->args[i]);
3150             if (arg_ts) {
3151                 dir_ts = arg_ts->state_ptr;
3152                 if (dir_ts && arg_ts->state == TS_DEAD) {
3153                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3154                                       ? INDEX_op_ld_i32
3155                                       : INDEX_op_ld_i64);
3156                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3157 
3158                     lop->args[0] = temp_arg(dir_ts);
3159                     lop->args[1] = temp_arg(arg_ts->mem_base);
3160                     lop->args[2] = arg_ts->mem_offset;
3161 
3162                     /* Loaded, but synced with memory.  */
3163                     arg_ts->state = TS_MEM;
3164                 }
3165             }
3166         }
3167 
3168         /* Perform input replacement, and mark inputs that became dead.
3169            No action is required except keeping temp_state up to date
3170            so that we reload when needed.  */
3171         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3172             arg_ts = arg_temp(op->args[i]);
3173             if (arg_ts) {
3174                 dir_ts = arg_ts->state_ptr;
3175                 if (dir_ts) {
3176                     op->args[i] = temp_arg(dir_ts);
3177                     changes = true;
3178                     if (IS_DEAD_ARG(i)) {
3179                         arg_ts->state = TS_DEAD;
3180                     }
3181                 }
3182             }
3183         }
3184 
3185         /* Liveness analysis should ensure that the following are
3186            all correct, for call sites and basic block end points.  */
3187         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3188             /* Nothing to do */
3189         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3190             for (i = 0; i < nb_globals; ++i) {
3191                 /* Liveness should see that globals are synced back,
3192                    that is, either TS_DEAD or TS_MEM.  */
3193                 arg_ts = &s->temps[i];
3194                 tcg_debug_assert(arg_ts->state_ptr == 0
3195                                  || arg_ts->state != 0);
3196             }
3197         } else {
3198             for (i = 0; i < nb_globals; ++i) {
3199                 /* Liveness should see that globals are saved back,
3200                    that is, TS_DEAD, waiting to be reloaded.  */
3201                 arg_ts = &s->temps[i];
3202                 tcg_debug_assert(arg_ts->state_ptr == 0
3203                                  || arg_ts->state == TS_DEAD);
3204             }
3205         }
3206 
3207         /* Outputs become available.  */
3208         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3209             arg_ts = arg_temp(op->args[0]);
3210             dir_ts = arg_ts->state_ptr;
3211             if (dir_ts) {
3212                 op->args[0] = temp_arg(dir_ts);
3213                 changes = true;
3214 
3215                 /* The output is now live and modified.  */
3216                 arg_ts->state = 0;
3217 
3218                 if (NEED_SYNC_ARG(0)) {
3219                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3220                                       ? INDEX_op_st_i32
3221                                       : INDEX_op_st_i64);
3222                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3223                     TCGTemp *out_ts = dir_ts;
3224 
3225                     if (IS_DEAD_ARG(0)) {
3226                         out_ts = arg_temp(op->args[1]);
3227                         arg_ts->state = TS_DEAD;
3228                         tcg_op_remove(s, op);
3229                     } else {
3230                         arg_ts->state = TS_MEM;
3231                     }
3232 
3233                     sop->args[0] = temp_arg(out_ts);
3234                     sop->args[1] = temp_arg(arg_ts->mem_base);
3235                     sop->args[2] = arg_ts->mem_offset;
3236                 } else {
3237                     tcg_debug_assert(!IS_DEAD_ARG(0));
3238                 }
3239             }
3240         } else {
3241             for (i = 0; i < nb_oargs; i++) {
3242                 arg_ts = arg_temp(op->args[i]);
3243                 dir_ts = arg_ts->state_ptr;
3244                 if (!dir_ts) {
3245                     continue;
3246                 }
3247                 op->args[i] = temp_arg(dir_ts);
3248                 changes = true;
3249 
3250                 /* The output is now live and modified.  */
3251                 arg_ts->state = 0;
3252 
3253                 /* Sync outputs upon their last write.  */
3254                 if (NEED_SYNC_ARG(i)) {
3255                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3256                                       ? INDEX_op_st_i32
3257                                       : INDEX_op_st_i64);
3258                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3259 
3260                     sop->args[0] = temp_arg(dir_ts);
3261                     sop->args[1] = temp_arg(arg_ts->mem_base);
3262                     sop->args[2] = arg_ts->mem_offset;
3263 
3264                     arg_ts->state = TS_MEM;
3265                 }
3266                 /* Drop outputs that are dead.  */
3267                 if (IS_DEAD_ARG(i)) {
3268                     arg_ts->state = TS_DEAD;
3269                 }
3270             }
3271         }
3272     }
3273 
3274     return changes;
3275 }
3276 
3277 #ifdef CONFIG_DEBUG_TCG
3278 static void dump_regs(TCGContext *s)
3279 {
3280     TCGTemp *ts;
3281     int i;
3282     char buf[64];
3283 
3284     for (i = 0; i < s->nb_temps; i++) {
3285         ts = &s->temps[i];
3286         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3287         switch (ts->val_type) {
3288         case TEMP_VAL_REG:
3289             printf("%s", tcg_target_reg_names[ts->reg]);
3290             break;
3291         case TEMP_VAL_MEM:
3292             printf("%d(%s)", (int)ts->mem_offset,
3293                    tcg_target_reg_names[ts->mem_base->reg]);
3294             break;
3295         case TEMP_VAL_CONST:
3296             printf("$0x%" PRIx64, ts->val);
3297             break;
3298         case TEMP_VAL_DEAD:
3299             printf("D");
3300             break;
3301         default:
3302             printf("???");
3303             break;
3304         }
3305         printf("\n");
3306     }
3307 
3308     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3309         if (s->reg_to_temp[i] != NULL) {
3310             printf("%s: %s\n",
3311                    tcg_target_reg_names[i],
3312                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3313         }
3314     }
3315 }
3316 
3317 static void check_regs(TCGContext *s)
3318 {
3319     int reg;
3320     int k;
3321     TCGTemp *ts;
3322     char buf[64];
3323 
3324     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3325         ts = s->reg_to_temp[reg];
3326         if (ts != NULL) {
3327             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3328                 printf("Inconsistency for register %s:\n",
3329                        tcg_target_reg_names[reg]);
3330                 goto fail;
3331             }
3332         }
3333     }
3334     for (k = 0; k < s->nb_temps; k++) {
3335         ts = &s->temps[k];
3336         if (ts->val_type == TEMP_VAL_REG
3337             && ts->kind != TEMP_FIXED
3338             && s->reg_to_temp[ts->reg] != ts) {
3339             printf("Inconsistency for temp %s:\n",
3340                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3341         fail:
3342             printf("reg state:\n");
3343             dump_regs(s);
3344             tcg_abort();
3345         }
3346     }
3347 }
3348 #endif
3349 
3350 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3351 {
3352 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3353     /* The sparc64 stack is accessed with an offset of 2047 */
3354     s->current_frame_offset = (s->current_frame_offset +
3355                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3356         ~(sizeof(tcg_target_long) - 1);
3357 #endif
3358     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3359         s->frame_end) {
3360         tcg_abort();
3361     }
3362     ts->mem_offset = s->current_frame_offset;
3363     ts->mem_base = s->frame_temp;
3364     ts->mem_allocated = 1;
3365     s->current_frame_offset += sizeof(tcg_target_long);
3366 }
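/*
 * For illustration: with an 8-byte tcg_target_long, the round-up above
 * maps a current_frame_offset of 13 to (13 + 7) & ~7 == 16, so each
 * spilled temporary occupies one naturally aligned word-sized slot.
 */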
3367 
3368 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3369 
3370 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3371    mark it free; otherwise mark it dead.  */
3372 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3373 {
3374     TCGTempVal new_type;
3375 
3376     switch (ts->kind) {
3377     case TEMP_FIXED:
3378         return;
3379     case TEMP_GLOBAL:
3380     case TEMP_LOCAL:
3381         new_type = TEMP_VAL_MEM;
3382         break;
3383     case TEMP_NORMAL:
3384         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3385         break;
3386     case TEMP_CONST:
3387         new_type = TEMP_VAL_CONST;
3388         break;
3389     default:
3390         g_assert_not_reached();
3391     }
3392     if (ts->val_type == TEMP_VAL_REG) {
3393         s->reg_to_temp[ts->reg] = NULL;
3394     }
3395     ts->val_type = new_type;
3396 }
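/*
 * To summarize the transitions above: fixed temps are never released;
 * globals and local temps fall back to their canonical memory slot;
 * constants revert to TEMP_VAL_CONST; plain temps become dead, or fall
 * back to memory when explicitly freed.
 */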
3397 
3398 /* Mark a temporary as dead.  */
3399 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3400 {
3401     temp_free_or_dead(s, ts, 1);
3402 }
3403 
3404 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3405    register needs to be allocated to store a constant.  If 'free_or_dead'
3406    is non-zero, subsequently release the temporary; if it is positive, the
3407    temp is dead; if it is negative, the temp is free.  */
3408 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3409                       TCGRegSet preferred_regs, int free_or_dead)
3410 {
3411     if (!temp_readonly(ts) && !ts->mem_coherent) {
3412         if (!ts->mem_allocated) {
3413             temp_allocate_frame(s, ts);
3414         }
3415         switch (ts->val_type) {
3416         case TEMP_VAL_CONST:
3417             /* If we're going to free the temp immediately, then we won't
3418                require it later in a register, so attempt to store the
3419                constant to memory directly.  */
3420             if (free_or_dead
3421                 && tcg_out_sti(s, ts->type, ts->val,
3422                                ts->mem_base->reg, ts->mem_offset)) {
3423                 break;
3424             }
3425             temp_load(s, ts, tcg_target_available_regs[ts->type],
3426                       allocated_regs, preferred_regs);
3427             /* fallthrough */
3428 
3429         case TEMP_VAL_REG:
3430             tcg_out_st(s, ts->type, ts->reg,
3431                        ts->mem_base->reg, ts->mem_offset);
3432             break;
3433 
3434         case TEMP_VAL_MEM:
3435             break;
3436 
3437         case TEMP_VAL_DEAD:
3438         default:
3439             tcg_abort();
3440         }
3441         ts->mem_coherent = 1;
3442     }
3443     if (free_or_dead) {
3444         temp_free_or_dead(s, ts, free_or_dead);
3445     }
3446 }
3447 
3448 /* free register 'reg' by spilling the corresponding temporary if necessary */
3449 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3450 {
3451     TCGTemp *ts = s->reg_to_temp[reg];
3452     if (ts != NULL) {
3453         temp_sync(s, ts, allocated_regs, 0, -1);
3454     }
3455 }
3456 
3457 /**
3458  * tcg_reg_alloc:
3459  * @required_regs: Set of registers in which we must allocate.
3460  * @allocated_regs: Set of registers which must be avoided.
3461  * @preferred_regs: Set of registers we should prefer.
3462  * @rev: True if we search the registers in "indirect" order.
3463  *
3464  * The allocated register must be in @required_regs & ~@allocated_regs,
3465  * but if we can put it in @preferred_regs we may save a move later.
3466  */
3467 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3468                             TCGRegSet allocated_regs,
3469                             TCGRegSet preferred_regs, bool rev)
3470 {
3471     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3472     TCGRegSet reg_ct[2];
3473     const int *order;
3474 
3475     reg_ct[1] = required_regs & ~allocated_regs;
3476     tcg_debug_assert(reg_ct[1] != 0);
3477     reg_ct[0] = reg_ct[1] & preferred_regs;
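    /*
     * reg_ct[1] is the full candidate set; reg_ct[0] narrows it to
     * the caller's preferred registers.
     */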
3478 
3479     /* Skip the preferred_regs option if it cannot be satisfied,
3480        or if the preference made no difference.  */
3481     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3482 
3483     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3484 
3485     /* Try free registers, preferences first.  */
3486     for (j = f; j < 2; j++) {
3487         TCGRegSet set = reg_ct[j];
3488 
3489         if (tcg_regset_single(set)) {
3490             /* One register in the set.  */
3491             TCGReg reg = tcg_regset_first(set);
3492             if (s->reg_to_temp[reg] == NULL) {
3493                 return reg;
3494             }
3495         } else {
3496             for (i = 0; i < n; i++) {
3497                 TCGReg reg = order[i];
3498                 if (s->reg_to_temp[reg] == NULL &&
3499                     tcg_regset_test_reg(set, reg)) {
3500                     return reg;
3501                 }
3502             }
3503         }
3504     }
3505 
3506     /* We must spill something.  */
3507     for (j = f; j < 2; j++) {
3508         TCGRegSet set = reg_ct[j];
3509 
3510         if (tcg_regset_single(set)) {
3511             /* One register in the set.  */
3512             TCGReg reg = tcg_regset_first(set);
3513             tcg_reg_free(s, reg, allocated_regs);
3514             return reg;
3515         } else {
3516             for (i = 0; i < n; i++) {
3517                 TCGReg reg = order[i];
3518                 if (tcg_regset_test_reg(set, reg)) {
3519                     tcg_reg_free(s, reg, allocated_regs);
3520                     return reg;
3521                 }
3522             }
3523         }
3524     }
3525 
3526     tcg_abort();
3527 }
3528 
3529 /* Make sure the temporary is in a register.  If needed, allocate the register
3530    from DESIRED while avoiding ALLOCATED.  */
3531 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3532                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3533 {
3534     TCGReg reg;
3535 
3536     switch (ts->val_type) {
3537     case TEMP_VAL_REG:
3538         return;
3539     case TEMP_VAL_CONST:
3540         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3541                             preferred_regs, ts->indirect_base);
3542         if (ts->type <= TCG_TYPE_I64) {
3543             tcg_out_movi(s, ts->type, reg, ts->val);
3544         } else {
3545             uint64_t val = ts->val;
3546             MemOp vece = MO_64;
3547 
3548             /*
3549              * Find the minimal vector element that matches the constant.
3550              * The targets will, in general, have to do this search anyway,
3551              * do this generically.
3552              */
3553             if (val == dup_const(MO_8, val)) {
3554                 vece = MO_8;
3555             } else if (val == dup_const(MO_16, val)) {
3556                 vece = MO_16;
3557             } else if (val == dup_const(MO_32, val)) {
3558                 vece = MO_32;
3559             }
3560 
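            /*
             * E.g. val == 0x7f7f7f7f7f7f7f7f equals dup_const(MO_8, val),
             * so it can be broadcast from a single byte with vece == MO_8.
             */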
3561             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3562         }
3563         ts->mem_coherent = 0;
3564         break;
3565     case TEMP_VAL_MEM:
3566         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3567                             preferred_regs, ts->indirect_base);
3568         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3569         ts->mem_coherent = 1;
3570         break;
3571     case TEMP_VAL_DEAD:
3572     default:
3573         tcg_abort();
3574     }
3575     ts->reg = reg;
3576     ts->val_type = TEMP_VAL_REG;
3577     s->reg_to_temp[reg] = ts;
3578 }
3579 
3580 /* Save a temporary to memory. 'allocated_regs' is used in case a
3581    temporary register needs to be allocated to store a constant.  */
3582 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3583 {
3584     /* The liveness analysis already ensures that globals are back
3585        in memory. Keep a tcg_debug_assert for safety. */
3586     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3587 }
3588 
3589 /* save globals to their canonical location and assume they can be
3590    modified by the following code. 'allocated_regs' is used in case a
3591    temporary register needs to be allocated to store a constant. */
3592 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3593 {
3594     int i, n;
3595 
3596     for (i = 0, n = s->nb_globals; i < n; i++) {
3597         temp_save(s, &s->temps[i], allocated_regs);
3598     }
3599 }
3600 
3601 /* sync globals to their canonical location and assume they can be
3602    read by the following code. 'allocated_regs' is used in case a
3603    temporary register needs to be allocated to store a constant. */
3604 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3605 {
3606     int i, n;
3607 
3608     for (i = 0, n = s->nb_globals; i < n; i++) {
3609         TCGTemp *ts = &s->temps[i];
3610         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3611                          || ts->kind == TEMP_FIXED
3612                          || ts->mem_coherent);
3613     }
3614 }
3615 
3616 /* at the end of a basic block, we assume all temporaries are dead and
3617    all globals are stored at their canonical location. */
3618 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3619 {
3620     int i;
3621 
3622     for (i = s->nb_globals; i < s->nb_temps; i++) {
3623         TCGTemp *ts = &s->temps[i];
3624 
3625         switch (ts->kind) {
3626         case TEMP_LOCAL:
3627             temp_save(s, ts, allocated_regs);
3628             break;
3629         case TEMP_NORMAL:
3630             /* The liveness analysis already ensures that temps are dead.
3631                Keep a tcg_debug_assert for safety. */
3632             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3633             break;
3634         case TEMP_CONST:
3635             /* Similarly, we should have freed any allocated register. */
3636             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3637             break;
3638         default:
3639             g_assert_not_reached();
3640         }
3641     }
3642 
3643     save_globals(s, allocated_regs);
3644 }
3645 
3646 /*
3647  * At a conditional branch, we assume all temporaries are dead and
3648  * all globals and local temps are synced to their location.
3649  */
3650 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3651 {
3652     sync_globals(s, allocated_regs);
3653 
3654     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3655         TCGTemp *ts = &s->temps[i];
3656         /*
3657          * The liveness analysis already ensures that temps are dead.
3658          * Keep tcg_debug_asserts for safety.
3659          */
3660         switch (ts->kind) {
3661         case TEMP_LOCAL:
3662             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3663             break;
3664         case TEMP_NORMAL:
3665             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3666             break;
3667         case TEMP_CONST:
3668             break;
3669         default:
3670             g_assert_not_reached();
3671         }
3672     }
3673 }
3674 
3675 /*
3676  * Specialized code generation for INDEX_op_mov_* with a constant.
3677  */
3678 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3679                                   tcg_target_ulong val, TCGLifeData arg_life,
3680                                   TCGRegSet preferred_regs)
3681 {
3682     /* ENV should not be modified.  */
3683     tcg_debug_assert(!temp_readonly(ots));
3684 
3685     /* The movi is not explicitly generated here.  */
3686     if (ots->val_type == TEMP_VAL_REG) {
3687         s->reg_to_temp[ots->reg] = NULL;
3688     }
3689     ots->val_type = TEMP_VAL_CONST;
3690     ots->val = val;
3691     ots->mem_coherent = 0;
3692     if (NEED_SYNC_ARG(0)) {
3693         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3694     } else if (IS_DEAD_ARG(0)) {
3695         temp_dead(s, ots);
3696     }
3697 }
3698 
3699 /*
3700  * Specialized code generation for INDEX_op_mov_*.
3701  */
3702 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3703 {
3704     const TCGLifeData arg_life = op->life;
3705     TCGRegSet allocated_regs, preferred_regs;
3706     TCGTemp *ts, *ots;
3707     TCGType otype, itype;
3708 
3709     allocated_regs = s->reserved_regs;
3710     preferred_regs = op->output_pref[0];
3711     ots = arg_temp(op->args[0]);
3712     ts = arg_temp(op->args[1]);
3713 
3714     /* ENV should not be modified.  */
3715     tcg_debug_assert(!temp_readonly(ots));
3716 
3717     /* Note that otype != itype for no-op truncation.  */
3718     otype = ots->type;
3719     itype = ts->type;
3720 
3721     if (ts->val_type == TEMP_VAL_CONST) {
3722         /* propagate constant or generate sti */
3723         tcg_target_ulong val = ts->val;
3724         if (IS_DEAD_ARG(1)) {
3725             temp_dead(s, ts);
3726         }
3727         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3728         return;
3729     }
3730 
3731     /* If the source value is in memory we're going to be forced
3732        to have it in a register in order to perform the copy.  Copy
3733        the SOURCE value into its own register first; that way we
3734        don't have to reload SOURCE the next time it is used. */
3735     if (ts->val_type == TEMP_VAL_MEM) {
3736         temp_load(s, ts, tcg_target_available_regs[itype],
3737                   allocated_regs, preferred_regs);
3738     }
3739 
3740     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3741     if (IS_DEAD_ARG(0)) {
3742         /* mov to a non-saved dead register makes no sense (even with
3743            liveness analysis disabled). */
3744         tcg_debug_assert(NEED_SYNC_ARG(0));
3745         if (!ots->mem_allocated) {
3746             temp_allocate_frame(s, ots);
3747         }
3748         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3749         if (IS_DEAD_ARG(1)) {
3750             temp_dead(s, ts);
3751         }
3752         temp_dead(s, ots);
3753     } else {
3754         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3755             /* the mov can be suppressed */
3756             if (ots->val_type == TEMP_VAL_REG) {
3757                 s->reg_to_temp[ots->reg] = NULL;
3758             }
3759             ots->reg = ts->reg;
3760             temp_dead(s, ts);
3761         } else {
3762             if (ots->val_type != TEMP_VAL_REG) {
3763                 /* When allocating a new register, make sure to not spill the
3764                    input one. */
3765                 tcg_regset_set_reg(allocated_regs, ts->reg);
3766                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3767                                          allocated_regs, preferred_regs,
3768                                          ots->indirect_base);
3769             }
3770             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3771                 /*
3772                  * Cross register class move not supported.
3773                  * Store the source register into the destination slot
3774                  * and leave the destination temp as TEMP_VAL_MEM.
3775                  */
3776                 assert(!temp_readonly(ots));
3777                 if (!ots->mem_allocated) {
3778                     temp_allocate_frame(s, ots);
3779                 }
3780                 tcg_out_st(s, ts->type, ts->reg,
3781                            ots->mem_base->reg, ots->mem_offset);
3782                 ots->mem_coherent = 1;
3783                 temp_free_or_dead(s, ots, -1);
3784                 return;
3785             }
3786         }
3787         ots->val_type = TEMP_VAL_REG;
3788         ots->mem_coherent = 0;
3789         s->reg_to_temp[ots->reg] = ots;
3790         if (NEED_SYNC_ARG(0)) {
3791             temp_sync(s, ots, allocated_regs, 0, 0);
3792         }
3793     }
3794 }
3795 
3796 /*
3797  * Specialized code generation for INDEX_op_dup_vec.
3798  */
3799 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3800 {
3801     const TCGLifeData arg_life = op->life;
3802     TCGRegSet dup_out_regs, dup_in_regs;
3803     TCGTemp *its, *ots;
3804     TCGType itype, vtype;
3805     intptr_t endian_fixup;
3806     unsigned vece;
3807     bool ok;
3808 
3809     ots = arg_temp(op->args[0]);
3810     its = arg_temp(op->args[1]);
3811 
3812     /* ENV should not be modified.  */
3813     tcg_debug_assert(!temp_readonly(ots));
3814 
3815     itype = its->type;
3816     vece = TCGOP_VECE(op);
3817     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3818 
3819     if (its->val_type == TEMP_VAL_CONST) {
3820         /* Propagate constant via movi -> dupi.  */
3821         tcg_target_ulong val = its->val;
3822         if (IS_DEAD_ARG(1)) {
3823             temp_dead(s, its);
3824         }
3825         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3826         return;
3827     }
3828 
3829     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3830     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3831 
3832     /* Allocate the output register now.  */
3833     if (ots->val_type != TEMP_VAL_REG) {
3834         TCGRegSet allocated_regs = s->reserved_regs;
3835 
3836         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3837             /* Make sure to not spill the input register. */
3838             tcg_regset_set_reg(allocated_regs, its->reg);
3839         }
3840         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3841                                  op->output_pref[0], ots->indirect_base);
3842         ots->val_type = TEMP_VAL_REG;
3843         ots->mem_coherent = 0;
3844         s->reg_to_temp[ots->reg] = ots;
3845     }
3846 
3847     switch (its->val_type) {
3848     case TEMP_VAL_REG:
3849         /*
3850          * The dup constraints must be broad, covering all possible VECE.
3851          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3852          * to fail, indicating that extra moves are required for that case.
3853          */
3854         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3855             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3856                 goto done;
3857             }
3858             /* Try again from memory or a vector input register.  */
3859         }
3860         if (!its->mem_coherent) {
3861             /*
3862              * The input register is not synced, and so an extra store
3863              * would be required to use memory.  Attempt an integer-vector
3864              * register move first.  We do not have a TCGRegSet for this.
3865              */
3866             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3867                 break;
3868             }
3869             /* Sync the temp back to its slot and load from there.  */
3870             temp_sync(s, its, s->reserved_regs, 0, 0);
3871         }
3872         /* fall through */
3873 
3874     case TEMP_VAL_MEM:
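        /*
         * A big-endian host stores the least significant element of an
         * integer at the highest address within it, so adjust the offset
         * to point the dup at the low-order element of the scalar.
         */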
3875 #ifdef HOST_WORDS_BIGENDIAN
3876         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3877         endian_fixup -= 1 << vece;
3878 #else
3879         endian_fixup = 0;
3880 #endif
3881         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3882                              its->mem_offset + endian_fixup)) {
3883             goto done;
3884         }
3885         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3886         break;
3887 
3888     default:
3889         g_assert_not_reached();
3890     }
3891 
3892     /* We now have a vector input register, so dup must succeed. */
3893     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3894     tcg_debug_assert(ok);
3895 
3896  done:
3897     if (IS_DEAD_ARG(1)) {
3898         temp_dead(s, its);
3899     }
3900     if (NEED_SYNC_ARG(0)) {
3901         temp_sync(s, ots, s->reserved_regs, 0, 0);
3902     }
3903     if (IS_DEAD_ARG(0)) {
3904         temp_dead(s, ots);
3905     }
3906 }
3907 
3908 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3909 {
3910     const TCGLifeData arg_life = op->life;
3911     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3912     TCGRegSet i_allocated_regs;
3913     TCGRegSet o_allocated_regs;
3914     int i, k, nb_iargs, nb_oargs;
3915     TCGReg reg;
3916     TCGArg arg;
3917     const TCGArgConstraint *arg_ct;
3918     TCGTemp *ts;
3919     TCGArg new_args[TCG_MAX_OP_ARGS];
3920     int const_args[TCG_MAX_OP_ARGS];
3921 
3922     nb_oargs = def->nb_oargs;
3923     nb_iargs = def->nb_iargs;
3924 
3925     /* copy constants */
3926     memcpy(new_args + nb_oargs + nb_iargs,
3927            op->args + nb_oargs + nb_iargs,
3928            sizeof(TCGArg) * def->nb_cargs);
3929 
3930     i_allocated_regs = s->reserved_regs;
3931     o_allocated_regs = s->reserved_regs;
3932 
3933     /* satisfy input constraints */
3934     for (k = 0; k < nb_iargs; k++) {
3935         TCGRegSet i_preferred_regs, o_preferred_regs;
3936 
3937         i = def->args_ct[nb_oargs + k].sort_index;
3938         arg = op->args[i];
3939         arg_ct = &def->args_ct[i];
3940         ts = arg_temp(arg);
3941 
3942         if (ts->val_type == TEMP_VAL_CONST
3943             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3944             /* constant is OK for instruction */
3945             const_args[i] = 1;
3946             new_args[i] = ts->val;
3947             continue;
3948         }
3949 
3950         i_preferred_regs = o_preferred_regs = 0;
3951         if (arg_ct->ialias) {
3952             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3953 
3954             /*
3955              * If the input is readonly, then it cannot also be an
3956              * output and aliased to itself.  If the input is not
3957              * dead after the instruction, we must allocate a new
3958              * register and move it.
3959              */
3960             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3961                 goto allocate_in_reg;
3962             }
3963 
3964             /*
3965              * Check if the current register has already been allocated
3966              * for another input aliased to an output.
3967              */
3968             if (ts->val_type == TEMP_VAL_REG) {
3969                 reg = ts->reg;
3970                 for (int k2 = 0; k2 < k; k2++) {
3971                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3972                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3973                         goto allocate_in_reg;
3974                     }
3975                 }
3976             }
3977             i_preferred_regs = o_preferred_regs;
3978         }
3979 
3980         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3981         reg = ts->reg;
3982 
3983         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3984  allocate_in_reg:
3985             /*
3986              * Allocate a new register matching the constraint
3987              * and move the temporary register into it.
3988              */
3989             temp_load(s, ts, tcg_target_available_regs[ts->type],
3990                       i_allocated_regs, 0);
3991             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3992                                 o_preferred_regs, ts->indirect_base);
3993             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3994                 /*
3995                  * Cross register class move not supported.  Sync the
3996                  * temp back to its slot and load from there.
3997                  */
3998                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3999                 tcg_out_ld(s, ts->type, reg,
4000                            ts->mem_base->reg, ts->mem_offset);
4001             }
4002         }
4003         new_args[i] = reg;
4004         const_args[i] = 0;
4005         tcg_regset_set_reg(i_allocated_regs, reg);
4006     }
4007 
4008     /* mark dead temporaries and free the associated registers */
4009     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4010         if (IS_DEAD_ARG(i)) {
4011             temp_dead(s, arg_temp(op->args[i]));
4012         }
4013     }
4014 
4015     if (def->flags & TCG_OPF_COND_BRANCH) {
4016         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4017     } else if (def->flags & TCG_OPF_BB_END) {
4018         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4019     } else {
4020         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4021             /* XXX: permit generic clobber register list ? */
4022             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4023                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4024                     tcg_reg_free(s, i, i_allocated_regs);
4025                 }
4026             }
4027         }
4028         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4029             /* sync globals if the op has side effects and might trigger
4030                an exception. */
4031             sync_globals(s, i_allocated_regs);
4032         }
4033 
4034         /* satisfy the output constraints */
4035         for (k = 0; k < nb_oargs; k++) {
4036             i = def->args_ct[k].sort_index;
4037             arg = op->args[i];
4038             arg_ct = &def->args_ct[i];
4039             ts = arg_temp(arg);
4040 
4041             /* ENV should not be modified.  */
4042             tcg_debug_assert(!temp_readonly(ts));
4043 
4044             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4045                 reg = new_args[arg_ct->alias_index];
4046             } else if (arg_ct->newreg) {
4047                 reg = tcg_reg_alloc(s, arg_ct->regs,
4048                                     i_allocated_regs | o_allocated_regs,
4049                                     op->output_pref[k], ts->indirect_base);
4050             } else {
4051                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4052                                     op->output_pref[k], ts->indirect_base);
4053             }
4054             tcg_regset_set_reg(o_allocated_regs, reg);
4055             if (ts->val_type == TEMP_VAL_REG) {
4056                 s->reg_to_temp[ts->reg] = NULL;
4057             }
4058             ts->val_type = TEMP_VAL_REG;
4059             ts->reg = reg;
4060             /*
4061              * Temp value is modified, so the value kept in memory is
4062              * potentially not the same.
4063              */
4064             ts->mem_coherent = 0;
4065             s->reg_to_temp[reg] = ts;
4066             new_args[i] = reg;
4067         }
4068     }
4069 
4070     /* emit instruction */
4071     if (def->flags & TCG_OPF_VECTOR) {
4072         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4073                        new_args, const_args);
4074     } else {
4075         tcg_out_op(s, op->opc, new_args, const_args);
4076     }
4077 
4078     /* move the outputs in the correct register if needed */
4079     for (i = 0; i < nb_oargs; i++) {
4080         ts = arg_temp(op->args[i]);
4081 
4082         /* ENV should not be modified.  */
4083         tcg_debug_assert(!temp_readonly(ts));
4084 
4085         if (NEED_SYNC_ARG(i)) {
4086             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4087         } else if (IS_DEAD_ARG(i)) {
4088             temp_dead(s, ts);
4089         }
4090     }
4091 }
4092 
4093 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4094 {
4095     const TCGLifeData arg_life = op->life;
4096     TCGTemp *ots, *itsl, *itsh;
4097     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4098 
4099     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4100     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4101     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4102 
4103     ots = arg_temp(op->args[0]);
4104     itsl = arg_temp(op->args[1]);
4105     itsh = arg_temp(op->args[2]);
4106 
4107     /* ENV should not be modified.  */
4108     tcg_debug_assert(!temp_readonly(ots));
4109 
4110     /* Allocate the output register now.  */
4111     if (ots->val_type != TEMP_VAL_REG) {
4112         TCGRegSet allocated_regs = s->reserved_regs;
4113         TCGRegSet dup_out_regs =
4114             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4115 
4116         /* Make sure to not spill the input registers. */
4117         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4118             tcg_regset_set_reg(allocated_regs, itsl->reg);
4119         }
4120         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4121             tcg_regset_set_reg(allocated_regs, itsh->reg);
4122         }
4123 
4124         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4125                                  op->output_pref[0], ots->indirect_base);
4126         ots->val_type = TEMP_VAL_REG;
4127         ots->mem_coherent = 0;
4128         s->reg_to_temp[ots->reg] = ots;
4129     }
4130 
4131     /* Promote dup2 of immediates to dupi_vec. */
4132     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
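        /* E.g. a low part of 0x89abcdef and a high part of 0x01234567
           combine to the 64-bit constant 0x0123456789abcdef. */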
4133         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4134         MemOp vece = MO_64;
4135 
4136         if (val == dup_const(MO_8, val)) {
4137             vece = MO_8;
4138         } else if (val == dup_const(MO_16, val)) {
4139             vece = MO_16;
4140         } else if (val == dup_const(MO_32, val)) {
4141             vece = MO_32;
4142         }
4143 
4144         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4145         goto done;
4146     }
4147 
4148     /* If the two inputs form one 64-bit value, try dupm_vec. */
4149     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4150         if (!itsl->mem_coherent) {
4151             temp_sync(s, itsl, s->reserved_regs, 0, 0);
4152         }
4153         if (!itsh->mem_coherent) {
4154             temp_sync(s, itsh, s->reserved_regs, 0, 0);
4155         }
4156 #ifdef HOST_WORDS_BIGENDIAN
4157         TCGTemp *its = itsh;
4158 #else
4159         TCGTemp *its = itsl;
4160 #endif
4161         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4162                              its->mem_base->reg, its->mem_offset)) {
4163             goto done;
4164         }
4165     }
4166 
4167     /* Fall back to generic expansion. */
4168     return false;
4169 
4170  done:
4171     if (IS_DEAD_ARG(1)) {
4172         temp_dead(s, itsl);
4173     }
4174     if (IS_DEAD_ARG(2)) {
4175         temp_dead(s, itsh);
4176     }
4177     if (NEED_SYNC_ARG(0)) {
4178         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4179     } else if (IS_DEAD_ARG(0)) {
4180         temp_dead(s, ots);
4181     }
4182     return true;
4183 }
4184 
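/* Flip the sign of stack offsets on the few hosts whose stack grows upward. */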
4185 #ifdef TCG_TARGET_STACK_GROWSUP
4186 #define STACK_DIR(x) (-(x))
4187 #else
4188 #define STACK_DIR(x) (x)
4189 #endif
4190 
4191 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4192 {
4193     const int nb_oargs = TCGOP_CALLO(op);
4194     const int nb_iargs = TCGOP_CALLI(op);
4195     const TCGLifeData arg_life = op->life;
4196     int flags, nb_regs, i;
4197     TCGReg reg;
4198     TCGArg arg;
4199     TCGTemp *ts;
4200     intptr_t stack_offset;
4201     size_t call_stack_size;
4202     tcg_insn_unit *func_addr;
4203     int allocate_args;
4204     TCGRegSet allocated_regs;
4205 
4206     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4207     flags = op->args[nb_oargs + nb_iargs + 1];
4208 
4209     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4210     if (nb_regs > nb_iargs) {
4211         nb_regs = nb_iargs;
4212     }
4213 
4214     /* assign stack slots first */
4215     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4216     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4217         ~(TCG_TARGET_STACK_ALIGN - 1);
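    /*
     * For illustration, assuming 8-byte slots and a 16-byte
     * TCG_TARGET_STACK_ALIGN: three stack arguments need 24 bytes,
     * which rounds up to 32.
     */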
4218     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4219     if (allocate_args) {
4220         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4221            preallocate call stack */
4222         tcg_abort();
4223     }
4224 
4225     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4226     for (i = nb_regs; i < nb_iargs; i++) {
4227         arg = op->args[nb_oargs + i];
4228 #ifdef TCG_TARGET_STACK_GROWSUP
4229         stack_offset -= sizeof(tcg_target_long);
4230 #endif
4231         if (arg != TCG_CALL_DUMMY_ARG) {
4232             ts = arg_temp(arg);
4233             temp_load(s, ts, tcg_target_available_regs[ts->type],
4234                       s->reserved_regs, 0);
4235             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4236         }
4237 #ifndef TCG_TARGET_STACK_GROWSUP
4238         stack_offset += sizeof(tcg_target_long);
4239 #endif
4240     }
4241 
4242     /* assign input registers */
4243     allocated_regs = s->reserved_regs;
4244     for (i = 0; i < nb_regs; i++) {
4245         arg = op->args[nb_oargs + i];
4246         if (arg != TCG_CALL_DUMMY_ARG) {
4247             ts = arg_temp(arg);
4248             reg = tcg_target_call_iarg_regs[i];
4249 
4250             if (ts->val_type == TEMP_VAL_REG) {
4251                 if (ts->reg != reg) {
4252                     tcg_reg_free(s, reg, allocated_regs);
4253                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4254                         /*
4255                          * Cross register class move not supported.  Sync the
4256                          * temp back to its slot and load from there.
4257                          */
4258                         temp_sync(s, ts, allocated_regs, 0, 0);
4259                         tcg_out_ld(s, ts->type, reg,
4260                                    ts->mem_base->reg, ts->mem_offset);
4261                     }
4262                 }
4263             } else {
4264                 TCGRegSet arg_set = 0;
4265 
4266                 tcg_reg_free(s, reg, allocated_regs);
4267                 tcg_regset_set_reg(arg_set, reg);
4268                 temp_load(s, ts, arg_set, allocated_regs, 0);
4269             }
4270 
4271             tcg_regset_set_reg(allocated_regs, reg);
4272         }
4273     }
4274 
4275     /* mark dead temporaries and free the associated registers */
4276     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4277         if (IS_DEAD_ARG(i)) {
4278             temp_dead(s, arg_temp(op->args[i]));
4279         }
4280     }
4281 
4282     /* clobber call registers */
4283     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4284         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4285             tcg_reg_free(s, i, allocated_regs);
4286         }
4287     }
4288 
4289     /* Save globals if they might be written by the helper; sync them if
4290        they might be read. */
4291     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4292         /* Nothing to do */
4293     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4294         sync_globals(s, allocated_regs);
4295     } else {
4296         save_globals(s, allocated_regs);
4297     }
4298 
4299     tcg_out_call(s, func_addr);
4300 
4301     /* assign output registers and emit moves if needed */
4302     for (i = 0; i < nb_oargs; i++) {
4303         arg = op->args[i];
4304         ts = arg_temp(arg);
4305 
4306         /* ENV should not be modified.  */
4307         tcg_debug_assert(!temp_readonly(ts));
4308 
4309         reg = tcg_target_call_oarg_regs[i];
4310         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4311         if (ts->val_type == TEMP_VAL_REG) {
4312             s->reg_to_temp[ts->reg] = NULL;
4313         }
4314         ts->val_type = TEMP_VAL_REG;
4315         ts->reg = reg;
4316         ts->mem_coherent = 0;
4317         s->reg_to_temp[reg] = ts;
4318         if (NEED_SYNC_ARG(i)) {
4319             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4320         } else if (IS_DEAD_ARG(i)) {
4321             temp_dead(s, ts);
4322         }
4323     }
4324 }
4325 
4326 #ifdef CONFIG_PROFILER
4327 
4328 /* avoid copy/paste errors */
4329 #define PROF_ADD(to, from, field)                       \
4330     do {                                                \
4331         (to)->field += qatomic_read(&((from)->field));  \
4332     } while (0)
4333 
4334 #define PROF_MAX(to, from, field)                                       \
4335     do {                                                                \
4336         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4337         if (val__ > (to)->field) {                                      \
4338             (to)->field = val__;                                        \
4339         }                                                               \
4340     } while (0)
4341 
4342 /* Pass in a zeroed @prof */
4343 static inline
4344 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4345 {
4346     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4347     unsigned int i;
4348 
4349     for (i = 0; i < n_ctxs; i++) {
4350         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4351         const TCGProfile *orig = &s->prof;
4352 
4353         if (counters) {
4354             PROF_ADD(prof, orig, cpu_exec_time);
4355             PROF_ADD(prof, orig, tb_count1);
4356             PROF_ADD(prof, orig, tb_count);
4357             PROF_ADD(prof, orig, op_count);
4358             PROF_MAX(prof, orig, op_count_max);
4359             PROF_ADD(prof, orig, temp_count);
4360             PROF_MAX(prof, orig, temp_count_max);
4361             PROF_ADD(prof, orig, del_op_count);
4362             PROF_ADD(prof, orig, code_in_len);
4363             PROF_ADD(prof, orig, code_out_len);
4364             PROF_ADD(prof, orig, search_out_len);
4365             PROF_ADD(prof, orig, interm_time);
4366             PROF_ADD(prof, orig, code_time);
4367             PROF_ADD(prof, orig, la_time);
4368             PROF_ADD(prof, orig, opt_time);
4369             PROF_ADD(prof, orig, restore_count);
4370             PROF_ADD(prof, orig, restore_time);
4371         }
4372         if (table) {
4373             int i;
4374 
4375             for (i = 0; i < NB_OPS; i++) {
4376                 PROF_ADD(prof, orig, table_op_count[i]);
4377             }
4378         }
4379     }
4380 }
4381 
4382 #undef PROF_ADD
4383 #undef PROF_MAX
4384 
4385 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4386 {
4387     tcg_profile_snapshot(prof, true, false);
4388 }
4389 
4390 static void tcg_profile_snapshot_table(TCGProfile *prof)
4391 {
4392     tcg_profile_snapshot(prof, false, true);
4393 }
4394 
4395 void tcg_dump_op_count(void)
4396 {
4397     TCGProfile prof = {};
4398     int i;
4399 
4400     tcg_profile_snapshot_table(&prof);
4401     for (i = 0; i < NB_OPS; i++) {
4402         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4403                     prof.table_op_count[i]);
4404     }
4405 }
4406 
4407 int64_t tcg_cpu_exec_time(void)
4408 {
4409     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4410     unsigned int i;
4411     int64_t ret = 0;
4412 
4413     for (i = 0; i < n_ctxs; i++) {
4414         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4415         const TCGProfile *prof = &s->prof;
4416 
4417         ret += qatomic_read(&prof->cpu_exec_time);
4418     }
4419     return ret;
4420 }
4421 #else
4422 void tcg_dump_op_count(void)
4423 {
4424     qemu_printf("[TCG profiler not compiled]\n");
4425 }
4426 
4427 int64_t tcg_cpu_exec_time(void)
4428 {
4429     error_report("%s: TCG profiler not compiled", __func__);
4430     exit(EXIT_FAILURE);
4431 }
4432 #endif
4433 
4434 
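/*
 * Generate host code for @tb.  Returns the number of bytes of host code
 * produced, or a negative value when the code buffer or the per-insn
 * offset table would overflow, in which case the caller must restart
 * code generation.
 */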
4435 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4436 {
4437 #ifdef CONFIG_PROFILER
4438     TCGProfile *prof = &s->prof;
4439 #endif
4440     int i, num_insns;
4441     TCGOp *op;
4442 
4443 #ifdef CONFIG_PROFILER
4444     {
4445         int n = 0;
4446 
4447         QTAILQ_FOREACH(op, &s->ops, link) {
4448             n++;
4449         }
4450         qatomic_set(&prof->op_count, prof->op_count + n);
4451         if (n > prof->op_count_max) {
4452             qatomic_set(&prof->op_count_max, n);
4453         }
4454 
4455         n = s->nb_temps;
4456         qatomic_set(&prof->temp_count, prof->temp_count + n);
4457         if (n > prof->temp_count_max) {
4458             qatomic_set(&prof->temp_count_max, n);
4459         }
4460     }
4461 #endif
4462 
4463 #ifdef DEBUG_DISAS
4464     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4465                  && qemu_log_in_addr_range(tb->pc))) {
4466         FILE *logfile = qemu_log_lock();
4467         qemu_log("OP:\n");
4468         tcg_dump_ops(s, false);
4469         qemu_log("\n");
4470         qemu_log_unlock(logfile);
4471     }
4472 #endif
4473 
4474 #ifdef CONFIG_DEBUG_TCG
4475     /* Ensure all labels referenced have been emitted.  */
4476     {
4477         TCGLabel *l;
4478         bool error = false;
4479 
4480         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4481             if (unlikely(!l->present) && l->refs) {
4482                 qemu_log_mask(CPU_LOG_TB_OP,
4483                               "$L%d referenced but not present.\n", l->id);
4484                 error = true;
4485             }
4486         }
4487         assert(!error);
4488     }
4489 #endif
4490 
4491 #ifdef CONFIG_PROFILER
4492     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4493 #endif
4494 
4495 #ifdef USE_TCG_OPTIMIZATIONS
4496     tcg_optimize(s);
4497 #endif
4498 
4499 #ifdef CONFIG_PROFILER
4500     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4501     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4502 #endif
4503 
4504     reachable_code_pass(s);
4505     liveness_pass_1(s);
4506 
4507     if (s->nb_indirects > 0) {
4508 #ifdef DEBUG_DISAS
4509         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4510                      && qemu_log_in_addr_range(tb->pc))) {
4511             FILE *logfile = qemu_log_lock();
4512             qemu_log("OP before indirect lowering:\n");
4513             tcg_dump_ops(s, false);
4514             qemu_log("\n");
4515             qemu_log_unlock(logfile);
4516         }
4517 #endif
4518         /* Replace indirect temps with direct temps.  */
4519         if (liveness_pass_2(s)) {
4520             /* If changes were made, re-run liveness.  */
4521             liveness_pass_1(s);
4522         }
4523     }
4524 
4525 #ifdef CONFIG_PROFILER
4526     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4527 #endif
4528 
4529 #ifdef DEBUG_DISAS
4530     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4531                  && qemu_log_in_addr_range(tb->pc))) {
4532         FILE *logfile = qemu_log_lock();
4533         qemu_log("OP after optimization and liveness analysis:\n");
4534         tcg_dump_ops(s, true);
4535         qemu_log("\n");
4536         qemu_log_unlock(logfile);
4537     }
4538 #endif
4539 
4540     tcg_reg_alloc_start(s);
4541 
4542     /*
4543      * Reset the buffer pointers when restarting after overflow.
4544      * TODO: Move this into translate-all.c with the rest of the
4545      * buffer management.  Having only this done here is confusing.
4546      */
4547     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4548     s->code_ptr = s->code_buf;
4549 
4550 #ifdef TCG_TARGET_NEED_LDST_LABELS
4551     QSIMPLEQ_INIT(&s->ldst_labels);
4552 #endif
4553 #ifdef TCG_TARGET_NEED_POOL_LABELS
4554     s->pool_labels = NULL;
4555 #endif
4556 
4557     num_insns = -1;
4558     QTAILQ_FOREACH(op, &s->ops, link) {
4559         TCGOpcode opc = op->opc;
4560 
4561 #ifdef CONFIG_PROFILER
4562         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4563 #endif
4564 
4565         switch (opc) {
4566         case INDEX_op_mov_i32:
4567         case INDEX_op_mov_i64:
4568         case INDEX_op_mov_vec:
4569             tcg_reg_alloc_mov(s, op);
4570             break;
4571         case INDEX_op_dup_vec:
4572             tcg_reg_alloc_dup(s, op);
4573             break;
4574         case INDEX_op_insn_start:
4575             if (num_insns >= 0) {
4576                 size_t off = tcg_current_code_size(s);
4577                 s->gen_insn_end_off[num_insns] = off;
4578                 /* Assert that we do not overflow our stored offset.  */
4579                 assert(s->gen_insn_end_off[num_insns] == off);
4580             }
4581             num_insns++;
4582             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4583                 target_ulong a;
4584 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4585                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4586 #else
4587                 a = op->args[i];
4588 #endif
4589                 s->gen_insn_data[num_insns][i] = a;
4590             }
4591             break;
4592         case INDEX_op_discard:
4593             temp_dead(s, arg_temp(op->args[0]));
4594             break;
4595         case INDEX_op_set_label:
4596             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4597             tcg_out_label(s, arg_label(op->args[0]));
4598             break;
4599         case INDEX_op_call:
4600             tcg_reg_alloc_call(s, op);
4601             break;
4602         case INDEX_op_dup2_vec:
4603             if (tcg_reg_alloc_dup2(s, op)) {
4604                 break;
4605             }
4606             /* fall through */
4607         default:
4608             /* Sanity check that we've not introduced any unhandled opcodes. */
4609             tcg_debug_assert(tcg_op_supported(opc));
4610             /* Note: it would be much faster to have specialized
4611                register allocator functions for some common argument
4612                patterns. */
4613             tcg_reg_alloc_op(s, op);
4614             break;
4615         }
4616 #ifdef CONFIG_DEBUG_TCG
4617         check_regs(s);
4618 #endif
4619         /* Test for (pending) buffer overflow.  The assumption is that any
4620            one operation beginning below the high water mark cannot overrun
4621            the buffer completely.  Thus we can test for overflow after
4622            generating code without having to check during generation.  */
4623         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4624             return -1;
4625         }
4626         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4627         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4628             return -2;
4629         }
4630     }
4631     tcg_debug_assert(num_insns >= 0);
4632     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4633 
4634     /* Generate TB finalization at the end of block */
4635 #ifdef TCG_TARGET_NEED_LDST_LABELS
4636     i = tcg_out_ldst_finalize(s);
4637     if (i < 0) {
4638         return i;
4639     }
4640 #endif
4641 #ifdef TCG_TARGET_NEED_POOL_LABELS
4642     i = tcg_out_pool_finalize(s);
4643     if (i < 0) {
4644         return i;
4645     }
4646 #endif
4647     if (!tcg_resolve_relocs(s)) {
4648         return -2;
4649     }
4650 
4651 #ifndef CONFIG_TCG_INTERPRETER
4652     /* flush instruction cache */
4653     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4654                         (uintptr_t)s->code_buf,
4655                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4656 #endif
4657 
4658     return tcg_current_code_size(s);
4659 }
4660 
4661 #ifdef CONFIG_PROFILER
4662 void tcg_dump_info(void)
4663 {
4664     TCGProfile prof = {};
4665     const TCGProfile *s;
4666     int64_t tb_count;
4667     int64_t tb_div_count;
4668     int64_t tot;
4669 
4670     tcg_profile_snapshot_counters(&prof);
4671     s = &prof;
4672     tb_count = s->tb_count;
4673     tb_div_count = tb_count ? tb_count : 1;
4674     tot = s->interm_time + s->code_time;
4675 
4676     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4677                 tot, tot / 2.4e9);
4678     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4679                 " %0.1f%%)\n",
4680                 tb_count, s->tb_count1 - tb_count,
4681                 (double)(s->tb_count1 - s->tb_count)
4682                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4683     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4684                 (double)s->op_count / tb_div_count, s->op_count_max);
4685     qemu_printf("deleted ops/TB      %0.2f\n",
4686                 (double)s->del_op_count / tb_div_count);
4687     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4688                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4689     qemu_printf("avg host code/TB    %0.1f\n",
4690                 (double)s->code_out_len / tb_div_count);
4691     qemu_printf("avg search data/TB  %0.1f\n",
4692                 (double)s->search_out_len / tb_div_count);
4693 
4694     qemu_printf("cycles/op           %0.1f\n",
4695                 s->op_count ? (double)tot / s->op_count : 0);
4696     qemu_printf("cycles/in byte      %0.1f\n",
4697                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4698     qemu_printf("cycles/out byte     %0.1f\n",
4699                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4700     qemu_printf("cycles/search byte     %0.1f\n",
4701                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4702     if (tot == 0) {
4703         tot = 1;
4704     }
4705     qemu_printf("  gen_interm time   %0.1f%%\n",
4706                 (double)s->interm_time / tot * 100.0);
4707     qemu_printf("  gen_code time     %0.1f%%\n",
4708                 (double)s->code_time / tot * 100.0);
4709     qemu_printf("optim./code time    %0.1f%%\n",
4710                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4711                 * 100.0);
4712     qemu_printf("liveness/code time  %0.1f%%\n",
4713                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4714     qemu_printf("cpu_restore count   %" PRId64 "\n",
4715                 s->restore_count);
4716     qemu_printf("  avg cycles        %0.1f\n",
4717                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4718 }
4719 #else
4720 void tcg_dump_info(void)
4721 {
4722     qemu_printf("[TCG profiler not compiled]\n");
4723 }
4724 #endif
4725 
4726 #ifdef ELF_HOST_MACHINE
4727 /* In order to use this feature, the backend needs to do three things:
4728 
4729    (1) Define ELF_HOST_MACHINE to indicate both what value to
4730        put into the ELF image and to indicate support for the feature.
4731 
4732    (2) Define tcg_register_jit.  This should create a buffer containing
4733        the contents of a .debug_frame section that describes the post-
4734        prologue unwind info for the tcg machine.
4735 
4736    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4737 */
4738 
4739 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4740 typedef enum {
4741     JIT_NOACTION = 0,
4742     JIT_REGISTER_FN,
4743     JIT_UNREGISTER_FN
4744 } jit_actions_t;
4745 
4746 struct jit_code_entry {
4747     struct jit_code_entry *next_entry;
4748     struct jit_code_entry *prev_entry;
4749     const void *symfile_addr;
4750     uint64_t symfile_size;
4751 };
4752 
4753 struct jit_descriptor {
4754     uint32_t version;
4755     uint32_t action_flag;
4756     struct jit_code_entry *relevant_entry;
4757     struct jit_code_entry *first_entry;
4758 };
4759 
4760 void __jit_debug_register_code(void) __attribute__((noinline));
4761 void __jit_debug_register_code(void)
4762 {
4763     asm("");
4764 }
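/* GDB plants a breakpoint inside the empty function above; calling it
   after updating __jit_debug_descriptor notifies GDB of the change.  */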
4765 
4766 /* Must statically initialize the version, because GDB may check
4767    the version before we can set it.  */
4768 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4769 
4770 /* End GDB interface.  */
4771 
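/* Return the offset of STR within the NUL-separated string table STRTAB.
   Callers guarantee that STR is present, so no terminating check is
   needed.  */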
4772 static int find_string(const char *strtab, const char *str)
4773 {
4774     const char *p = strtab + 1;
4775 
4776     while (1) {
4777         if (strcmp(p, str) == 0) {
4778             return p - strtab;
4779         }
4780         p += strlen(p) + 1;
4781     }
4782 }
4783 
4784 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4785                                  const void *debug_frame,
4786                                  size_t debug_frame_size)
4787 {
4788     struct __attribute__((packed)) DebugInfo {
4789         uint32_t  len;
4790         uint16_t  version;
4791         uint32_t  abbrev;
4792         uint8_t   ptr_size;
4793         uint8_t   cu_die;
4794         uint16_t  cu_lang;
4795         uintptr_t cu_low_pc;
4796         uintptr_t cu_high_pc;
4797         uint8_t   fn_die;
4798         char      fn_name[16];
4799         uintptr_t fn_low_pc;
4800         uintptr_t fn_high_pc;
4801         uint8_t   cu_eoc;
4802     };
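    /* The field order above must match the abbreviation table in da[]
       below: a compile-unit DIE covering the JIT buffer, containing a
       single subprogram DIE named "code_gen_buffer".  */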
4803 
4804     struct ElfImage {
4805         ElfW(Ehdr) ehdr;
4806         ElfW(Phdr) phdr;
4807         ElfW(Shdr) shdr[7];
4808         ElfW(Sym)  sym[2];
4809         struct DebugInfo di;
4810         uint8_t    da[24];
4811         char       str[80];
4812     };
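    /* The symfile handed to GDB is this single blob: ELF and program
       headers, section contents and string table, with the host's
       .debug_frame appended immediately after the image (note the
       sh_offset of shdr[4] below).  */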
4813 
4814     struct ElfImage *img;
4815 
4816     static const struct ElfImage img_template = {
4817         .ehdr = {
4818             .e_ident[EI_MAG0] = ELFMAG0,
4819             .e_ident[EI_MAG1] = ELFMAG1,
4820             .e_ident[EI_MAG2] = ELFMAG2,
4821             .e_ident[EI_MAG3] = ELFMAG3,
4822             .e_ident[EI_CLASS] = ELF_CLASS,
4823             .e_ident[EI_DATA] = ELF_DATA,
4824             .e_ident[EI_VERSION] = EV_CURRENT,
4825             .e_type = ET_EXEC,
4826             .e_machine = ELF_HOST_MACHINE,
4827             .e_version = EV_CURRENT,
4828             .e_phoff = offsetof(struct ElfImage, phdr),
4829             .e_shoff = offsetof(struct ElfImage, shdr),
4830             .e_ehsize = sizeof(ElfW(Ehdr)),  /* size of the ELF header itself */
4831             .e_phentsize = sizeof(ElfW(Phdr)),
4832             .e_phnum = 1,
4833             .e_shentsize = sizeof(ElfW(Shdr)),
4834             .e_shnum = ARRAY_SIZE(img->shdr),
4835             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4836 #ifdef ELF_HOST_FLAGS
4837             .e_flags = ELF_HOST_FLAGS,
4838 #endif
4839 #ifdef ELF_OSABI
4840             .e_ident[EI_OSABI] = ELF_OSABI,
4841 #endif
4842         },
4843         .phdr = {
4844             .p_type = PT_LOAD,
4845             .p_flags = PF_X,
4846         },
4847         .shdr = {
4848             [0] = { .sh_type = SHT_NULL },
4849             /* Trick: The contents of code_gen_buffer are not present in
4850                this fake ELF file; that got allocated elsewhere.  Therefore
4851                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4852                will not look for contents.  We can record any address.  */
4853             [1] = { /* .text */
4854                 .sh_type = SHT_NOBITS,
4855                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4856             },
4857             [2] = { /* .debug_info */
4858                 .sh_type = SHT_PROGBITS,
4859                 .sh_offset = offsetof(struct ElfImage, di),
4860                 .sh_size = sizeof(struct DebugInfo),
4861             },
4862             [3] = { /* .debug_abbrev */
4863                 .sh_type = SHT_PROGBITS,
4864                 .sh_offset = offsetof(struct ElfImage, da),
4865                 .sh_size = sizeof(img->da),
4866             },
4867             [4] = { /* .debug_frame */
4868                 .sh_type = SHT_PROGBITS,
4869                 .sh_offset = sizeof(struct ElfImage),
4870             },
4871             [5] = { /* .symtab */
4872                 .sh_type = SHT_SYMTAB,
4873                 .sh_offset = offsetof(struct ElfImage, sym),
4874                 .sh_size = sizeof(img->sym),
4875                 .sh_info = 1,
4876                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4877                 .sh_entsize = sizeof(ElfW(Sym)),
4878             },
4879             [6] = { /* .strtab */
4880                 .sh_type = SHT_STRTAB,
4881                 .sh_offset = offsetof(struct ElfImage, str),
4882                 .sh_size = sizeof(img->str),
4883             }
4884         },
4885         .sym = {
4886             [1] = { /* code_gen_buffer */
4887                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4888                 .st_shndx = 1,
4889             }
4890         },
4891         .di = {
4892             .len = sizeof(struct DebugInfo) - 4,
4893             .version = 2,
4894             .ptr_size = sizeof(void *),
4895             .cu_die = 1,
4896             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4897             .fn_die = 2,
4898             .fn_name = "code_gen_buffer"
4899         },
4900         .da = {
4901             1,          /* abbrev number (the cu) */
4902             0x11, 1,    /* DW_TAG_compile_unit, has children */
4903             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4904             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4905             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4906             0, 0,       /* end of abbrev */
4907             2,          /* abbrev number (the fn) */
4908             0x2e, 0,    /* DW_TAG_subprogram, no children */
4909             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4910             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4911             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4912             0, 0,       /* end of abbrev */
4913             0           /* no more abbrev */
4914         },
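        /* NUL-separated name pool; the find_string() calls below resolve
           the sh_name/st_name offsets into this table at run time.  */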
4915         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4916                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4917     };
4918 
4919     /* We only need a single jit entry; statically allocate it.  */
4920     static struct jit_code_entry one_entry;
4921 
4922     uintptr_t buf = (uintptr_t)buf_ptr;
4923     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4924     DebugFrameHeader *dfh;
4925 
4926     img = g_malloc(img_size);
4927     *img = img_template;
4928 
4929     img->phdr.p_vaddr = buf;
4930     img->phdr.p_paddr = buf;
4931     img->phdr.p_memsz = buf_size;
4932 
4933     img->shdr[1].sh_name = find_string(img->str, ".text");
4934     img->shdr[1].sh_addr = buf;
4935     img->shdr[1].sh_size = buf_size;
4936 
4937     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4938     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4939 
4940     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4941     img->shdr[4].sh_size = debug_frame_size;
4942 
4943     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4944     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4945 
4946     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4947     img->sym[1].st_value = buf;
4948     img->sym[1].st_size = buf_size;
4949 
4950     img->di.cu_low_pc = buf;
4951     img->di.cu_high_pc = buf + buf_size;
4952     img->di.fn_low_pc = buf;
4953     img->di.fn_high_pc = buf + buf_size;
4954 
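    /* The .debug_frame was built without knowing the code buffer's
       address; copy it to just past the ElfImage and patch the FDE with
       the actual bounds.  */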
4955     dfh = (DebugFrameHeader *)(img + 1);
4956     memcpy(dfh, debug_frame, debug_frame_size);
4957     dfh->fde.func_start = buf;
4958     dfh->fde.func_len = buf_size;
4959 
4960 #ifdef DEBUG_JIT
4961     /* Enable this block to debug creation of the ELF image file,
4962        e.g. with readelf, objdump, or other inspection utilities.  */
4963     {
4964         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4965         if (f) {
4966             if (fwrite(img, img_size, 1, f) != 1) {
4967                 /* Silence the unused-result warning for fwrite.  */
4968             }
4969             fclose(f);
4970         }
4971     }
4972 #endif
4973 
4974     one_entry.symfile_addr = img;
4975     one_entry.symfile_size = img_size;
4976 
4977     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4978     __jit_debug_descriptor.relevant_entry = &one_entry;
4979     __jit_debug_descriptor.first_entry = &one_entry;
4980     __jit_debug_register_code();
4981 }
4982 #else
4983 /* No support for the feature.  Provide the entry point expected by exec.c,
4984    and implement the internal function we declared earlier.  */
4985 
4986 static void tcg_register_jit_int(const void *buf, size_t size,
4987                                  const void *debug_frame,
4988                                  size_t debug_frame_size)
4989 {
4990 }
4991 
4992 void tcg_register_jit(const void *buf, size_t buf_size)
4993 {
4994 }
4995 #endif /* ELF_HOST_MACHINE */
4996 
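/* Satisfy the linker for references from the generic vector expanders:
   when the backend advertises no vector support, no vector op can ever
   be created, so this must be unreachable.  */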
4997 #if !TCG_TARGET_MAYBE_vec
4998 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4999 {
5000     g_assert_not_reached();
5001 }
5002 #endif
5003