/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
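
/*
 * These structures describe the DWARF .debug_frame data that each
 * tcg-target.inc.c provides for its host.  tcg_register_jit_int() below
 * wraps that data in a small in-memory ELF image so that GDB's JIT
 * interface can unwind through the generated code.
 */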

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
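
/*
 * A backend emits code by appending insn-unit-sized chunks with the
 * helpers above.  As an illustrative sketch only (not a real backend),
 * a fixed-width 32-bit encoder might look like:
 *
 *     static void tcg_out_add(TCGContext *s, TCGReg rd, TCGReg rn, TCGReg rm)
 *     {
 *         tcg_out32(s, OPC_ADD | rd << 21 | rn << 16 | rm << 11);
 *     }
 *
 * where OPC_ADD and the field offsets are hypothetical.  The tcg_patchN
 * variants rewrite an already-emitted unit in place, e.g. when a forward
 * branch is resolved from a backend's patch_reloc().
 */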

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
        tcg_debug_assert(ok);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
        tcg_debug_assert(ok);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };
#ifdef CONFIG_DEBUG_TCG
    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
#endif

    return l;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the two .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
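
/*
 * The clamping above matters at both ends: the first region may begin
 * before start_aligned, and the last one may extend past
 * stride * (n - 1).  As a worked example with hypothetical numbers:
 * start_aligned == 0x1000, stride == 0x4000 and n == 4 map a pointer
 * of 0x9abc to offset 0x8abc and hence to tree 2, while anything below
 * 0x1000 falls into tree 0 and anything past the last region into
 * tree n - 1.
 */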

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
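
/*
 * The typical caller is the exception path: given the host PC at which
 * a fault was taken inside generated code, cpu_restore_state() needs
 * the TB containing that PC.  A minimal sketch of such a caller, using
 * only the API above:
 *
 *     TranslationBlock *tb = tcg_tb_lookup(host_pc);
 *     if (tb) {
 *         // unwind guest state from the TB's stored metadata
 *     }
 */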

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
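
/*
 * Worked example with hypothetical numbers: given a 512 MB
 * code_gen_buffer and max_cpus == 8, the first iteration tries i == 8,
 * i.e. 64 regions of 8 MB each; 8 MB >= 2 MB, so we return 64.  With an
 * 8 MB buffer and the same 8 vCPUs, even one region per thread would
 * only be 1 MB, so the loop falls through and we return max_cpus == 8.
 */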

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
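
/*
 * Resulting layout, as a sketch: with 4 KB pages and a 2 MB stride,
 * each region contributes 2 MB - 4 KB of usable code space followed by
 * one PROT_NONE guard page:
 *
 *     [ region 0 code | guard ][ region 1 code | guard ] ...
 *
 * The first region absorbs the 'aligned - buf' slack at the front of
 * the buffer, and the last one absorbs whatever pages are left over
 * from the division above, so all boundaries stay page-aligned.
 */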

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */
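
/*
 * Usage sketch: in softmmu MTTCG, every newly spawned vCPU thread calls
 * this once before its first translation, roughly:
 *
 *     static void *cpu_thread_fn(void *arg)
 *     {
 *         rcu_register_thread();
 *         tcg_register_thread();
 *         // ... per-vCPU loop that may call tb_gen_code() ...
 *     }
 *
 * cpu_thread_fn is a stand-in name; the real callers live in the
 * per-accelerator cpus code.
 */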

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
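
/*
 * Derivation of the above: each region occupies 'stride' bytes, of
 * which one guard page (stride - size) is unusable, and the final
 * TCG_HIGHWATER bytes are reserved slack for the opcode in flight when
 * the high-water mark is hit.  For example, with 4 regions of 2 MB
 * stride, 4 KB pages and TCG_HIGHWATER == 1024, the capacity is
 * 4 * (2 MB - 4 KB - 1 KB) of translatable code.
 */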

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
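
/*
 * Usage sketch: translation-time allocations go through tcg_malloc()
 * (an inline wrapper in tcg.h that bump-allocates from pool_cur and
 * only calls tcg_malloc_internal() on overflow), and everything is
 * released wholesale by tcg_pool_reset() when the next translation
 * starts:
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *
 * There is deliberately no per-object free.
 */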

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
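
/*
 * Alignment example, assuming a 64-byte icache line: with code_gen_ptr
 * at ...0x1010, the TB struct is placed at ...0x1040 and code_gen_ptr
 * advances to the first line boundary past the struct, so the
 * read-mostly TB and the code it describes never share a cache line.
 */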

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        bool ok = tcg_out_pool_finalize(s);
        tcg_debug_assert(ok);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
#ifdef CONFIG_DEBUG_TCG
    QSIMPLEQ_INIT(&s->labels);
#endif
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
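
/*
 * Split example, as a sketch: on a 32-bit big-endian host, a 64-bit
 * global "foo" at env offset 8 becomes two TCG_TYPE_I32 halves,
 * "foo_0" (the low part, at offset 12) and "foo_1" (the high part, at
 * offset 8); on a little-endian host the offsets are swapped.  Callers
 * rely on the halves being adjacent in s->temps, hence the assertion
 * that ts2 == ts + 1.
 */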

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
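
/*
 * Usage sketch for the two helpers above, from a translator loop:
 *
 *     tcg_clear_temp_count();
 *     // ... translate one guest instruction ...
 *     if (tcg_check_temp_count()) {
 *         // a temporary allocated for this insn was never freed
 *     }
 *
 * This is how per-insn temp leaks are flagged under CONFIG_DEBUG_TCG.
 */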

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
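
/*
 * tcg_op_supported() is a runtime check, so callers can probe for an
 * opcode and fall back to an expansion when the backend lacks it.
 * A sketch:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit ctpop directly
 *     } else {
 *         // expand via shifts and masks
 *     }
 *
 * (The expanders in tcg-op.c test the TCG_TARGET_HAS_* macros directly;
 * this function is the generic equivalent, used e.g. for sanity checks.)
 */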
1633 
1634 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1635    and endian swap. Maybe it would be better to do the alignment
1636    and endian swap in tcg_reg_alloc_call(). */
1637 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1638 {
1639     int i, real_args, nb_rets, pi;
1640     unsigned sizemask, flags;
1641     TCGHelperInfo *info;
1642     TCGOp *op;
1643 
1644     info = g_hash_table_lookup(helper_table, (gpointer)func);
1645     flags = info->flags;
1646     sizemask = info->sizemask;
1647 
1648 #if defined(__sparc__) && !defined(__arch64__) \
1649     && !defined(CONFIG_TCG_INTERPRETER)
1650     /* We have 64-bit values in one register, but need to pass as two
1651        separate parameters.  Split them.  */
1652     int orig_sizemask = sizemask;
1653     int orig_nargs = nargs;
1654     TCGv_i64 retl, reth;
1655     TCGTemp *split_args[MAX_OPC_PARAM];
1656 
1657     retl = NULL;
1658     reth = NULL;
1659     if (sizemask != 0) {
1660         for (i = real_args = 0; i < nargs; ++i) {
1661             int is_64bit = sizemask & (1 << (i+1)*2);
1662             if (is_64bit) {
1663                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1664                 TCGv_i32 h = tcg_temp_new_i32();
1665                 TCGv_i32 l = tcg_temp_new_i32();
1666                 tcg_gen_extr_i64_i32(l, h, orig);
1667                 split_args[real_args++] = tcgv_i32_temp(h);
1668                 split_args[real_args++] = tcgv_i32_temp(l);
1669             } else {
1670                 split_args[real_args++] = args[i];
1671             }
1672         }
1673         nargs = real_args;
1674         args = split_args;
1675         sizemask = 0;
1676     }
1677 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1678     for (i = 0; i < nargs; ++i) {
1679         int is_64bit = sizemask & (1 << (i+1)*2);
1680         int is_signed = sizemask & (2 << (i+1)*2);
1681         if (!is_64bit) {
1682             TCGv_i64 temp = tcg_temp_new_i64();
1683             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1684             if (is_signed) {
1685                 tcg_gen_ext32s_i64(temp, orig);
1686             } else {
1687                 tcg_gen_ext32u_i64(temp, orig);
1688             }
1689             args[i] = tcgv_i64_temp(temp);
1690         }
1691     }
1692 #endif /* TCG_TARGET_EXTEND_ARGS */
1693 
1694     op = tcg_emit_op(INDEX_op_call);
1695 
1696     pi = 0;
1697     if (ret != NULL) {
1698 #if defined(__sparc__) && !defined(__arch64__) \
1699     && !defined(CONFIG_TCG_INTERPRETER)
1700         if (orig_sizemask & 1) {
1701             /* The 32-bit ABI is going to return the 64-bit value in
1702                the %o0/%o1 register pair.  Prepare for this by using
1703                two return temporaries, and reassemble below.  */
1704             retl = tcg_temp_new_i64();
1705             reth = tcg_temp_new_i64();
1706             op->args[pi++] = tcgv_i64_arg(reth);
1707             op->args[pi++] = tcgv_i64_arg(retl);
1708             nb_rets = 2;
1709         } else {
1710             op->args[pi++] = temp_arg(ret);
1711             nb_rets = 1;
1712         }
1713 #else
1714         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1715 #ifdef HOST_WORDS_BIGENDIAN
1716             op->args[pi++] = temp_arg(ret + 1);
1717             op->args[pi++] = temp_arg(ret);
1718 #else
1719             op->args[pi++] = temp_arg(ret);
1720             op->args[pi++] = temp_arg(ret + 1);
1721 #endif
1722             nb_rets = 2;
1723         } else {
1724             op->args[pi++] = temp_arg(ret);
1725             nb_rets = 1;
1726         }
1727 #endif
1728     } else {
1729         nb_rets = 0;
1730     }
1731     TCGOP_CALLO(op) = nb_rets;
1732 
1733     real_args = 0;
1734     for (i = 0; i < nargs; i++) {
1735         int is_64bit = sizemask & (1 << (i+1)*2);
1736         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1737 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1738             /* Some targets want 64-bit args aligned to an even register or slot */
1739             if (real_args & 1) {
1740                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1741                 real_args++;
1742             }
1743 #endif
1744            /* If stack grows up, then we will be placing successive
1745               arguments at lower addresses, which means we need to
1746               reverse the order compared to how we would normally
1747               treat either big or little-endian.  For those arguments
1748               that will wind up in registers, this still works for
1749               HPPA (the only current STACK_GROWSUP target) since the
1750               argument registers are *also* allocated in decreasing
1751               order.  If another such target is added, this logic may
1752               have to get more complicated to differentiate between
1753               stack arguments and register arguments.  */
1754 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1755             op->args[pi++] = temp_arg(args[i] + 1);
1756             op->args[pi++] = temp_arg(args[i]);
1757 #else
1758             op->args[pi++] = temp_arg(args[i]);
1759             op->args[pi++] = temp_arg(args[i] + 1);
1760 #endif
1761             real_args += 2;
1762             continue;
1763         }
1764 
1765         op->args[pi++] = temp_arg(args[i]);
1766         real_args++;
1767     }
1768     op->args[pi++] = (uintptr_t)func;
1769     op->args[pi++] = flags;
1770     TCGOP_CALLI(op) = real_args;
1771 
1772     /* Make sure the fields didn't overflow.  */
1773     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1774     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1775 
1776 #if defined(__sparc__) && !defined(__arch64__) \
1777     && !defined(CONFIG_TCG_INTERPRETER)
1778     /* Free all of the parts we allocated above.  */
1779     for (i = real_args = 0; i < orig_nargs; ++i) {
1780         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1781         if (is_64bit) {
1782             tcg_temp_free_internal(args[real_args++]);
1783             tcg_temp_free_internal(args[real_args++]);
1784         } else {
1785             real_args++;
1786         }
1787     }
1788     if (orig_sizemask & 1) {
1789         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1790            Note that describing these as TCGv_i64 eliminates an unnecessary
1791            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1792         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1793         tcg_temp_free_i64(retl);
1794         tcg_temp_free_i64(reth);
1795     }
1796 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1797     for (i = 0; i < nargs; ++i) {
1798         int is_64bit = sizemask & (1 << (i+1)*2);
1799         if (!is_64bit) {
1800             tcg_temp_free_internal(args[i]);
1801         }
1802     }
1803 #endif /* TCG_TARGET_EXTEND_ARGS */
1804 }
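
/*
 * For reference, the call op emitted above lays out op->args as
 *     [ nb_rets outputs | real_args inputs (possibly padded with
 *       TCG_CALL_DUMMY_ARG) | function pointer | flags ]
 * with the counts stored via TCGOP_CALLO/TCGOP_CALLI; tcg_dump_ops and
 * the liveness passes below rely on exactly this layout.
 */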
1805 
1806 static void tcg_reg_alloc_start(TCGContext *s)
1807 {
1808     int i, n;
1809     TCGTemp *ts;
1810 
1811     for (i = 0, n = s->nb_globals; i < n; i++) {
1812         ts = &s->temps[i];
1813         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1814     }
1815     for (n = s->nb_temps; i < n; i++) {
1816         ts = &s->temps[i];
1817         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1818         ts->mem_allocated = 0;
1819         ts->fixed_reg = 0;
1820     }
1821 
1822     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1823 }
1824 
1825 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1826                                  TCGTemp *ts)
1827 {
1828     int idx = temp_idx(ts);
1829 
1830     if (ts->temp_global) {
1831         pstrcpy(buf, buf_size, ts->name);
1832     } else if (ts->temp_local) {
1833         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1834     } else {
1835         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1836     }
1837     return buf;
1838 }
1839 
1840 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1841                              int buf_size, TCGArg arg)
1842 {
1843     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1844 }
1845 
1846 /* Find helper name.  */
1847 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1848 {
1849     const char *ret = NULL;
1850     if (helper_table) {
1851         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1852         if (info) {
1853             ret = info->name;
1854         }
1855     }
1856     return ret;
1857 }
1858 
1859 static const char * const cond_name[] =
1860 {
1861     [TCG_COND_NEVER] = "never",
1862     [TCG_COND_ALWAYS] = "always",
1863     [TCG_COND_EQ] = "eq",
1864     [TCG_COND_NE] = "ne",
1865     [TCG_COND_LT] = "lt",
1866     [TCG_COND_GE] = "ge",
1867     [TCG_COND_LE] = "le",
1868     [TCG_COND_GT] = "gt",
1869     [TCG_COND_LTU] = "ltu",
1870     [TCG_COND_GEU] = "geu",
1871     [TCG_COND_LEU] = "leu",
1872     [TCG_COND_GTU] = "gtu"
1873 };
1874 
1875 static const char * const ldst_name[] =
1876 {
1877     [MO_UB]   = "ub",
1878     [MO_SB]   = "sb",
1879     [MO_LEUW] = "leuw",
1880     [MO_LESW] = "lesw",
1881     [MO_LEUL] = "leul",
1882     [MO_LESL] = "lesl",
1883     [MO_LEQ]  = "leq",
1884     [MO_BEUW] = "beuw",
1885     [MO_BESW] = "besw",
1886     [MO_BEUL] = "beul",
1887     [MO_BESL] = "besl",
1888     [MO_BEQ]  = "beq",
1889 };
1890 
1891 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1892 #ifdef ALIGNED_ONLY
1893     [MO_UNALN >> MO_ASHIFT]    = "un+",
1894     [MO_ALIGN >> MO_ASHIFT]    = "",
1895 #else
1896     [MO_UNALN >> MO_ASHIFT]    = "",
1897     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1898 #endif
1899     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1900     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1901     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1902     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1903     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1904     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1905 };
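
/* tcg_dump_ops below composes the two tables above: a memop prints as the
   alignment prefix followed by the ld/st name, so MO_ALIGN_4 | MO_LEUW
   would render as "al4+leuw" (with the !ALIGNED_ONLY defaults).  */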
1906 
1907 static inline bool tcg_regset_single(TCGRegSet d)
1908 {
1909     return (d & (d - 1)) == 0;
1910 }
1911 
1912 static inline TCGReg tcg_regset_first(TCGRegSet d)
1913 {
1914     if (TCG_TARGET_NB_REGS <= 32) {
1915         return ctz32(d);
1916     } else {
1917         return ctz64(d);
1918     }
1919 }
1920 
1921 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1922 {
1923     char buf[128];
1924     TCGOp *op;
1925 
1926     QTAILQ_FOREACH(op, &s->ops, link) {
1927         int i, k, nb_oargs, nb_iargs, nb_cargs;
1928         const TCGOpDef *def;
1929         TCGOpcode c;
1930         int col = 0;
1931 
1932         c = op->opc;
1933         def = &tcg_op_defs[c];
1934 
1935         if (c == INDEX_op_insn_start) {
1936             nb_oargs = 0;
1937             col += qemu_log("\n ----");
1938 
1939             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1940                 target_ulong a;
1941 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1942                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1943 #else
1944                 a = op->args[i];
1945 #endif
1946                 col += qemu_log(" " TARGET_FMT_lx, a);
1947             }
1948         } else if (c == INDEX_op_call) {
1949             /* variable number of arguments */
1950             nb_oargs = TCGOP_CALLO(op);
1951             nb_iargs = TCGOP_CALLI(op);
1952             nb_cargs = def->nb_cargs;
1953 
1954             /* function name, flags, out args */
1955             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1956                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1957                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1958             for (i = 0; i < nb_oargs; i++) {
1959                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1960                                                        op->args[i]));
1961             }
1962             for (i = 0; i < nb_iargs; i++) {
1963                 TCGArg arg = op->args[nb_oargs + i];
1964                 const char *t = "<dummy>";
1965                 if (arg != TCG_CALL_DUMMY_ARG) {
1966                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1967                 }
1968                 col += qemu_log(",%s", t);
1969             }
1970         } else {
1971             col += qemu_log(" %s ", def->name);
1972 
1973             nb_oargs = def->nb_oargs;
1974             nb_iargs = def->nb_iargs;
1975             nb_cargs = def->nb_cargs;
1976 
1977             if (def->flags & TCG_OPF_VECTOR) {
1978                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1979                                 8 << TCGOP_VECE(op));
1980             }
1981 
1982             k = 0;
1983             for (i = 0; i < nb_oargs; i++) {
1984                 if (k != 0) {
1985                     col += qemu_log(",");
1986                 }
1987                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1988                                                       op->args[k++]));
1989             }
1990             for (i = 0; i < nb_iargs; i++) {
1991                 if (k != 0) {
1992                     col += qemu_log(",");
1993                 }
1994                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1995                                                       op->args[k++]));
1996             }
1997             switch (c) {
1998             case INDEX_op_brcond_i32:
1999             case INDEX_op_setcond_i32:
2000             case INDEX_op_movcond_i32:
2001             case INDEX_op_brcond2_i32:
2002             case INDEX_op_setcond2_i32:
2003             case INDEX_op_brcond_i64:
2004             case INDEX_op_setcond_i64:
2005             case INDEX_op_movcond_i64:
2006             case INDEX_op_cmp_vec:
2007                 if (op->args[k] < ARRAY_SIZE(cond_name)
2008                     && cond_name[op->args[k]]) {
2009                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2010                 } else {
2011                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2012                 }
2013                 i = 1;
2014                 break;
2015             case INDEX_op_qemu_ld_i32:
2016             case INDEX_op_qemu_st_i32:
2017             case INDEX_op_qemu_ld_i64:
2018             case INDEX_op_qemu_st_i64:
2019                 {
2020                     TCGMemOpIdx oi = op->args[k++];
2021                     TCGMemOp op = get_memop(oi);
2022                     unsigned ix = get_mmuidx(oi);
2023 
2024                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2025                         col += qemu_log(",$0x%x,%u", op, ix);
2026                     } else {
2027                         const char *s_al, *s_op;
2028                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2029                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2030                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2031                     }
2032                     i = 1;
2033                 }
2034                 break;
2035             default:
2036                 i = 0;
2037                 break;
2038             }
2039             switch (c) {
2040             case INDEX_op_set_label:
2041             case INDEX_op_br:
2042             case INDEX_op_brcond_i32:
2043             case INDEX_op_brcond_i64:
2044             case INDEX_op_brcond2_i32:
2045                 col += qemu_log("%s$L%d", k ? "," : "",
2046                                 arg_label(op->args[k])->id);
2047                 i++, k++;
2048                 break;
2049             default:
2050                 break;
2051             }
2052             for (; i < nb_cargs; i++, k++) {
2053                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2054             }
2055         }
2056 
2057         if (have_prefs || op->life) {
2058             for (; col < 40; ++col) {
2059                 putc(' ', qemu_logfile);
2060             }
2061         }
2062 
2063         if (op->life) {
2064             unsigned life = op->life;
2065 
2066             if (life & (SYNC_ARG * 3)) {
2067                 qemu_log("  sync:");
2068                 for (i = 0; i < 2; ++i) {
2069                     if (life & (SYNC_ARG << i)) {
2070                         qemu_log(" %d", i);
2071                     }
2072                 }
2073             }
2074             life /= DEAD_ARG;
2075             if (life) {
2076                 qemu_log("  dead:");
2077                 for (i = 0; life; ++i, life >>= 1) {
2078                     if (life & 1) {
2079                         qemu_log(" %d", i);
2080                     }
2081                 }
2082             }
2083         }
2084 
2085         if (have_prefs) {
2086             for (i = 0; i < nb_oargs; ++i) {
2087                 TCGRegSet set = op->output_pref[i];
2088 
2089                 if (i == 0) {
2090                     qemu_log("  pref=");
2091                 } else {
2092                     qemu_log(",");
2093                 }
2094                 if (set == 0) {
2095                     qemu_log("none");
2096                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2097                     qemu_log("all");
2098 #ifdef CONFIG_DEBUG_TCG
2099                 } else if (tcg_regset_single(set)) {
2100                     TCGReg reg = tcg_regset_first(set);
2101                     qemu_log("%s", tcg_target_reg_names[reg]);
2102 #endif
2103                 } else if (TCG_TARGET_NB_REGS <= 32) {
2104                     qemu_log("%#x", (uint32_t)set);
2105                 } else {
2106                     qemu_log("%#" PRIx64, (uint64_t)set);
2107                 }
2108             }
2109         }
2110 
2111         qemu_log("\n");
2112     }
2113 }
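
/*
 * Illustrative only: given the formatting above, a conditional branch
 * might be dumped as
 *      brcond_i32 tmp2,tmp3,lt,$L1        dead: 0 1
 * i.e. the input args, the decoded condition name, the $L<id> label,
 * then the liveness/preference annotations when present.
 */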
2114 
2115 /* We give higher priority to constraints that allow fewer registers.  */
2116 static int get_constraint_priority(const TCGOpDef *def, int k)
2117 {
2118     const TCGArgConstraint *arg_ct;
2119 
2120     int i, n;
2121     arg_ct = &def->args_ct[k];
2122     if (arg_ct->ct & TCG_CT_ALIAS) {
2123         /* an alias is equivalent to a single register */
2124         n = 1;
2125     } else {
2126         if (!(arg_ct->ct & TCG_CT_REG))
2127             return 0;
2128         n = 0;
2129         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2130             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2131                 n++;
2132         }
2133     }
2134     return TCG_TARGET_NB_REGS - n + 1;
2135 }
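
/* Worked example: with 16 target registers, a single-register constraint
   (or an alias) yields priority 16 - 1 + 1 == 16, while an "any register"
   constraint yields 16 - 16 + 1 == 1, so sort_constraints below places
   the tightest constraints first.  */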
2136 
2137 /* sort from highest priority to lowest */
2138 static void sort_constraints(TCGOpDef *def, int start, int n)
2139 {
2140     int i, j, p1, p2, tmp;
2141 
2142     for(i = 0; i < n; i++)
2143         def->sorted_args[start + i] = start + i;
2144     if (n <= 1)
2145         return;
2146     for(i = 0; i < n - 1; i++) {
2147         for(j = i + 1; j < n; j++) {
2148             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2149             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2150             if (p1 < p2) {
2151                 tmp = def->sorted_args[start + i];
2152                 def->sorted_args[start + i] = def->sorted_args[start + j];
2153                 def->sorted_args[start + j] = tmp;
2154             }
2155         }
2156     }
2157 }
2158 
2159 static void process_op_defs(TCGContext *s)
2160 {
2161     TCGOpcode op;
2162 
2163     for (op = 0; op < NB_OPS; op++) {
2164         TCGOpDef *def = &tcg_op_defs[op];
2165         const TCGTargetOpDef *tdefs;
2166         TCGType type;
2167         int i, nb_args;
2168 
2169         if (def->flags & TCG_OPF_NOT_PRESENT) {
2170             continue;
2171         }
2172 
2173         nb_args = def->nb_iargs + def->nb_oargs;
2174         if (nb_args == 0) {
2175             continue;
2176         }
2177 
2178         tdefs = tcg_target_op_def(op);
2179         /* Missing TCGTargetOpDef entry. */
2180         tcg_debug_assert(tdefs != NULL);
2181 
2182         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2183         for (i = 0; i < nb_args; i++) {
2184             const char *ct_str = tdefs->args_ct_str[i];
2185             /* Incomplete TCGTargetOpDef entry. */
2186             tcg_debug_assert(ct_str != NULL);
2187 
2188             def->args_ct[i].u.regs = 0;
2189             def->args_ct[i].ct = 0;
2190             while (*ct_str != '\0') {
2191                 switch(*ct_str) {
2192                 case '0' ... '9':
2193                     {
2194                         int oarg = *ct_str - '0';
2195                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2196                         tcg_debug_assert(oarg < def->nb_oargs);
2197                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2198                         /* TCG_CT_ALIAS is for the output arguments.
2199                            The input is tagged with TCG_CT_IALIAS. */
2200                         def->args_ct[i] = def->args_ct[oarg];
2201                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2202                         def->args_ct[oarg].alias_index = i;
2203                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2204                         def->args_ct[i].alias_index = oarg;
2205                     }
2206                     ct_str++;
2207                     break;
2208                 case '&':
2209                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2210                     ct_str++;
2211                     break;
2212                 case 'i':
2213                     def->args_ct[i].ct |= TCG_CT_CONST;
2214                     ct_str++;
2215                     break;
2216                 default:
2217                     ct_str = target_parse_constraint(&def->args_ct[i],
2218                                                      ct_str, type);
2219                     /* Typo in TCGTargetOpDef constraint. */
2220                     tcg_debug_assert(ct_str != NULL);
2221                 }
2222             }
2223         }
2224 
2225         /* TCGTargetOpDef entry with too much information? */
2226         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2227 
2228         /* sort the constraints (XXX: this is just a heuristic) */
2229         sort_constraints(def, 0, def->nb_oargs);
2230         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2231     }
2232 }
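
/*
 * A hypothetical target entry such as { INDEX_op_add_i32, { "r", "0", "ri" } }
 * is parsed above as: output 0 in any register, input 0 tied to output 0
 * via the TCG_CT_ALIAS/TCG_CT_IALIAS pair, and input 1 accepted as either
 * a register or an immediate (TCG_CT_CONST).
 */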
2233 
2234 void tcg_op_remove(TCGContext *s, TCGOp *op)
2235 {
2236     TCGLabel *label;
2237 
2238     switch (op->opc) {
2239     case INDEX_op_br:
2240         label = arg_label(op->args[0]);
2241         label->refs--;
2242         break;
2243     case INDEX_op_brcond_i32:
2244     case INDEX_op_brcond_i64:
2245         label = arg_label(op->args[3]);
2246         label->refs--;
2247         break;
2248     case INDEX_op_brcond2_i32:
2249         label = arg_label(op->args[5]);
2250         label->refs--;
2251         break;
2252     default:
2253         break;
2254     }
2255 
2256     QTAILQ_REMOVE(&s->ops, op, link);
2257     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2258     s->nb_ops--;
2259 
2260 #ifdef CONFIG_PROFILER
2261     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2262 #endif
2263 }
2264 
2265 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2266 {
2267     TCGContext *s = tcg_ctx;
2268     TCGOp *op;
2269 
2270     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2271         op = tcg_malloc(sizeof(TCGOp));
2272     } else {
2273         op = QTAILQ_FIRST(&s->free_ops);
2274         QTAILQ_REMOVE(&s->free_ops, op, link);
2275     }
2276     memset(op, 0, offsetof(TCGOp, link));
2277     op->opc = opc;
2278     s->nb_ops++;
2279 
2280     return op;
2281 }
2282 
2283 TCGOp *tcg_emit_op(TCGOpcode opc)
2284 {
2285     TCGOp *op = tcg_op_alloc(opc);
2286     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2287     return op;
2288 }
2289 
2290 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2291 {
2292     TCGOp *new_op = tcg_op_alloc(opc);
2293     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2294     return new_op;
2295 }
2296 
2297 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2298 {
2299     TCGOp *new_op = tcg_op_alloc(opc);
2300     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2301     return new_op;
2302 }
2303 
2304 /* Reachability analysis: remove unreachable code.  */
2305 static void reachable_code_pass(TCGContext *s)
2306 {
2307     TCGOp *op, *op_next;
2308     bool dead = false;
2309 
2310     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2311         bool remove = dead;
2312         TCGLabel *label;
2313         int call_flags;
2314 
2315         switch (op->opc) {
2316         case INDEX_op_set_label:
2317             label = arg_label(op->args[0]);
2318             if (label->refs == 0) {
2319                 /*
2320                  * While there is an occasional backward branch, virtually
2321                  * all branches generated by the translators are forward.
2322                  * Which means that, by the time we see a label, any
2323                  * reference to it that was going to be removed already
2324                  * has been, and there is little to be gained by iterating.
2325                  */
2326                 remove = true;
2327             } else {
2328                 /* Once we see a label, insns become live again.  */
2329                 dead = false;
2330                 remove = false;
2331 
2332                 /*
2333                  * Optimization can fold conditional branches to unconditional.
2334                  * If we find a label with one reference which is preceded by
2335                  * an unconditional branch to it, remove both.  This needed to
2336                  * an unconditional branch to it, remove both.  This had to
2337                  * wait until the dead code between them was removed.
2338                 if (label->refs == 1) {
2339                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2340                     if (op_prev->opc == INDEX_op_br &&
2341                         label == arg_label(op_prev->args[0])) {
2342                         tcg_op_remove(s, op_prev);
2343                         remove = true;
2344                     }
2345                 }
2346             }
2347             break;
2348 
2349         case INDEX_op_br:
2350         case INDEX_op_exit_tb:
2351         case INDEX_op_goto_ptr:
2352             /* Unconditional branches; everything following is dead.  */
2353             dead = true;
2354             break;
2355 
2356         case INDEX_op_call:
2357             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2358             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2359             if (call_flags & TCG_CALL_NO_RETURN) {
2360                 dead = true;
2361             }
2362             break;
2363 
2364         case INDEX_op_insn_start:
2365             /* Never remove -- we need to keep these for unwind.  */
2366             remove = false;
2367             break;
2368 
2369         default:
2370             break;
2371         }
2372 
2373         if (remove) {
2374             tcg_op_remove(s, op);
2375         }
2376     }
2377 }
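
/*
 * Example of the pairwise removal above: once the optimizer has folded a
 * conditional branch to an unconditional one, a sequence
 *     br $L1 ; <unreachable ops> ; set_label $L1
 * first loses the unreachable ops to the "dead" flag, and then, with
 * label->refs down to 1, the br/set_label pair itself is removed.
 */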
2378 
2379 #define TS_DEAD  1
2380 #define TS_MEM   2
2381 
2382 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2383 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
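
/* As printed by tcg_dump_ops above: the two low SYNC_ARG bits of arg_life
   mark outputs 0/1 that must be synced back to memory, while DEAD_ARG << n
   marks argument n as dead after this op.  */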
2384 
2385 /* For liveness_pass_1, the register preferences for a given temp.  */
2386 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2387 {
2388     return ts->state_ptr;
2389 }
2390 
2391 /* For liveness_pass_1, reset the preferences for a given temp to the
2392  * maximal regset for its type.
2393  */
2394 static inline void la_reset_pref(TCGTemp *ts)
2395 {
2396     *la_temp_pref(ts)
2397         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2398 }
2399 
2400 /* liveness analysis: end of function: all temps are dead, and globals
2401    should be in memory. */
2402 static void la_func_end(TCGContext *s, int ng, int nt)
2403 {
2404     int i;
2405 
2406     for (i = 0; i < ng; ++i) {
2407         s->temps[i].state = TS_DEAD | TS_MEM;
2408         la_reset_pref(&s->temps[i]);
2409     }
2410     for (i = ng; i < nt; ++i) {
2411         s->temps[i].state = TS_DEAD;
2412         la_reset_pref(&s->temps[i]);
2413     }
2414 }
2415 
2416 /* liveness analysis: end of basic block: all temps are dead, globals
2417    and local temps should be in memory. */
2418 static void la_bb_end(TCGContext *s, int ng, int nt)
2419 {
2420     int i;
2421 
2422     for (i = 0; i < ng; ++i) {
2423         s->temps[i].state = TS_DEAD | TS_MEM;
2424         la_reset_pref(&s->temps[i]);
2425     }
2426     for (i = ng; i < nt; ++i) {
2427         s->temps[i].state = (s->temps[i].temp_local
2428                              ? TS_DEAD | TS_MEM
2429                              : TS_DEAD);
2430         la_reset_pref(&s->temps[i]);
2431     }
2432 }
2433 
2434 /* liveness analysis: sync globals back to memory.  */
2435 static void la_global_sync(TCGContext *s, int ng)
2436 {
2437     int i;
2438 
2439     for (i = 0; i < ng; ++i) {
2440         int state = s->temps[i].state;
2441         s->temps[i].state = state | TS_MEM;
2442         if (state == TS_DEAD) {
2443             /* If the global was previously dead, reset prefs.  */
2444             la_reset_pref(&s->temps[i]);
2445         }
2446     }
2447 }
2448 
2449 /* liveness analysis: sync globals back to memory and kill.  */
2450 static void la_global_kill(TCGContext *s, int ng)
2451 {
2452     int i;
2453 
2454     for (i = 0; i < ng; i++) {
2455         s->temps[i].state = TS_DEAD | TS_MEM;
2456         la_reset_pref(&s->temps[i]);
2457     }
2458 }
2459 
2460 /* liveness analysis: note live temps crossing calls.  */
2461 static void la_cross_call(TCGContext *s, int nt)
2462 {
2463     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2464     int i;
2465 
2466     for (i = 0; i < nt; i++) {
2467         TCGTemp *ts = &s->temps[i];
2468         if (!(ts->state & TS_DEAD)) {
2469             TCGRegSet *pset = la_temp_pref(ts);
2470             TCGRegSet set = *pset;
2471 
2472             set &= mask;
2473             /* If the combination is not possible, restart.  */
2474             if (set == 0) {
2475                 set = tcg_target_available_regs[ts->type] & mask;
2476             }
2477             *pset = set;
2478         }
2479     }
2480 }
2481 
2482 /* Liveness analysis: update the opc_arg_life array to tell if a
2483    given input argument is dead. Instructions updating dead
2484    temporaries are removed. */
2485 static void liveness_pass_1(TCGContext *s)
2486 {
2487     int nb_globals = s->nb_globals;
2488     int nb_temps = s->nb_temps;
2489     TCGOp *op, *op_prev;
2490     TCGRegSet *prefs;
2491     int i;
2492 
2493     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2494     for (i = 0; i < nb_temps; ++i) {
2495         s->temps[i].state_ptr = prefs + i;
2496     }
2497 
2498     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2499     la_func_end(s, nb_globals, nb_temps);
2500 
2501     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2502         int nb_iargs, nb_oargs;
2503         TCGOpcode opc_new, opc_new2;
2504         bool have_opc_new2;
2505         TCGLifeData arg_life = 0;
2506         TCGTemp *ts;
2507         TCGOpcode opc = op->opc;
2508         const TCGOpDef *def = &tcg_op_defs[opc];
2509 
2510         switch (opc) {
2511         case INDEX_op_call:
2512             {
2513                 int call_flags;
2514                 int nb_call_regs;
2515 
2516                 nb_oargs = TCGOP_CALLO(op);
2517                 nb_iargs = TCGOP_CALLI(op);
2518                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2519 
2520                 /* pure functions can be removed if their result is unused */
2521                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2522                     for (i = 0; i < nb_oargs; i++) {
2523                         ts = arg_temp(op->args[i]);
2524                         if (ts->state != TS_DEAD) {
2525                             goto do_not_remove_call;
2526                         }
2527                     }
2528                     goto do_remove;
2529                 }
2530             do_not_remove_call:
2531 
2532                 /* Output args are dead.  */
2533                 for (i = 0; i < nb_oargs; i++) {
2534                     ts = arg_temp(op->args[i]);
2535                     if (ts->state & TS_DEAD) {
2536                         arg_life |= DEAD_ARG << i;
2537                     }
2538                     if (ts->state & TS_MEM) {
2539                         arg_life |= SYNC_ARG << i;
2540                     }
2541                     ts->state = TS_DEAD;
2542                     la_reset_pref(ts);
2543 
2544                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2545                     op->output_pref[i] = 0;
2546                 }
2547 
2548                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2549                                     TCG_CALL_NO_READ_GLOBALS))) {
2550                     la_global_kill(s, nb_globals);
2551                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2552                     la_global_sync(s, nb_globals);
2553                 }
2554 
2555                 /* Record arguments that die in this helper.  */
2556                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2557                     ts = arg_temp(op->args[i]);
2558                     if (ts && ts->state & TS_DEAD) {
2559                         arg_life |= DEAD_ARG << i;
2560                     }
2561                 }
2562 
2563                 /* For all live registers, remove call-clobbered prefs.  */
2564                 la_cross_call(s, nb_temps);
2565 
2566                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2567 
2568                 /* Input arguments are live for preceding opcodes.  */
2569                 for (i = 0; i < nb_iargs; i++) {
2570                     ts = arg_temp(op->args[i + nb_oargs]);
2571                     if (ts && ts->state & TS_DEAD) {
2572                         /* For those arguments that die, and will be allocated
2573                          * in registers, clear the register set for that arg,
2574                          * to be filled in below.  For args that will be on
2575                          * the stack, reset to any available reg.
2576                          */
2577                         *la_temp_pref(ts)
2578                             = (i < nb_call_regs ? 0 :
2579                                tcg_target_available_regs[ts->type]);
2580                         ts->state &= ~TS_DEAD;
2581                     }
2582                 }
2583 
2584                 /* For each input argument, add its input register to prefs.
2585                    If a temp is used once, this produces a single set bit.  */
2586                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2587                     ts = arg_temp(op->args[i + nb_oargs]);
2588                     if (ts) {
2589                         tcg_regset_set_reg(*la_temp_pref(ts),
2590                                            tcg_target_call_iarg_regs[i]);
2591                     }
2592                 }
2593             }
2594             break;
2595         case INDEX_op_insn_start:
2596             break;
2597         case INDEX_op_discard:
2598             /* mark the temporary as dead */
2599             ts = arg_temp(op->args[0]);
2600             ts->state = TS_DEAD;
2601             la_reset_pref(ts);
2602             break;
2603 
2604         case INDEX_op_add2_i32:
2605             opc_new = INDEX_op_add_i32;
2606             goto do_addsub2;
2607         case INDEX_op_sub2_i32:
2608             opc_new = INDEX_op_sub_i32;
2609             goto do_addsub2;
2610         case INDEX_op_add2_i64:
2611             opc_new = INDEX_op_add_i64;
2612             goto do_addsub2;
2613         case INDEX_op_sub2_i64:
2614             opc_new = INDEX_op_sub_i64;
2615         do_addsub2:
2616             nb_iargs = 4;
2617             nb_oargs = 2;
2618             /* Test if the high part of the operation is dead, but not
2619                the low part.  The result can be optimized to a simple
2620                add or sub.  This happens often for the x86_64 guest when
2621                the CPU is in 32-bit mode.  */
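            /* E.g. "add2_i32 lo,hi,al,ah,bl,bh" whose "hi" output is
               dead is rewritten in place as "add_i32 lo,al,bl".  */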
2622             if (arg_temp(op->args[1])->state == TS_DEAD) {
2623                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2624                     goto do_remove;
2625                 }
2626                 /* Replace the opcode and adjust the args in place,
2627                    leaving 3 unused args at the end.  */
2628                 op->opc = opc = opc_new;
2629                 op->args[1] = op->args[2];
2630                 op->args[2] = op->args[4];
2631                 /* Fall through and mark the single-word operation live.  */
2632                 nb_iargs = 2;
2633                 nb_oargs = 1;
2634             }
2635             goto do_not_remove;
2636 
2637         case INDEX_op_mulu2_i32:
2638             opc_new = INDEX_op_mul_i32;
2639             opc_new2 = INDEX_op_muluh_i32;
2640             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2641             goto do_mul2;
2642         case INDEX_op_muls2_i32:
2643             opc_new = INDEX_op_mul_i32;
2644             opc_new2 = INDEX_op_mulsh_i32;
2645             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2646             goto do_mul2;
2647         case INDEX_op_mulu2_i64:
2648             opc_new = INDEX_op_mul_i64;
2649             opc_new2 = INDEX_op_muluh_i64;
2650             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2651             goto do_mul2;
2652         case INDEX_op_muls2_i64:
2653             opc_new = INDEX_op_mul_i64;
2654             opc_new2 = INDEX_op_mulsh_i64;
2655             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2656             goto do_mul2;
2657         do_mul2:
2658             nb_iargs = 2;
2659             nb_oargs = 2;
2660             if (arg_temp(op->args[1])->state == TS_DEAD) {
2661                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2662                     /* Both parts of the operation are dead.  */
2663                     goto do_remove;
2664                 }
2665                 /* The high part of the operation is dead; generate the low. */
2666                 op->opc = opc = opc_new;
2667                 op->args[1] = op->args[2];
2668                 op->args[2] = op->args[3];
2669             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2670                 /* The low part of the operation is dead; generate the high. */
2671                 op->opc = opc = opc_new2;
2672                 op->args[0] = op->args[1];
2673                 op->args[1] = op->args[2];
2674                 op->args[2] = op->args[3];
2675             } else {
2676                 goto do_not_remove;
2677             }
2678             /* Mark the single-word operation live.  */
2679             nb_oargs = 1;
2680             goto do_not_remove;
2681 
2682         default:
2683             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2684             nb_iargs = def->nb_iargs;
2685             nb_oargs = def->nb_oargs;
2686 
2687             /* Test if the operation can be removed because all
2688                its outputs are dead. We assume that nb_oargs == 0
2689                implies side effects */
2690             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2691                 for (i = 0; i < nb_oargs; i++) {
2692                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2693                         goto do_not_remove;
2694                     }
2695                 }
2696                 goto do_remove;
2697             }
2698             goto do_not_remove;
2699 
2700         do_remove:
2701             tcg_op_remove(s, op);
2702             break;
2703 
2704         do_not_remove:
2705             for (i = 0; i < nb_oargs; i++) {
2706                 ts = arg_temp(op->args[i]);
2707 
2708                 /* Remember the preference of the uses that followed.  */
2709                 op->output_pref[i] = *la_temp_pref(ts);
2710 
2711                 /* Output args are dead.  */
2712                 if (ts->state & TS_DEAD) {
2713                     arg_life |= DEAD_ARG << i;
2714                 }
2715                 if (ts->state & TS_MEM) {
2716                     arg_life |= SYNC_ARG << i;
2717                 }
2718                 ts->state = TS_DEAD;
2719                 la_reset_pref(ts);
2720             }
2721 
2722             /* If end of basic block, update.  */
2723             if (def->flags & TCG_OPF_BB_EXIT) {
2724                 la_func_end(s, nb_globals, nb_temps);
2725             } else if (def->flags & TCG_OPF_BB_END) {
2726                 la_bb_end(s, nb_globals, nb_temps);
2727             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2728                 la_global_sync(s, nb_globals);
2729                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2730                     la_cross_call(s, nb_temps);
2731                 }
2732             }
2733 
2734             /* Record arguments that die in this opcode.  */
2735             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2736                 ts = arg_temp(op->args[i]);
2737                 if (ts->state & TS_DEAD) {
2738                     arg_life |= DEAD_ARG << i;
2739                 }
2740             }
2741 
2742             /* Input arguments are live for preceding opcodes.  */
2743             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2744                 ts = arg_temp(op->args[i]);
2745                 if (ts->state & TS_DEAD) {
2746                     /* For operands that were dead, initially allow
2747                        all regs for the type.  */
2748                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2749                     ts->state &= ~TS_DEAD;
2750                 }
2751             }
2752 
2753             /* Incorporate constraints for this operand.  */
2754             switch (opc) {
2755             case INDEX_op_mov_i32:
2756             case INDEX_op_mov_i64:
2757                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2758                    have proper constraints.  That said, special case
2759                    moves to propagate preferences backward.  */
2760                 if (IS_DEAD_ARG(1)) {
2761                     *la_temp_pref(arg_temp(op->args[0]))
2762                         = *la_temp_pref(arg_temp(op->args[1]));
2763                 }
2764                 break;
2765 
2766             default:
2767                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2768                     const TCGArgConstraint *ct = &def->args_ct[i];
2769                     TCGRegSet set, *pset;
2770 
2771                     ts = arg_temp(op->args[i]);
2772                     pset = la_temp_pref(ts);
2773                     set = *pset;
2774 
2775                     set &= ct->u.regs;
2776                     if (ct->ct & TCG_CT_IALIAS) {
2777                         set &= op->output_pref[ct->alias_index];
2778                     }
2779                     /* If the combination is not possible, restart.  */
2780                     if (set == 0) {
2781                         set = ct->u.regs;
2782                     }
2783                     *pset = set;
2784                 }
2785                 break;
2786             }
2787             break;
2788         }
2789         op->life = arg_life;
2790     }
2791 }
2792 
2793 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2794 static bool liveness_pass_2(TCGContext *s)
2795 {
2796     int nb_globals = s->nb_globals;
2797     int nb_temps, i;
2798     bool changes = false;
2799     TCGOp *op, *op_next;
2800 
2801     /* Create a temporary for each indirect global.  */
2802     for (i = 0; i < nb_globals; ++i) {
2803         TCGTemp *its = &s->temps[i];
2804         if (its->indirect_reg) {
2805             TCGTemp *dts = tcg_temp_alloc(s);
2806             dts->type = its->type;
2807             dts->base_type = its->base_type;
2808             its->state_ptr = dts;
2809         } else {
2810             its->state_ptr = NULL;
2811         }
2812         /* All globals begin dead.  */
2813         its->state = TS_DEAD;
2814     }
2815     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2816         TCGTemp *its = &s->temps[i];
2817         its->state_ptr = NULL;
2818         its->state = TS_DEAD;
2819     }
2820 
2821     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2822         TCGOpcode opc = op->opc;
2823         const TCGOpDef *def = &tcg_op_defs[opc];
2824         TCGLifeData arg_life = op->life;
2825         int nb_iargs, nb_oargs, call_flags;
2826         TCGTemp *arg_ts, *dir_ts;
2827 
2828         if (opc == INDEX_op_call) {
2829             nb_oargs = TCGOP_CALLO(op);
2830             nb_iargs = TCGOP_CALLI(op);
2831             call_flags = op->args[nb_oargs + nb_iargs + 1];
2832         } else {
2833             nb_iargs = def->nb_iargs;
2834             nb_oargs = def->nb_oargs;
2835 
2836             /* Set flags similar to those that calls require.  */
2837             if (def->flags & TCG_OPF_BB_END) {
2838                 /* Like writing globals: save_globals */
2839                 call_flags = 0;
2840             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2841                 /* Like reading globals: sync_globals */
2842                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2843             } else {
2844                 /* No effect on globals.  */
2845                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2846                               TCG_CALL_NO_WRITE_GLOBALS);
2847             }
2848         }
2849 
2850         /* Make sure that input arguments are available.  */
2851         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2852             arg_ts = arg_temp(op->args[i]);
2853             if (arg_ts) {
2854                 dir_ts = arg_ts->state_ptr;
2855                 if (dir_ts && arg_ts->state == TS_DEAD) {
2856                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2857                                       ? INDEX_op_ld_i32
2858                                       : INDEX_op_ld_i64);
2859                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2860 
2861                     lop->args[0] = temp_arg(dir_ts);
2862                     lop->args[1] = temp_arg(arg_ts->mem_base);
2863                     lop->args[2] = arg_ts->mem_offset;
2864 
2865                     /* Loaded, but synced with memory.  */
2866                     arg_ts->state = TS_MEM;
2867                 }
2868             }
2869         }
2870 
2871         /* Perform input replacement, and mark inputs that became dead.
2872            No action is required except keeping temp_state up to date
2873            so that we reload when needed.  */
2874         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2875             arg_ts = arg_temp(op->args[i]);
2876             if (arg_ts) {
2877                 dir_ts = arg_ts->state_ptr;
2878                 if (dir_ts) {
2879                     op->args[i] = temp_arg(dir_ts);
2880                     changes = true;
2881                     if (IS_DEAD_ARG(i)) {
2882                         arg_ts->state = TS_DEAD;
2883                     }
2884                 }
2885             }
2886         }
2887 
2888         /* Liveness analysis should ensure that the following are
2889            all correct, for call sites and basic block end points.  */
2890         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2891             /* Nothing to do */
2892         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2893             for (i = 0; i < nb_globals; ++i) {
2894                 /* Liveness should see that globals are synced back,
2895                    that is, either TS_DEAD or TS_MEM.  */
2896                 arg_ts = &s->temps[i];
2897                 tcg_debug_assert(arg_ts->state_ptr == 0
2898                                  || arg_ts->state != 0);
2899             }
2900         } else {
2901             for (i = 0; i < nb_globals; ++i) {
2902                 /* Liveness should see that globals are saved back,
2903                    that is, TS_DEAD, waiting to be reloaded.  */
2904                 arg_ts = &s->temps[i];
2905                 tcg_debug_assert(arg_ts->state_ptr == 0
2906                                  || arg_ts->state == TS_DEAD);
2907             }
2908         }
2909 
2910         /* Outputs become available.  */
2911         for (i = 0; i < nb_oargs; i++) {
2912             arg_ts = arg_temp(op->args[i]);
2913             dir_ts = arg_ts->state_ptr;
2914             if (!dir_ts) {
2915                 continue;
2916             }
2917             op->args[i] = temp_arg(dir_ts);
2918             changes = true;
2919 
2920             /* The output is now live and modified.  */
2921             arg_ts->state = 0;
2922 
2923             /* Sync outputs upon their last write.  */
2924             if (NEED_SYNC_ARG(i)) {
2925                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2926                                   ? INDEX_op_st_i32
2927                                   : INDEX_op_st_i64);
2928                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2929 
2930                 sop->args[0] = temp_arg(dir_ts);
2931                 sop->args[1] = temp_arg(arg_ts->mem_base);
2932                 sop->args[2] = arg_ts->mem_offset;
2933 
2934                 arg_ts->state = TS_MEM;
2935             }
2936             /* Drop outputs that are dead.  */
2937             if (IS_DEAD_ARG(i)) {
2938                 arg_ts->state = TS_DEAD;
2939             }
2940         }
2941     }
2942 
2943     return changes;
2944 }
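
/*
 * Net effect, sketched for an indirect global G with direct temp D:
 * an op "add_i32 G, G, t" becomes
 *     ld_i32  D, mem_base, mem_offset    -- only if G was TS_DEAD
 *     add_i32 D, D, t
 *     st_i32  D, mem_base, mem_offset    -- only on the last write
 * so the register allocator only ever sees the direct temporary.
 */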
2945 
2946 #ifdef CONFIG_DEBUG_TCG
2947 static void dump_regs(TCGContext *s)
2948 {
2949     TCGTemp *ts;
2950     int i;
2951     char buf[64];
2952 
2953     for(i = 0; i < s->nb_temps; i++) {
2954         ts = &s->temps[i];
2955         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2956         switch(ts->val_type) {
2957         case TEMP_VAL_REG:
2958             printf("%s", tcg_target_reg_names[ts->reg]);
2959             break;
2960         case TEMP_VAL_MEM:
2961             printf("%d(%s)", (int)ts->mem_offset,
2962                    tcg_target_reg_names[ts->mem_base->reg]);
2963             break;
2964         case TEMP_VAL_CONST:
2965             printf("$0x%" TCG_PRIlx, ts->val);
2966             break;
2967         case TEMP_VAL_DEAD:
2968             printf("D");
2969             break;
2970         default:
2971             printf("???");
2972             break;
2973         }
2974         printf("\n");
2975     }
2976 
2977     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2978         if (s->reg_to_temp[i] != NULL) {
2979             printf("%s: %s\n",
2980                    tcg_target_reg_names[i],
2981                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2982         }
2983     }
2984 }
2985 
2986 static void check_regs(TCGContext *s)
2987 {
2988     int reg;
2989     int k;
2990     TCGTemp *ts;
2991     char buf[64];
2992 
2993     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2994         ts = s->reg_to_temp[reg];
2995         if (ts != NULL) {
2996             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2997                 printf("Inconsistency for register %s:\n",
2998                        tcg_target_reg_names[reg]);
2999                 goto fail;
3000             }
3001         }
3002     }
3003     for (k = 0; k < s->nb_temps; k++) {
3004         ts = &s->temps[k];
3005         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3006             && s->reg_to_temp[ts->reg] != ts) {
3007             printf("Inconsistency for temp %s:\n",
3008                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3009         fail:
3010             printf("reg state:\n");
3011             dump_regs(s);
3012             tcg_abort();
3013         }
3014     }
3015 }
3016 #endif
3017 
3018 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3019 {
3020 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3021     /* Sparc64 stack is accessed with an offset of 2047; do not realign */
3022     s->current_frame_offset = (s->current_frame_offset +
3023                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3024         ~(sizeof(tcg_target_long) - 1);
3025 #endif
3026     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3027         s->frame_end) {
3028         tcg_abort();
3029     }
3030     ts->mem_offset = s->current_frame_offset;
3031     ts->mem_base = s->frame_temp;
3032     ts->mem_allocated = 1;
3033     s->current_frame_offset += sizeof(tcg_target_long);
3034 }
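
/* The rounding above is the usual align-up idiom: with an 8-byte
   tcg_target_long, a frame offset of 13 becomes (13 + 7) & ~7 == 16
   before the slot is assigned and the offset advanced by 8.  */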
3035 
3036 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3037 
3038 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3039    mark it free; otherwise mark it dead.  */
3040 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3041 {
3042     if (ts->fixed_reg) {
3043         return;
3044     }
3045     if (ts->val_type == TEMP_VAL_REG) {
3046         s->reg_to_temp[ts->reg] = NULL;
3047     }
3048     ts->val_type = (free_or_dead < 0
3049                     || ts->temp_local
3050                     || ts->temp_global
3051                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3052 }
3053 
3054 /* Mark a temporary as dead.  */
3055 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3056 {
3057     temp_free_or_dead(s, ts, 1);
3058 }
3059 
3060 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3061    register needs to be allocated to store a constant.  If 'free_or_dead'
3062    is non-zero, subsequently release the temporary; if it is positive, the
3063    temp is dead; if it is negative, the temp is free.  */
3064 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3065                       TCGRegSet preferred_regs, int free_or_dead)
3066 {
3067     if (ts->fixed_reg) {
3068         return;
3069     }
3070     if (!ts->mem_coherent) {
3071         if (!ts->mem_allocated) {
3072             temp_allocate_frame(s, ts);
3073         }
3074         switch (ts->val_type) {
3075         case TEMP_VAL_CONST:
3076             /* If we're going to free the temp immediately, then we won't
3077                require it later in a register, so attempt to store the
3078                constant to memory directly.  */
3079             if (free_or_dead
3080                 && tcg_out_sti(s, ts->type, ts->val,
3081                                ts->mem_base->reg, ts->mem_offset)) {
3082                 break;
3083             }
3084             temp_load(s, ts, tcg_target_available_regs[ts->type],
3085                       allocated_regs, preferred_regs);
3086             /* fallthrough */
3087 
3088         case TEMP_VAL_REG:
3089             tcg_out_st(s, ts->type, ts->reg,
3090                        ts->mem_base->reg, ts->mem_offset);
3091             break;
3092 
3093         case TEMP_VAL_MEM:
3094             break;
3095 
3096         case TEMP_VAL_DEAD:
3097         default:
3098             tcg_abort();
3099         }
3100         ts->mem_coherent = 1;
3101     }
3102     if (free_or_dead) {
3103         temp_free_or_dead(s, ts, free_or_dead);
3104     }
3105 }
3106 
3107 /* free register 'reg' by spilling the corresponding temporary if necessary */
3108 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3109 {
3110     TCGTemp *ts = s->reg_to_temp[reg];
3111     if (ts != NULL) {
3112         temp_sync(s, ts, allocated_regs, 0, -1);
3113     }
3114 }
3115 
3116 /**
3117  * tcg_reg_alloc:
3118  * @required_regs: Set of registers in which we must allocate.
3119  * @allocated_regs: Set of registers which must be avoided.
3120  * @preferred_regs: Set of registers we should prefer.
3121  * @rev: True if we search the registers in "indirect" order.
3122  *
3123  * The allocated register must be in @required_regs & ~@allocated_regs,
3124  * but if we can put it in @preferred_regs we may save a move later.
3125  */
3126 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3127                             TCGRegSet allocated_regs,
3128                             TCGRegSet preferred_regs, bool rev)
3129 {
3130     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3131     TCGRegSet reg_ct[2];
3132     const int *order;
3133 
3134     reg_ct[1] = required_regs & ~allocated_regs;
3135     tcg_debug_assert(reg_ct[1] != 0);
3136     reg_ct[0] = reg_ct[1] & preferred_regs;
3137 
3138     /* Skip the preferred_regs option if it cannot be satisfied,
3139        or if the preference made no difference.  */
3140     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3141 
3142     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3143 
3144     /* Try free registers, preferences first.  */
3145     for (j = f; j < 2; j++) {
3146         TCGRegSet set = reg_ct[j];
3147 
3148         if (tcg_regset_single(set)) {
3149             /* One register in the set.  */
3150             TCGReg reg = tcg_regset_first(set);
3151             if (s->reg_to_temp[reg] == NULL) {
3152                 return reg;
3153             }
3154         } else {
3155             for (i = 0; i < n; i++) {
3156                 TCGReg reg = order[i];
3157                 if (s->reg_to_temp[reg] == NULL &&
3158                     tcg_regset_test_reg(set, reg)) {
3159                     return reg;
3160                 }
3161             }
3162         }
3163     }
3164 
3165     /* We must spill something.  */
3166     for (j = f; j < 2; j++) {
3167         TCGRegSet set = reg_ct[j];
3168 
3169         if (tcg_regset_single(set)) {
3170             /* One register in the set.  */
3171             TCGReg reg = tcg_regset_first(set);
3172             tcg_reg_free(s, reg, allocated_regs);
3173             return reg;
3174         } else {
3175             for (i = 0; i < n; i++) {
3176                 TCGReg reg = order[i];
3177                 if (tcg_regset_test_reg(set, reg)) {
3178                     tcg_reg_free(s, reg, allocated_regs);
3179                     return reg;
3180                 }
3181             }
3182         }
3183     }
3184 
3185     tcg_abort();
3186 }
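
/*
 * Recap of the strategy above: reg_ct[0] (required & unallocated &
 * preferred) is tried before reg_ct[1] (required & unallocated); within
 * each set a currently free register wins, and only when both passes
 * fail do we spill via tcg_reg_free.  order[] decides which of several
 * free registers is picked first.
 */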
3187 
3188 /* Make sure the temporary is in a register.  If needed, allocate the register
3189    from DESIRED while avoiding ALLOCATED.  */
3190 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3191                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3192 {
3193     TCGReg reg;
3194 
3195     switch (ts->val_type) {
3196     case TEMP_VAL_REG:
3197         return;
3198     case TEMP_VAL_CONST:
3199         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3200                             preferred_regs, ts->indirect_base);
3201         tcg_out_movi(s, ts->type, reg, ts->val);
3202         ts->mem_coherent = 0;
3203         break;
3204     case TEMP_VAL_MEM:
3205         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3206                             preferred_regs, ts->indirect_base);
3207         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3208         ts->mem_coherent = 1;
3209         break;
3210     case TEMP_VAL_DEAD:
3211     default:
3212         tcg_abort();
3213     }
3214     ts->reg = reg;
3215     ts->val_type = TEMP_VAL_REG;
3216     s->reg_to_temp[reg] = ts;
3217 }
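/*
 * Editorial sketch (not part of the original source): the value-location
 * states temp_load() operates on, reduced to a hypothetical standalone
 * model.  The key detail is mem_coherent: a load from the canonical slot
 * leaves register and memory in agreement, while materializing a constant
 * leaves the slot stale until a later sync stores it back.
 */
#include <stdio.h>

enum toy_val { TOY_DEAD, TOY_REG, TOY_MEM, TOY_CONST };

struct toy_temp {
    enum toy_val val_type;
    int reg;           /* valid once val_type == TOY_REG */
    int mem_coherent;  /* does the memory slot match the register? */
};

static void toy_load(struct toy_temp *ts, int reg)
{
    if (ts->val_type == TOY_REG) {
        return;                       /* already where we want it */
    }
    ts->mem_coherent = (ts->val_type == TOY_MEM);
    ts->reg = reg;
    ts->val_type = TOY_REG;
}

int main(void)
{
    struct toy_temp t = { .val_type = TOY_CONST };
    toy_load(&t, 3);
    printf("in r%d, mem_coherent=%d\n", t.reg, t.mem_coherent); /* r3, 0 */
    return 0;
}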
3218 
3219 /* Save a temporary to memory. 'allocated_regs' is used in case a
3220    temporary register needs to be allocated to store a constant.  */
3221 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3222 {
3223     /* The liveness analysis already ensures that globals are back
3224        in memory. Keep a tcg_debug_assert for safety. */
3225     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3226 }
3227 
3228 /* save globals to their canonical location and assume they can be
3229    modified by the following code. 'allocated_regs' is used in case a
3230    temporary register needs to be allocated to store a constant. */
3231 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3232 {
3233     int i, n;
3234 
3235     for (i = 0, n = s->nb_globals; i < n; i++) {
3236         temp_save(s, &s->temps[i], allocated_regs);
3237     }
3238 }
3239 
3240 /* sync globals to their canonical location and assume they can be
3241    read by the following code. 'allocated_regs' is used in case a
3242    temporary register needs to be allocated to store a constant. */
3243 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3244 {
3245     int i, n;
3246 
3247     for (i = 0, n = s->nb_globals; i < n; i++) {
3248         TCGTemp *ts = &s->temps[i];
3249         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3250                          || ts->fixed_reg
3251                          || ts->mem_coherent);
3252     }
3253 }
3254 
3255 /* at the end of a basic block, we assume all temporaries are dead and
3256    all globals are stored at their canonical location. */
3257 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3258 {
3259     int i;
3260 
3261     for (i = s->nb_globals; i < s->nb_temps; i++) {
3262         TCGTemp *ts = &s->temps[i];
3263         if (ts->temp_local) {
3264             temp_save(s, ts, allocated_regs);
3265         } else {
3266             /* The liveness analysis already ensures that temps are dead.
3267                Keep a tcg_debug_assert for safety. */
3268             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3269         }
3270     }
3271 
3272     save_globals(s, allocated_regs);
3273 }
3274 
3275 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3276                                   tcg_target_ulong val, TCGLifeData arg_life,
3277                                   TCGRegSet preferred_regs)
3278 {
3279     if (ots->fixed_reg) {
3280         /* For fixed registers, we do not do any constant propagation.  */
3281         tcg_out_movi(s, ots->type, ots->reg, val);
3282         return;
3283     }
3284 
3285     /* The movi is not explicitly generated here; the constant is merely recorded.  */
3286     if (ots->val_type == TEMP_VAL_REG) {
3287         s->reg_to_temp[ots->reg] = NULL;
3288     }
3289     ots->val_type = TEMP_VAL_CONST;
3290     ots->val = val;
3291     ots->mem_coherent = 0;
3292     if (NEED_SYNC_ARG(0)) {
3293         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3294     } else if (IS_DEAD_ARG(0)) {
3295         temp_dead(s, ots);
3296     }
3297 }
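/*
 * Editorial note (not part of the original source): unless the output
 * lives in a fixed register or must be synced, no host movi is emitted
 * above.  For a hypothetical sequence
 *
 *     movi_i32 t0, $0x10
 *     add_i32  t1, t2, t0
 *
 * t0 merely becomes TEMP_VAL_CONST with val = 0x10; if the host add
 * accepts an immediate, tcg_reg_alloc_op() below folds the constant
 * straight into the instruction and t0 never occupies a register.
 */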
3298 
3299 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3300 {
3301     TCGTemp *ots = arg_temp(op->args[0]);
3302     tcg_target_ulong val = op->args[1];
3303 
3304     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3305 }
3306 
3307 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3308 {
3309     const TCGLifeData arg_life = op->life;
3310     TCGRegSet allocated_regs, preferred_regs;
3311     TCGTemp *ts, *ots;
3312     TCGType otype, itype;
3313 
3314     allocated_regs = s->reserved_regs;
3315     preferred_regs = op->output_pref[0];
3316     ots = arg_temp(op->args[0]);
3317     ts = arg_temp(op->args[1]);
3318 
3319     /* Note that otype != itype for no-op truncation.  */
3320     otype = ots->type;
3321     itype = ts->type;
3322 
3323     if (ts->val_type == TEMP_VAL_CONST) {
3324         /* propagate constant or generate sti */
3325         tcg_target_ulong val = ts->val;
3326         if (IS_DEAD_ARG(1)) {
3327             temp_dead(s, ts);
3328         }
3329         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3330         return;
3331     }
3332 
3333     /* If the source value is in memory we're going to be forced
3334        to have it in a register in order to perform the copy.  Copy
3335        the SOURCE value into its own register first, so that we
3336        don't have to reload SOURCE the next time it is used. */
3337     if (ts->val_type == TEMP_VAL_MEM) {
3338         temp_load(s, ts, tcg_target_available_regs[itype],
3339                   allocated_regs, preferred_regs);
3340     }
3341 
3342     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3343     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
3344         /* mov to a non-saved dead register makes no sense (even with
3345            liveness analysis disabled). */
3346         tcg_debug_assert(NEED_SYNC_ARG(0));
3347         if (!ots->mem_allocated) {
3348             temp_allocate_frame(s, ots);
3349         }
3350         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3351         if (IS_DEAD_ARG(1)) {
3352             temp_dead(s, ts);
3353         }
3354         temp_dead(s, ots);
3355     } else {
3356         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
3357             /* the mov can be suppressed */
3358             if (ots->val_type == TEMP_VAL_REG) {
3359                 s->reg_to_temp[ots->reg] = NULL;
3360             }
3361             ots->reg = ts->reg;
3362             temp_dead(s, ts);
3363         } else {
3364             if (ots->val_type != TEMP_VAL_REG) {
3365                 /* When allocating a new register, make sure to not spill the
3366                    input one. */
3367                 tcg_regset_set_reg(allocated_regs, ts->reg);
3368                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3369                                          allocated_regs, preferred_regs,
3370                                          ots->indirect_base);
3371             }
3372             tcg_out_mov(s, otype, ots->reg, ts->reg);
3373         }
3374         ots->val_type = TEMP_VAL_REG;
3375         ots->mem_coherent = 0;
3376         s->reg_to_temp[ots->reg] = ots;
3377         if (NEED_SYNC_ARG(0)) {
3378             temp_sync(s, ots, allocated_regs, 0, 0);
3379         }
3380     }
3381 }
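/*
 * Editorial note (not part of the original source): the "mov can be
 * suppressed" branch above emits no host instruction at all.  For a
 * hypothetical
 *
 *     mov_i32 t1, t0      # t0 dead afterwards, neither temp fixed
 *
 * t1 simply inherits t0's host register (ots->reg = ts->reg) and the
 * reg_to_temp[] map is updated; the rename is free at run time.
 */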
3382 
3383 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3384 {
3385     const TCGLifeData arg_life = op->life;
3386     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3387     TCGRegSet i_allocated_regs;
3388     TCGRegSet o_allocated_regs;
3389     int i, k, nb_iargs, nb_oargs;
3390     TCGReg reg;
3391     TCGArg arg;
3392     const TCGArgConstraint *arg_ct;
3393     TCGTemp *ts;
3394     TCGArg new_args[TCG_MAX_OP_ARGS];
3395     int const_args[TCG_MAX_OP_ARGS];
3396 
3397     nb_oargs = def->nb_oargs;
3398     nb_iargs = def->nb_iargs;
3399 
3400     /* copy constants */
3401     memcpy(new_args + nb_oargs + nb_iargs,
3402            op->args + nb_oargs + nb_iargs,
3403            sizeof(TCGArg) * def->nb_cargs);
3404 
3405     i_allocated_regs = s->reserved_regs;
3406     o_allocated_regs = s->reserved_regs;
3407 
3408     /* satisfy input constraints */
3409     for (k = 0; k < nb_iargs; k++) {
3410         TCGRegSet i_preferred_regs, o_preferred_regs;
3411 
3412         i = def->sorted_args[nb_oargs + k];
3413         arg = op->args[i];
3414         arg_ct = &def->args_ct[i];
3415         ts = arg_temp(arg);
3416 
3417         if (ts->val_type == TEMP_VAL_CONST
3418             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3419             /* constant is OK for instruction */
3420             const_args[i] = 1;
3421             new_args[i] = ts->val;
3422             continue;
3423         }
3424 
3425         i_preferred_regs = o_preferred_regs = 0;
3426         if (arg_ct->ct & TCG_CT_IALIAS) {
3427             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3428             if (ts->fixed_reg) {
3429                 /* if the input is a fixed register, allocate a new one
3430                    unless the aliased output is that same register */
3431                 if (arg != op->args[arg_ct->alias_index]) {
3432                     goto allocate_in_reg;
3433                 }
3434             } else {
3435                 /* if the input is aliased to an output and if it is
3436                    not dead after the instruction, we must allocate
3437                    a new register and move it */
3438                 if (!IS_DEAD_ARG(i)) {
3439                     goto allocate_in_reg;
3440                 }
3441 
3442                 /* check if the current register has already been allocated
3443                    for another input aliased to an output */
3444                 if (ts->val_type == TEMP_VAL_REG) {
3445                     int k2, i2;
3446                     reg = ts->reg;
3447                     for (k2 = 0 ; k2 < k ; k2++) {
3448                         i2 = def->sorted_args[nb_oargs + k2];
3449                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3450                             reg == new_args[i2]) {
3451                             goto allocate_in_reg;
3452                         }
3453                     }
3454                 }
3455                 i_preferred_regs = o_preferred_regs;
3456             }
3457         }
3458 
3459         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3460         reg = ts->reg;
3461 
3462         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3463             /* nothing to do: the constraint is satisfied */
3464         } else {
3465         allocate_in_reg:
3466             /* allocate a new register matching the constraint
3467                and move the temporary register into it */
3468             temp_load(s, ts, tcg_target_available_regs[ts->type],
3469                       i_allocated_regs, 0);
3470             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3471                                 o_preferred_regs, ts->indirect_base);
3472             tcg_out_mov(s, ts->type, reg, ts->reg);
3473         }
3474         new_args[i] = reg;
3475         const_args[i] = 0;
3476         tcg_regset_set_reg(i_allocated_regs, reg);
3477     }
3478 
3479     /* mark dead temporaries and free the associated registers */
3480     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3481         if (IS_DEAD_ARG(i)) {
3482             temp_dead(s, arg_temp(op->args[i]));
3483         }
3484     }
3485 
3486     if (def->flags & TCG_OPF_BB_END) {
3487         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3488     } else {
3489         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3490             /* XXX: permit generic clobber register list? */
3491             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3492                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3493                     tcg_reg_free(s, i, i_allocated_regs);
3494                 }
3495             }
3496         }
3497         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3498             /* sync globals if the op has side effects and might trigger
3499                an exception. */
3500             sync_globals(s, i_allocated_regs);
3501         }
3502 
3503         /* satisfy the output constraints */
3504         for (k = 0; k < nb_oargs; k++) {
3505             i = def->sorted_args[k];
3506             arg = op->args[i];
3507             arg_ct = &def->args_ct[i];
3508             ts = arg_temp(arg);
3509             if ((arg_ct->ct & TCG_CT_ALIAS)
3510                 && !const_args[arg_ct->alias_index]) {
3511                 reg = new_args[arg_ct->alias_index];
3512             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3513                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3514                                     i_allocated_regs | o_allocated_regs,
3515                                     op->output_pref[k], ts->indirect_base);
3516             } else {
3517                 /* if fixed register, we try to use it */
3518                 reg = ts->reg;
3519                 if (ts->fixed_reg &&
3520                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3521                     goto oarg_end;
3522                 }
3523                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3524                                     op->output_pref[k], ts->indirect_base);
3525             }
3526             tcg_regset_set_reg(o_allocated_regs, reg);
3527             /* if a fixed register is used, then a move will be done afterwards */
3528             if (!ts->fixed_reg) {
3529                 if (ts->val_type == TEMP_VAL_REG) {
3530                     s->reg_to_temp[ts->reg] = NULL;
3531                 }
3532                 ts->val_type = TEMP_VAL_REG;
3533                 ts->reg = reg;
3534                 /* temp value is modified, so the value kept in memory is
3535                    potentially not the same */
3536                 ts->mem_coherent = 0;
3537                 s->reg_to_temp[reg] = ts;
3538             }
3539         oarg_end:
3540             new_args[i] = reg;
3541         }
3542     }
3543 
3544     /* emit instruction */
3545     if (def->flags & TCG_OPF_VECTOR) {
3546         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3547                        new_args, const_args);
3548     } else {
3549         tcg_out_op(s, op->opc, new_args, const_args);
3550     }
3551 
3552     /* move the outputs in the correct register if needed */
3553     for (i = 0; i < nb_oargs; i++) {
3554         ts = arg_temp(op->args[i]);
3555         reg = new_args[i];
3556         if (ts->fixed_reg && ts->reg != reg) {
3557             tcg_out_mov(s, ts->type, ts->reg, reg);
3558         }
3559         if (NEED_SYNC_ARG(i)) {
3560             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3561         } else if (IS_DEAD_ARG(i)) {
3562             temp_dead(s, ts);
3563         }
3564     }
3565 }
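/*
 * Editorial note (not part of the original source): a worked example of
 * the TCG_CT_IALIAS handling above.  On a two-address host, an op such
 * as sub_i32 typically constrains output 0 and input 1 to the same
 * register (constraint strings like "r", "0", "ri").  For a hypothetical
 *
 *     sub_i32 t2, t0, t1      # t0 still live afterwards
 *
 * input t0 aliases output t2 but is not dead, so the allocate_in_reg
 * path copies t0 into a fresh register; the sub then clobbers that copy
 * as t2 while t0's own register stays valid.
 */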
3566 
3567 #ifdef TCG_TARGET_STACK_GROWSUP
3568 #define STACK_DIR(x) (-(x))
3569 #else
3570 #define STACK_DIR(x) (x)
3571 #endif
3572 
3573 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3574 {
3575     const int nb_oargs = TCGOP_CALLO(op);
3576     const int nb_iargs = TCGOP_CALLI(op);
3577     const TCGLifeData arg_life = op->life;
3578     int flags, nb_regs, i;
3579     TCGReg reg;
3580     TCGArg arg;
3581     TCGTemp *ts;
3582     intptr_t stack_offset;
3583     size_t call_stack_size;
3584     tcg_insn_unit *func_addr;
3585     int allocate_args;
3586     TCGRegSet allocated_regs;
3587 
3588     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3589     flags = op->args[nb_oargs + nb_iargs + 1];
3590 
3591     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3592     if (nb_regs > nb_iargs) {
3593         nb_regs = nb_iargs;
3594     }
3595 
3596     /* assign stack slots first */
3597     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3598     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3599         ~(TCG_TARGET_STACK_ALIGN - 1);
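    /* Editorial note (not part of the original source): this is the usual
       align-up idiom.  E.g. three 8-byte stack slots with a hypothetical
       16-byte TCG_TARGET_STACK_ALIGN: 24 -> (24 + 15) & ~15 = 32.  */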
3600     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3601     if (allocate_args) {
3602         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3603            preallocate call stack */
3604         tcg_abort();
3605     }
3606 
3607     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3608     for (i = nb_regs; i < nb_iargs; i++) {
3609         arg = op->args[nb_oargs + i];
3610 #ifdef TCG_TARGET_STACK_GROWSUP
3611         stack_offset -= sizeof(tcg_target_long);
3612 #endif
3613         if (arg != TCG_CALL_DUMMY_ARG) {
3614             ts = arg_temp(arg);
3615             temp_load(s, ts, tcg_target_available_regs[ts->type],
3616                       s->reserved_regs, 0);
3617             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3618         }
3619 #ifndef TCG_TARGET_STACK_GROWSUP
3620         stack_offset += sizeof(tcg_target_long);
3621 #endif
3622     }
3623 
3624     /* assign input registers */
3625     allocated_regs = s->reserved_regs;
3626     for (i = 0; i < nb_regs; i++) {
3627         arg = op->args[nb_oargs + i];
3628         if (arg != TCG_CALL_DUMMY_ARG) {
3629             ts = arg_temp(arg);
3630             reg = tcg_target_call_iarg_regs[i];
3631 
3632             if (ts->val_type == TEMP_VAL_REG) {
3633                 if (ts->reg != reg) {
3634                     tcg_reg_free(s, reg, allocated_regs);
3635                     tcg_out_mov(s, ts->type, reg, ts->reg);
3636                 }
3637             } else {
3638                 TCGRegSet arg_set = 0;
3639 
3640                 tcg_reg_free(s, reg, allocated_regs);
3641                 tcg_regset_set_reg(arg_set, reg);
3642                 temp_load(s, ts, arg_set, allocated_regs, 0);
3643             }
3644 
3645             tcg_regset_set_reg(allocated_regs, reg);
3646         }
3647     }
3648 
3649     /* mark dead temporaries and free the associated registers */
3650     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3651         if (IS_DEAD_ARG(i)) {
3652             temp_dead(s, arg_temp(op->args[i]));
3653         }
3654     }
3655 
3656     /* clobber call registers */
3657     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3658         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3659             tcg_reg_free(s, i, allocated_regs);
3660         }
3661     }
3662 
3663     /* Save globals if they might be written by the helper, sync them if
3664        they might be read. */
3665     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3666         /* Nothing to do */
3667     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3668         sync_globals(s, allocated_regs);
3669     } else {
3670         save_globals(s, allocated_regs);
3671     }
3672 
3673     tcg_out_call(s, func_addr);
3674 
3675     /* assign output registers and emit moves if needed */
3676     for (i = 0; i < nb_oargs; i++) {
3677         arg = op->args[i];
3678         ts = arg_temp(arg);
3679         reg = tcg_target_call_oarg_regs[i];
3680         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3681 
3682         if (ts->fixed_reg) {
3683             if (ts->reg != reg) {
3684                 tcg_out_mov(s, ts->type, ts->reg, reg);
3685             }
3686         } else {
3687             if (ts->val_type == TEMP_VAL_REG) {
3688                 s->reg_to_temp[ts->reg] = NULL;
3689             }
3690             ts->val_type = TEMP_VAL_REG;
3691             ts->reg = reg;
3692             ts->mem_coherent = 0;
3693             s->reg_to_temp[reg] = ts;
3694             if (NEED_SYNC_ARG(i)) {
3695                 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3696             } else if (IS_DEAD_ARG(i)) {
3697                 temp_dead(s, ts);
3698             }
3699         }
3700     }
3701 }
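/*
 * Editorial illustration (not part of the original source): the argument
 * split performed above, as a hypothetical standalone sketch.  An ABI
 * passing 2 arguments in registers with 8-byte stack slots places the
 * remaining arguments of a 5-argument call at increasing stack offsets.
 */
#include <stdio.h>

int main(void)
{
    int nb_iargs = 5, nb_regs = 2;     /* hypothetical ABI */
    long stack_offset = 0;             /* the real code starts at
                                          TCG_TARGET_CALL_STACK_OFFSET */

    for (int i = nb_regs; i < nb_iargs; i++) {
        printf("arg%d -> [sp + %ld]\n", i, stack_offset);
        stack_offset += 8;             /* sizeof(tcg_target_long) on LP64 */
    }
    for (int i = 0; i < nb_regs; i++) {
        printf("arg%d -> call input register %d\n", i, i);
    }
    return 0;
}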
3702 
3703 #ifdef CONFIG_PROFILER
3704 
3705 /* avoid copy/paste errors */
3706 #define PROF_ADD(to, from, field)                       \
3707     do {                                                \
3708         (to)->field += atomic_read(&((from)->field));   \
3709     } while (0)
3710 
3711 #define PROF_MAX(to, from, field)                                       \
3712     do {                                                                \
3713         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3714         if (val__ > (to)->field) {                                      \
3715             (to)->field = val__;                                        \
3716         }                                                               \
3717     } while (0)
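/*
 * Editorial sketch (not part of the original source): how the two macros
 * are meant to be combined.  The struct and field names are hypothetical;
 * the real per-context merge is tcg_profile_snapshot() below.
 */
struct toy_prof { int64_t op_count; int op_count_max; };

static inline void toy_merge(struct toy_prof *to, const struct toy_prof *from)
{
    PROF_ADD(to, from, op_count);      /* counters accumulate */
    PROF_MAX(to, from, op_count_max);  /* high-water marks keep the peak */
}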
3718 
3719 /* Pass in a zeroed @prof */
3720 static inline
3721 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3722 {
3723     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3724     unsigned int i;
3725 
3726     for (i = 0; i < n_ctxs; i++) {
3727         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3728         const TCGProfile *orig = &s->prof;
3729 
3730         if (counters) {
3731             PROF_ADD(prof, orig, cpu_exec_time);
3732             PROF_ADD(prof, orig, tb_count1);
3733             PROF_ADD(prof, orig, tb_count);
3734             PROF_ADD(prof, orig, op_count);
3735             PROF_MAX(prof, orig, op_count_max);
3736             PROF_ADD(prof, orig, temp_count);
3737             PROF_MAX(prof, orig, temp_count_max);
3738             PROF_ADD(prof, orig, del_op_count);
3739             PROF_ADD(prof, orig, code_in_len);
3740             PROF_ADD(prof, orig, code_out_len);
3741             PROF_ADD(prof, orig, search_out_len);
3742             PROF_ADD(prof, orig, interm_time);
3743             PROF_ADD(prof, orig, code_time);
3744             PROF_ADD(prof, orig, la_time);
3745             PROF_ADD(prof, orig, opt_time);
3746             PROF_ADD(prof, orig, restore_count);
3747             PROF_ADD(prof, orig, restore_time);
3748         }
3749         if (table) {
3750             int i;
3751 
3752             for (i = 0; i < NB_OPS; i++) {
3753                 PROF_ADD(prof, orig, table_op_count[i]);
3754             }
3755         }
3756     }
3757 }
3758 
3759 #undef PROF_ADD
3760 #undef PROF_MAX
3761 
3762 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3763 {
3764     tcg_profile_snapshot(prof, true, false);
3765 }
3766 
3767 static void tcg_profile_snapshot_table(TCGProfile *prof)
3768 {
3769     tcg_profile_snapshot(prof, false, true);
3770 }
3771 
3772 void tcg_dump_op_count(void)
3773 {
3774     TCGProfile prof = {};
3775     int i;
3776 
3777     tcg_profile_snapshot_table(&prof);
3778     for (i = 0; i < NB_OPS; i++) {
3779         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3780                     prof.table_op_count[i]);
3781     }
3782 }
3783 
3784 int64_t tcg_cpu_exec_time(void)
3785 {
3786     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3787     unsigned int i;
3788     int64_t ret = 0;
3789 
3790     for (i = 0; i < n_ctxs; i++) {
3791         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3792         const TCGProfile *prof = &s->prof;
3793 
3794         ret += atomic_read(&prof->cpu_exec_time);
3795     }
3796     return ret;
3797 }
3798 #else
3799 void tcg_dump_op_count(void)
3800 {
3801     qemu_printf("[TCG profiler not compiled]\n");
3802 }
3803 
3804 int64_t tcg_cpu_exec_time(void)
3805 {
3806     error_report("%s: TCG profiler not compiled", __func__);
3807     exit(EXIT_FAILURE);
3808 }
3809 #endif
3810 
3811 
3812 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3813 {
3814 #ifdef CONFIG_PROFILER
3815     TCGProfile *prof = &s->prof;
3816 #endif
3817     int i, num_insns;
3818     TCGOp *op;
3819 
3820 #ifdef CONFIG_PROFILER
3821     {
3822         int n = 0;
3823 
3824         QTAILQ_FOREACH(op, &s->ops, link) {
3825             n++;
3826         }
3827         atomic_set(&prof->op_count, prof->op_count + n);
3828         if (n > prof->op_count_max) {
3829             atomic_set(&prof->op_count_max, n);
3830         }
3831 
3832         n = s->nb_temps;
3833         atomic_set(&prof->temp_count, prof->temp_count + n);
3834         if (n > prof->temp_count_max) {
3835             atomic_set(&prof->temp_count_max, n);
3836         }
3837     }
3838 #endif
3839 
3840 #ifdef DEBUG_DISAS
3841     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3842                  && qemu_log_in_addr_range(tb->pc))) {
3843         qemu_log_lock();
3844         qemu_log("OP:\n");
3845         tcg_dump_ops(s, false);
3846         qemu_log("\n");
3847         qemu_log_unlock();
3848     }
3849 #endif
3850 
3851 #ifdef CONFIG_DEBUG_TCG
3852     /* Ensure all labels referenced have been emitted.  */
3853     {
3854         TCGLabel *l;
3855         bool error = false;
3856 
3857         QSIMPLEQ_FOREACH(l, &s->labels, next) {
3858             if (unlikely(!l->present) && l->refs) {
3859                 qemu_log_mask(CPU_LOG_TB_OP,
3860                               "$L%d referenced but not present.\n", l->id);
3861                 error = true;
3862             }
3863         }
3864         assert(!error);
3865     }
3866 #endif
3867 
3868 #ifdef CONFIG_PROFILER
3869     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3870 #endif
3871 
3872 #ifdef USE_TCG_OPTIMIZATIONS
3873     tcg_optimize(s);
3874 #endif
3875 
3876 #ifdef CONFIG_PROFILER
3877     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3878     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3879 #endif
3880 
3881     reachable_code_pass(s);
3882     liveness_pass_1(s);
3883 
3884     if (s->nb_indirects > 0) {
3885 #ifdef DEBUG_DISAS
3886         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3887                      && qemu_log_in_addr_range(tb->pc))) {
3888             qemu_log_lock();
3889             qemu_log("OP before indirect lowering:\n");
3890             tcg_dump_ops(s, false);
3891             qemu_log("\n");
3892             qemu_log_unlock();
3893         }
3894 #endif
3895         /* Replace indirect temps with direct temps.  */
3896         if (liveness_pass_2(s)) {
3897             /* If changes were made, re-run liveness.  */
3898             liveness_pass_1(s);
3899         }
3900     }
3901 
3902 #ifdef CONFIG_PROFILER
3903     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3904 #endif
3905 
3906 #ifdef DEBUG_DISAS
3907     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3908                  && qemu_log_in_addr_range(tb->pc))) {
3909         qemu_log_lock();
3910         qemu_log("OP after optimization and liveness analysis:\n");
3911         tcg_dump_ops(s, true);
3912         qemu_log("\n");
3913         qemu_log_unlock();
3914     }
3915 #endif
3916 
3917     tcg_reg_alloc_start(s);
3918 
3919     s->code_buf = tb->tc.ptr;
3920     s->code_ptr = tb->tc.ptr;
3921 
3922 #ifdef TCG_TARGET_NEED_LDST_LABELS
3923     QSIMPLEQ_INIT(&s->ldst_labels);
3924 #endif
3925 #ifdef TCG_TARGET_NEED_POOL_LABELS
3926     s->pool_labels = NULL;
3927 #endif
3928 
3929     num_insns = -1;
3930     QTAILQ_FOREACH(op, &s->ops, link) {
3931         TCGOpcode opc = op->opc;
3932 
3933 #ifdef CONFIG_PROFILER
3934         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3935 #endif
3936 
3937         switch (opc) {
3938         case INDEX_op_mov_i32:
3939         case INDEX_op_mov_i64:
3940         case INDEX_op_mov_vec:
3941             tcg_reg_alloc_mov(s, op);
3942             break;
3943         case INDEX_op_movi_i32:
3944         case INDEX_op_movi_i64:
3945         case INDEX_op_dupi_vec:
3946             tcg_reg_alloc_movi(s, op);
3947             break;
3948         case INDEX_op_insn_start:
3949             if (num_insns >= 0) {
3950                 size_t off = tcg_current_code_size(s);
3951                 s->gen_insn_end_off[num_insns] = off;
3952                 /* Assert that we do not overflow our stored offset.  */
3953                 assert(s->gen_insn_end_off[num_insns] == off);
3954             }
3955             num_insns++;
3956             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3957                 target_ulong a;
3958 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
3959                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3960 #else
3961                 a = op->args[i];
3962 #endif
3963                 s->gen_insn_data[num_insns][i] = a;
3964             }
3965             break;
3966         case INDEX_op_discard:
3967             temp_dead(s, arg_temp(op->args[0]));
3968             break;
3969         case INDEX_op_set_label:
3970             tcg_reg_alloc_bb_end(s, s->reserved_regs);
3971             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3972             break;
3973         case INDEX_op_call:
3974             tcg_reg_alloc_call(s, op);
3975             break;
3976         default:
3977             /* Sanity check that we've not introduced any unhandled opcodes. */
3978             tcg_debug_assert(tcg_op_supported(opc));
3979             /* Note: in order to speed up the code, it would be much
3980                faster to have specialized register allocator functions for
3981                some common argument patterns */
3982             tcg_reg_alloc_op(s, op);
3983             break;
3984         }
3985 #ifdef CONFIG_DEBUG_TCG
3986         check_regs(s);
3987 #endif
3988         /* Test for (pending) buffer overflow.  The assumption is that any
3989            one operation beginning below the high water mark cannot overrun
3990            the buffer completely.  Thus we can test for overflow after
3991            generating code without having to check during generation.  */
3992         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3993             return -1;
3994         }
3995     }
3996     tcg_debug_assert(num_insns >= 0);
3997     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3998 
3999     /* Generate TB finalization at the end of block */
4000 #ifdef TCG_TARGET_NEED_LDST_LABELS
4001     if (!tcg_out_ldst_finalize(s)) {
4002         return -1;
4003     }
4004 #endif
4005 #ifdef TCG_TARGET_NEED_POOL_LABELS
4006     if (!tcg_out_pool_finalize(s)) {
4007         return -1;
4008     }
4009 #endif
4010 
4011     /* flush instruction cache */
4012     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4013 
4014     return tcg_current_code_size(s);
4015 }
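/*
 * Editorial note (not part of the original source): a negative return
 * above means the code buffer overflowed mid-translation.  The caller is
 * expected to drop the partial TB, make room, and translate again; a
 * hedged sketch of that retry shape:
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb);
 *     if (unlikely(gen_code_size < 0)) {
 *         // e.g. flush the TB cache, then restart the translation
 *         goto buffer_overflow;
 *     }
 */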
4016 
4017 #ifdef CONFIG_PROFILER
4018 void tcg_dump_info(void)
4019 {
4020     TCGProfile prof = {};
4021     const TCGProfile *s;
4022     int64_t tb_count;
4023     int64_t tb_div_count;
4024     int64_t tot;
4025 
4026     tcg_profile_snapshot_counters(&prof);
4027     s = &prof;
4028     tb_count = s->tb_count;
4029     tb_div_count = tb_count ? tb_count : 1;
4030     tot = s->interm_time + s->code_time;
4031 
4032     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4033                 tot, tot / 2.4e9);
4034     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4035                 " %0.1f%%)\n",
4036                 tb_count, s->tb_count1 - tb_count,
4037                 (double)(s->tb_count1 - s->tb_count)
4038                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4039     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4040                 (double)s->op_count / tb_div_count, s->op_count_max);
4041     qemu_printf("deleted ops/TB      %0.2f\n",
4042                 (double)s->del_op_count / tb_div_count);
4043     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4044                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4045     qemu_printf("avg host code/TB    %0.1f\n",
4046                 (double)s->code_out_len / tb_div_count);
4047     qemu_printf("avg search data/TB  %0.1f\n",
4048                 (double)s->search_out_len / tb_div_count);
4049 
4050     qemu_printf("cycles/op           %0.1f\n",
4051                 s->op_count ? (double)tot / s->op_count : 0);
4052     qemu_printf("cycles/in byte      %0.1f\n",
4053                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4054     qemu_printf("cycles/out byte     %0.1f\n",
4055                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4056     qemu_printf("cycles/search byte  %0.1f\n",
4057                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4058     if (tot == 0) {
4059         tot = 1;
4060     }
4061     qemu_printf("  gen_interm time   %0.1f%%\n",
4062                 (double)s->interm_time / tot * 100.0);
4063     qemu_printf("  gen_code time     %0.1f%%\n",
4064                 (double)s->code_time / tot * 100.0);
4065     qemu_printf("optim./code time    %0.1f%%\n",
4066                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4067                 * 100.0);
4068     qemu_printf("liveness/code time  %0.1f%%\n",
4069                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4070     qemu_printf("cpu_restore count   %" PRId64 "\n",
4071                 s->restore_count);
4072     qemu_printf("  avg cycles        %0.1f\n",
4073                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4074 }
4075 #else
4076 void tcg_dump_info(void)
4077 {
4078     qemu_printf("[TCG profiler not compiled]\n");
4079 }
4080 #endif
4081 
4082 #ifdef ELF_HOST_MACHINE
4083 /* In order to use this feature, the backend needs to do three things:
4084 
4085    (1) Define ELF_HOST_MACHINE to indicate both the value to put
4086        into the ELF image and support for the feature.
4087 
4088    (2) Define tcg_register_jit.  This should create a buffer containing
4089        the contents of a .debug_frame section that describes the post-
4090        prologue unwind info for the tcg machine.
4091 
4092    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4093 */
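/*
 * Editorial sketch (not part of the original source): the shape a host
 * backend's steps (2) and (3) typically take.  The CFI opcode bytes, the
 * structure name, and the field values here are hypothetical; the real
 * definitions live in each tcg-target.inc.c.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_cfi[4];   /* host-specific call-frame opcodes (nops here) */
} ExampleDebugFrame;

static const ExampleDebugFrame example_debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length excludes .len itself */
    .h.cie.id = -1,                         /* CIE marker in .debug_frame */
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.fde.len = sizeof(ExampleDebugFrame)
                 - offsetof(ExampleDebugFrame, h.fde.cie_offset),
    /* fde.func_start/func_len are patched by tcg_register_jit_int() */
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size,
                         &example_debug_frame, sizeof(example_debug_frame));
}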
4094 
4095 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4096 typedef enum {
4097     JIT_NOACTION = 0,
4098     JIT_REGISTER_FN,
4099     JIT_UNREGISTER_FN
4100 } jit_actions_t;
4101 
4102 struct jit_code_entry {
4103     struct jit_code_entry *next_entry;
4104     struct jit_code_entry *prev_entry;
4105     const void *symfile_addr;
4106     uint64_t symfile_size;
4107 };
4108 
4109 struct jit_descriptor {
4110     uint32_t version;
4111     uint32_t action_flag;
4112     struct jit_code_entry *relevant_entry;
4113     struct jit_code_entry *first_entry;
4114 };
4115 
4116 void __jit_debug_register_code(void) __attribute__((noinline));
4117 void __jit_debug_register_code(void)
4118 {
4119     asm("");
4120 }
4121 
4122 /* Must statically initialize the version, because GDB may check
4123    the version before we can set it.  */
4124 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4125 
4126 /* End GDB interface.  */
4127 
4128 static int find_string(const char *strtab, const char *str)
4129 {
4130     const char *p = strtab + 1;
4131 
4132     while (1) {
4133         if (strcmp(p, str) == 0) {
4134             return p - strtab;
4135         }
4136         p += strlen(p) + 1;
4137     }
4138 }
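/*
 * Editorial note (not part of the original source): find_string() assumes
 * @str is present -- the walk has no bound and would run past the table
 * otherwise.  With the .str table from img_template below
 * ("\0" ".text\0" ".debug_info\0" ...), find_string(str, ".text")
 * returns 1 and find_string(str, ".debug_info") returns 7: exactly the
 * sh_name/st_name offsets the ELF headers want.
 */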
4139 
4140 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4141                                  const void *debug_frame,
4142                                  size_t debug_frame_size)
4143 {
4144     struct __attribute__((packed)) DebugInfo {
4145         uint32_t  len;
4146         uint16_t  version;
4147         uint32_t  abbrev;
4148         uint8_t   ptr_size;
4149         uint8_t   cu_die;
4150         uint16_t  cu_lang;
4151         uintptr_t cu_low_pc;
4152         uintptr_t cu_high_pc;
4153         uint8_t   fn_die;
4154         char      fn_name[16];
4155         uintptr_t fn_low_pc;
4156         uintptr_t fn_high_pc;
4157         uint8_t   cu_eoc;
4158     };
4159 
4160     struct ElfImage {
4161         ElfW(Ehdr) ehdr;
4162         ElfW(Phdr) phdr;
4163         ElfW(Shdr) shdr[7];
4164         ElfW(Sym)  sym[2];
4165         struct DebugInfo di;
4166         uint8_t    da[24];
4167         char       str[80];
4168     };
4169 
4170     struct ElfImage *img;
4171 
4172     static const struct ElfImage img_template = {
4173         .ehdr = {
4174             .e_ident[EI_MAG0] = ELFMAG0,
4175             .e_ident[EI_MAG1] = ELFMAG1,
4176             .e_ident[EI_MAG2] = ELFMAG2,
4177             .e_ident[EI_MAG3] = ELFMAG3,
4178             .e_ident[EI_CLASS] = ELF_CLASS,
4179             .e_ident[EI_DATA] = ELF_DATA,
4180             .e_ident[EI_VERSION] = EV_CURRENT,
4181             .e_type = ET_EXEC,
4182             .e_machine = ELF_HOST_MACHINE,
4183             .e_version = EV_CURRENT,
4184             .e_phoff = offsetof(struct ElfImage, phdr),
4185             .e_shoff = offsetof(struct ElfImage, shdr),
4186             .e_ehsize = sizeof(ElfW(Ehdr)),
4187             .e_phentsize = sizeof(ElfW(Phdr)),
4188             .e_phnum = 1,
4189             .e_shentsize = sizeof(ElfW(Shdr)),
4190             .e_shnum = ARRAY_SIZE(img->shdr),
4191             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4192 #ifdef ELF_HOST_FLAGS
4193             .e_flags = ELF_HOST_FLAGS,
4194 #endif
4195 #ifdef ELF_OSABI
4196             .e_ident[EI_OSABI] = ELF_OSABI,
4197 #endif
4198         },
4199         .phdr = {
4200             .p_type = PT_LOAD,
4201             .p_flags = PF_X,
4202         },
4203         .shdr = {
4204             [0] = { .sh_type = SHT_NULL },
4205             /* Trick: The contents of code_gen_buffer are not present in
4206                this fake ELF file; that got allocated elsewhere.  Therefore
4207                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4208                will not look for contents.  We can record any address.  */
4209             [1] = { /* .text */
4210                 .sh_type = SHT_NOBITS,
4211                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4212             },
4213             [2] = { /* .debug_info */
4214                 .sh_type = SHT_PROGBITS,
4215                 .sh_offset = offsetof(struct ElfImage, di),
4216                 .sh_size = sizeof(struct DebugInfo),
4217             },
4218             [3] = { /* .debug_abbrev */
4219                 .sh_type = SHT_PROGBITS,
4220                 .sh_offset = offsetof(struct ElfImage, da),
4221                 .sh_size = sizeof(img->da),
4222             },
4223             [4] = { /* .debug_frame */
4224                 .sh_type = SHT_PROGBITS,
4225                 .sh_offset = sizeof(struct ElfImage),
4226             },
4227             [5] = { /* .symtab */
4228                 .sh_type = SHT_SYMTAB,
4229                 .sh_offset = offsetof(struct ElfImage, sym),
4230                 .sh_size = sizeof(img->sym),
4231                 .sh_info = 1,
4232                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4233                 .sh_entsize = sizeof(ElfW(Sym)),
4234             },
4235             [6] = { /* .strtab */
4236                 .sh_type = SHT_STRTAB,
4237                 .sh_offset = offsetof(struct ElfImage, str),
4238                 .sh_size = sizeof(img->str),
4239             }
4240         },
4241         .sym = {
4242             [1] = { /* code_gen_buffer */
4243                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4244                 .st_shndx = 1,
4245             }
4246         },
4247         .di = {
4248             .len = sizeof(struct DebugInfo) - 4,
4249             .version = 2,
4250             .ptr_size = sizeof(void *),
4251             .cu_die = 1,
4252             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4253             .fn_die = 2,
4254             .fn_name = "code_gen_buffer"
4255         },
4256         .da = {
4257             1,          /* abbrev number (the cu) */
4258             0x11, 1,    /* DW_TAG_compile_unit, has children */
4259             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4260             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4261             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4262             0, 0,       /* end of abbrev */
4263             2,          /* abbrev number (the fn) */
4264             0x2e, 0,    /* DW_TAG_subprogram, no children */
4265             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4266             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4267             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4268             0, 0,       /* end of abbrev */
4269             0           /* no more abbrev */
4270         },
4271         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4272                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4273     };
4274 
4275     /* We only need a single jit entry; statically allocate it.  */
4276     static struct jit_code_entry one_entry;
4277 
4278     uintptr_t buf = (uintptr_t)buf_ptr;
4279     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4280     DebugFrameHeader *dfh;
4281 
4282     img = g_malloc(img_size);
4283     *img = img_template;
4284 
4285     img->phdr.p_vaddr = buf;
4286     img->phdr.p_paddr = buf;
4287     img->phdr.p_memsz = buf_size;
4288 
4289     img->shdr[1].sh_name = find_string(img->str, ".text");
4290     img->shdr[1].sh_addr = buf;
4291     img->shdr[1].sh_size = buf_size;
4292 
4293     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4294     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4295 
4296     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4297     img->shdr[4].sh_size = debug_frame_size;
4298 
4299     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4300     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4301 
4302     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4303     img->sym[1].st_value = buf;
4304     img->sym[1].st_size = buf_size;
4305 
4306     img->di.cu_low_pc = buf;
4307     img->di.cu_high_pc = buf + buf_size;
4308     img->di.fn_low_pc = buf;
4309     img->di.fn_high_pc = buf + buf_size;
4310 
4311     dfh = (DebugFrameHeader *)(img + 1);
4312     memcpy(dfh, debug_frame, debug_frame_size);
4313     dfh->fde.func_start = buf;
4314     dfh->fde.func_len = buf_size;
4315 
4316 #ifdef DEBUG_JIT
4317     /* Enable this block to be able to debug the ELF image file creation.
4318        One can use readelf, objdump, or other inspection utilities.  */
4319     {
4320         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4321         if (f) {
4322             if (fwrite(img, img_size, 1, f) != 1) {
4323                 /* Avoid stupid unused return value warning for fwrite.  */
4324             }
4325             fclose(f);
4326         }
4327     }
4328 #endif
4329 
4330     one_entry.symfile_addr = img;
4331     one_entry.symfile_size = img_size;
4332 
4333     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4334     __jit_debug_descriptor.relevant_entry = &one_entry;
4335     __jit_debug_descriptor.first_entry = &one_entry;
4336     __jit_debug_register_code();
4337 }
4338 #else
4339 /* No support for the feature.  Provide the entry point expected by exec.c,
4340    and implement the internal function we declared earlier.  */
4341 
4342 static void tcg_register_jit_int(void *buf, size_t size,
4343                                  const void *debug_frame,
4344                                  size_t debug_frame_size)
4345 {
4346 }
4347 
4348 void tcg_register_jit(void *buf, size_t buf_size)
4349 {
4350 }
4351 #endif /* ELF_HOST_MACHINE */
4352 
4353 #if !TCG_TARGET_MAYBE_vec
4354 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4355 {
4356     g_assert_not_reached();
4357 }
4358 #endif
4359